chore: organize solution into module folders (Core/Server/Drivers/Client/Tooling)

Group all 69 projects into category subfolders under src/ and tests/ so the
Rider Solution Explorer mirrors the module structure. Folders: Core, Server,
Drivers (with a nested Driver CLIs subfolder), Client, Tooling.

- Move every project folder on disk with git mv (history preserved as renames).
- Recompute relative paths in 57 .csproj files: cross-category ProjectReferences,
  the lib/ HintPath+None refs in Driver.Historian.Wonderware, and the external
  mxaccessgw refs in Driver.Galaxy and its test project.
- Rebuild ZB.MOM.WW.OtOpcUa.slnx with nested solution folders.
- Re-prefix project paths in functional scripts (e2e, compliance, smoke SQL,
  integration, install).

Build green (0 errors); unit tests pass. Docs left for a separate pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Joseph Doherty
2026-05-17 01:55:28 -04:00
parent 69f02fed7f
commit a25593a9c6
1044 changed files with 365 additions and 343 deletions

View File

@@ -0,0 +1,289 @@
using System.Collections.Concurrent;
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
namespace ZB.MOM.WW.OtOpcUa.Server.Alarms;
/// <summary>
/// Server-level alarm-condition state machine. Tracks one entry per registered
/// condition; consumes value changes from the four sub-attribute references in
/// <see cref="AlarmConditionInfo"/> (InAlarm / Priority / Description / Acked) and
/// raises <see cref="TransitionRaised"/> on Active / Acknowledged / Inactive
/// transitions per OPC UA Part 9 (simplified). Operator acknowledgement routes
/// through <see cref="IAlarmAcknowledger"/> against <c>AckMsgWriteRef</c>.
/// </summary>
/// <remarks>
/// This is the driver-agnostic replacement for <c>GalaxyAlarmTracker</c>. The
/// service does not own subscription lifecycle — PR 2.3 will wire DriverNodeManager
/// to subscribe through the driver's <c>ISubscribable</c> and forward value changes
/// here via <see cref="OnValueChanged"/>. Keeping the service free of subscription
/// plumbing makes it trivially testable and lets future drivers feed it from any
/// value source (in-process, gRPC, named pipe).
/// </remarks>
public sealed class AlarmConditionService : IDisposable
{
private readonly Func<DateTime> _clock;
// ConditionId → state.
private readonly ConcurrentDictionary<string, AlarmConditionState> _conditions =
new(StringComparer.OrdinalIgnoreCase);
// Sub-attribute full ref → (conditionId, which field). Multiple conditions may
// observe the same sub-attribute (rare but legal); the value is a list to support
// fan-out on a single value change.
private readonly ConcurrentDictionary<string, List<(string ConditionId, AlarmField Field)>> _refToCondition =
new(StringComparer.OrdinalIgnoreCase);
private readonly object _refMapLock = new();
private bool _disposed;
/// <summary>
/// Fired when a registered condition transitions Active / Acknowledged / Inactive.
/// Handlers must be cheap; the event is raised on whatever thread feeds
/// <see cref="OnValueChanged"/> and blocks the value-change pipeline.
/// </summary>
public event EventHandler<AlarmConditionTransition>? TransitionRaised;
public AlarmConditionService() : this(() => DateTime.UtcNow) { }
/// <summary>Test seam — inject a fixed clock for deterministic transition timestamps.</summary>
internal AlarmConditionService(Func<DateTime> clock)
{
_clock = clock ?? throw new ArgumentNullException(nameof(clock));
}
/// <summary>Number of currently tracked conditions. Diagnostic only.</summary>
public int TrackedCount => _conditions.Count;
/// <summary>
/// Register a condition. Idempotent — repeat calls for the same
/// <paramref name="conditionId"/> are a no-op. The acker is captured for the
/// condition's lifetime; pass null when the driver does not accept acks.
/// </summary>
public void Track(string conditionId, AlarmConditionInfo info, IAlarmAcknowledger? acker = null)
{
ObjectDisposedException.ThrowIf(_disposed, this);
ArgumentException.ThrowIfNullOrWhiteSpace(conditionId);
ArgumentNullException.ThrowIfNull(info);
var state = new AlarmConditionState(conditionId, info, acker);
if (!_conditions.TryAdd(conditionId, state)) return;
lock (_refMapLock)
{
AddRefMapping(info.InAlarmRef, conditionId, AlarmField.InAlarm);
AddRefMapping(info.PriorityRef, conditionId, AlarmField.Priority);
AddRefMapping(info.DescAttrNameRef, conditionId, AlarmField.DescAttrName);
AddRefMapping(info.AckedRef, conditionId, AlarmField.Acked);
}
}
/// <summary>Deregister a condition. No-op when not tracked.</summary>
public void Untrack(string conditionId)
{
if (_disposed) return;
if (!_conditions.TryRemove(conditionId, out var state)) return;
lock (_refMapLock)
{
RemoveRefMapping(state.Info.InAlarmRef, conditionId);
RemoveRefMapping(state.Info.PriorityRef, conditionId);
RemoveRefMapping(state.Info.DescAttrNameRef, conditionId);
RemoveRefMapping(state.Info.AckedRef, conditionId);
}
}
/// <summary>
/// Returns the set of sub-attribute references the service currently needs
/// subscribed. Callers wire one subscription per ref through the driver's
/// <see cref="ISubscribable"/>; PR 2.3 owns that wiring.
/// </summary>
public IReadOnlyCollection<string> GetSubscribedReferences()
{
lock (_refMapLock) return [.. _refToCondition.Keys];
}
/// <summary>
/// Operator acknowledgement entry point. Returns false when the condition is
/// not tracked, the condition has no acker registered, the condition has no
/// <c>AckMsgWriteRef</c>, or the acker reports the write failed.
/// </summary>
public Task<bool> AcknowledgeAsync(string conditionId, string comment, CancellationToken cancellationToken = default)
{
if (_disposed || !_conditions.TryGetValue(conditionId, out var state))
return Task.FromResult(false);
if (state.Acker is null || string.IsNullOrEmpty(state.Info.AckMsgWriteRef))
return Task.FromResult(false);
return state.Acker.WriteAckMessageAsync(state.Info.AckMsgWriteRef, comment ?? string.Empty, cancellationToken);
}
/// <summary>
/// Snapshot every tracked condition's current state. Diagnostic / dashboard use only.
/// </summary>
public IReadOnlyList<AlarmConditionSnapshot> Snapshot()
{
return [.. _conditions.Values.Select(s =>
{
lock (s.Lock)
return new AlarmConditionSnapshot(s.ConditionId, s.InAlarm, s.Acked, s.Priority, s.Description);
})];
}
/// <summary>
/// Feed a value change for one of the registered sub-attribute references.
/// The service runs the state machine and raises <see cref="TransitionRaised"/>
/// when the change produces a lifecycle transition. Unknown references are
/// silently dropped — the caller may register and unregister concurrently with
/// value-change delivery, and a stale callback for a recently-untracked
/// condition must not throw.
/// </summary>
public void OnValueChanged(string fullReference, DataValueSnapshot value)
{
if (_disposed) return;
if (string.IsNullOrEmpty(fullReference)) return;
List<(string ConditionId, AlarmField Field)>? targets;
lock (_refMapLock)
{
if (!_refToCondition.TryGetValue(fullReference, out targets) || targets.Count == 0) return;
// Snapshot under lock; the state machine runs outside.
targets = [.. targets];
}
var now = _clock();
foreach (var (conditionId, field) in targets)
{
if (!_conditions.TryGetValue(conditionId, out var state)) continue;
AlarmConditionTransition? transition = null;
lock (state.Lock)
{
transition = ApplyValue(state, field, value, now);
}
if (transition is { } t)
{
TransitionRaised?.Invoke(this, t);
}
}
}
/// <summary>
/// Apply one value change to one condition. Returns a transition when the
/// change crosses a state boundary; null otherwise. Caller holds <c>state.Lock</c>.
/// </summary>
private static AlarmConditionTransition? ApplyValue(
AlarmConditionState state, AlarmField field, DataValueSnapshot value, DateTime now)
{
AlarmConditionTransition? transition = null;
state.LastUpdateUtc = now;
switch (field)
{
case AlarmField.InAlarm:
{
var wasActive = state.InAlarm;
var isActive = value.Value is bool b && b;
state.InAlarm = isActive;
if (!wasActive && isActive)
{
// Reset Acked on every active transition so a re-alarm requires fresh ack.
state.Acked = false;
transition = new AlarmConditionTransition(
state.ConditionId, AlarmStateTransition.Active,
state.Priority, state.Description, now);
}
else if (wasActive && !isActive)
{
transition = new AlarmConditionTransition(
state.ConditionId, AlarmStateTransition.Inactive,
state.Priority, state.Description, now);
}
break;
}
case AlarmField.Priority:
state.Priority = CoercePriority(value.Value, state.Priority);
break;
case AlarmField.DescAttrName:
state.Description = value.Value as string;
break;
case AlarmField.Acked:
{
var wasAcked = state.Acked;
var isAcked = value.Value is bool b && b;
state.Acked = isAcked;
// Only fire Acknowledged on false → true while still active. The first
// post-Track callback often arrives with isAcked == wasAcked (state starts
// Acked=true so an initially-quiet alarm doesn't misfire).
if (!wasAcked && isAcked && state.InAlarm)
{
transition = new AlarmConditionTransition(
state.ConditionId, AlarmStateTransition.Acknowledged,
state.Priority, state.Description, now);
}
break;
}
}
return transition;
}
private static int CoercePriority(object? raw, int fallback) => raw switch
{
int i => i,
short s => s,
long l when l <= int.MaxValue => (int)l,
byte b => b,
ushort us => us,
uint ui when ui <= int.MaxValue => (int)ui,
_ => fallback,
};
private void AddRefMapping(string? fullRef, string conditionId, AlarmField field)
{
if (string.IsNullOrEmpty(fullRef)) return;
if (!_refToCondition.TryGetValue(fullRef, out var list))
{
list = [];
_refToCondition[fullRef] = list;
}
list.Add((conditionId, field));
}
private void RemoveRefMapping(string? fullRef, string conditionId)
{
if (string.IsNullOrEmpty(fullRef)) return;
if (!_refToCondition.TryGetValue(fullRef, out var list)) return;
list.RemoveAll(t => string.Equals(t.ConditionId, conditionId, StringComparison.OrdinalIgnoreCase));
if (list.Count == 0) _refToCondition.TryRemove(fullRef, out _);
}
public void Dispose()
{
if (_disposed) return;
_disposed = true;
_conditions.Clear();
lock (_refMapLock) _refToCondition.Clear();
}
private enum AlarmField { InAlarm, Priority, DescAttrName, Acked }
/// <summary>Per-condition mutable state. Access guarded by <see cref="Lock"/>.</summary>
private sealed class AlarmConditionState(string conditionId, AlarmConditionInfo info, IAlarmAcknowledger? acker)
{
public readonly object Lock = new();
public string ConditionId { get; } = conditionId;
public AlarmConditionInfo Info { get; } = info;
public IAlarmAcknowledger? Acker { get; } = acker;
public bool InAlarm;
// Default Acked=true so the first post-Track callback (.Acked=true on a quiet
// alarm) doesn't misfire as a transition. Active sets it back to false.
public bool Acked = true;
public int Priority;
public string? Description;
public DateTime LastUpdateUtc;
}
}

View File

@@ -0,0 +1,44 @@
namespace ZB.MOM.WW.OtOpcUa.Server.Alarms;
/// <summary>
/// Lifecycle transition for an alarm condition. Mirrors OPC UA Part 9 alarm states
/// simplified to the active / acknowledged / inactive triplet that every driver in
/// the repo exposes today.
/// </summary>
public enum AlarmStateTransition
{
/// <summary>InAlarm flipped false → true. Default to unacknowledged.</summary>
Active,
/// <summary>Acked flipped false → true while the alarm is still active.</summary>
Acknowledged,
/// <summary>InAlarm flipped true → false.</summary>
Inactive,
}
/// <summary>
/// One alarm-state transition raised by <see cref="AlarmConditionService.TransitionRaised"/>.
/// </summary>
/// <param name="ConditionId">Stable identifier the caller registered the condition under (typically the driver's alarm full reference).</param>
/// <param name="Transition">Which state the alarm transitioned to.</param>
/// <param name="Priority">Latest known priority. 0 when no priority sub-attribute was registered or no value has been observed yet.</param>
/// <param name="Description">Latest known description text; null when not registered or not yet observed.</param>
/// <param name="AtUtc">Server-clock UTC of the value change that produced this transition.</param>
public sealed record AlarmConditionTransition(
string ConditionId,
AlarmStateTransition Transition,
int Priority,
string? Description,
DateTime AtUtc);
/// <summary>
/// Read-only snapshot of an alarm condition's current state. Used for diagnostics
/// and dashboards; not part of the live transition stream.
/// </summary>
public sealed record AlarmConditionSnapshot(
string ConditionId,
bool InAlarm,
bool Acked,
int Priority,
string? Description);

View File

@@ -0,0 +1,23 @@
namespace ZB.MOM.WW.OtOpcUa.Server.Alarms;
/// <summary>
/// Strategy for routing operator acknowledgement writes back to the underlying driver.
/// Decouples <see cref="AlarmConditionService"/> from any specific driver's write API
/// so the service can be tested without a real driver and reused across drivers with
/// different write paths.
/// </summary>
/// <remarks>
/// PR 2.3 supplies a default implementation that writes through the driver's
/// <c>IWritable.WriteAsync</c> using the <c>AckMsgWriteRef</c> from
/// <c>AlarmConditionInfo</c>. Drivers that route acks differently (e.g. a dedicated
/// RPC) can supply a custom implementation when registering the condition.
/// </remarks>
public interface IAlarmAcknowledger
{
/// <summary>
/// Writes the operator's <paramref name="comment"/> to <paramref name="ackMsgWriteRef"/>.
/// Returns true on driver-reported success, false otherwise. Implementations should
/// propagate cancellation but never throw on a write that the driver cleanly rejects.
/// </summary>
Task<bool> WriteAckMessageAsync(string ackMsgWriteRef, string comment, CancellationToken cancellationToken);
}

View File

@@ -0,0 +1,138 @@
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using ZB.MOM.WW.OtOpcUa.Configuration;
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
using ZB.MOM.WW.OtOpcUa.Core.Hosting;
using ZB.MOM.WW.OtOpcUa.Core.Resilience;
using ZB.MOM.WW.OtOpcUa.Core.Stability;
using ZB.MOM.WW.OtOpcUa.Server.Hosting;
namespace ZB.MOM.WW.OtOpcUa.Server;
/// <summary>
/// Task #248 — bridges the gap surfaced by the Phase 7 live smoke (#240) where
/// <c>DriverInstance</c> rows in the central config DB had no path to materialise
/// as live <see cref="Core.Abstractions.IDriver"/> instances in <see cref="DriverHost"/>.
/// Called from <c>OpcUaServerService.ExecuteAsync</c> after the bootstrap loads
/// the published generation, before address-space build.
/// </summary>
/// <remarks>
/// <para>
/// Per row: looks up the <c>DriverType</c> string in
/// <see cref="DriverFactoryRegistry"/>, calls the factory with the row's
/// <c>DriverInstanceId</c> + <c>DriverConfig</c> JSON to construct an
/// <see cref="Core.Abstractions.IDriver"/>, then registers via
/// <see cref="DriverHost.RegisterAsync"/> which invokes <c>InitializeAsync</c>
/// under the host's lifecycle semantics.
/// </para>
/// <para>
/// Unknown <c>DriverType</c> = factory not registered = log a warning and skip.
/// Per plan decision #12 (driver isolation), failure to construct or initialize
/// one driver doesn't prevent the rest from coming up — the Server keeps serving
/// the others' subtrees + the operator can fix the misconfigured row + republish
/// to retry.
/// </para>
/// </remarks>
public sealed class DriverInstanceBootstrapper(
DriverFactoryRegistry factories,
DriverHost driverHost,
ScheduledRecycleHostedService recycleHost,
ILoggerFactory loggerFactory,
IServiceScopeFactory scopeFactory,
ILogger<DriverInstanceBootstrapper> logger)
{
// IDriverSupervisor instances, looked up by DriverInstanceId. The bootstrapper
// consults DI at run time because no driver ships a supervisor today — the
// dictionary is built from optional DI registrations; Tier C drivers that
// register one via `services.AddKeyedSingleton<IDriverSupervisor>(instanceId, ...)`
// become eligible for scheduled recycle. Others silently skip.
private readonly IReadOnlyDictionary<string, IDriverSupervisor> _supervisors =
scopeFactory.CreateScope().ServiceProvider
.GetServices<IDriverSupervisor>()
.ToDictionary(s => s.DriverInstanceId, StringComparer.OrdinalIgnoreCase);
public async Task<int> RegisterDriversFromGenerationAsync(long generationId, CancellationToken ct)
{
using var scope = scopeFactory.CreateScope();
var db = scope.ServiceProvider.GetRequiredService<OtOpcUaConfigDbContext>();
var rows = await db.DriverInstances.AsNoTracking()
.Where(d => d.GenerationId == generationId && d.Enabled)
.ToListAsync(ct).ConfigureAwait(false);
var registered = 0;
var skippedUnknownType = 0;
var failedInit = 0;
foreach (var row in rows)
{
var factory = factories.TryGet(row.DriverType);
if (factory is null)
{
logger.LogWarning(
"DriverInstance {Id} skipped — DriverType '{Type}' has no registered factory (known: {Known})",
row.DriverInstanceId, row.DriverType, string.Join(",", factories.RegisteredTypes));
skippedUnknownType++;
continue;
}
try
{
var driver = factory(row.DriverInstanceId, row.DriverConfig);
await driverHost.RegisterAsync(driver, row.DriverConfig, ct).ConfigureAwait(false);
registered++;
logger.LogInformation(
"DriverInstance {Id} ({Type}) registered + initialized", row.DriverInstanceId, row.DriverType);
// Scheduled-recycle opt-in — only meaningful for Tier C out-of-process hosts,
// and only when the row's ResilienceConfig carries a positive
// RecycleIntervalSeconds AND the deployment wired an IDriverSupervisor for
// this DriverInstanceId. Silently skipping when any of those is absent is the
// intended zero-config-default behaviour.
TryRegisterScheduledRecycle(row.DriverInstanceId, row.DriverType, row.ResilienceConfig);
}
catch (Exception ex)
{
// Plan decision #12 — driver isolation. Log + continue so one bad row
// doesn't deny the OPC UA endpoint to the rest of the fleet.
logger.LogError(ex,
"DriverInstance {Id} ({Type}) failed to initialize — driver state will reflect Faulted; operator can republish to retry",
row.DriverInstanceId, row.DriverType);
failedInit++;
}
}
logger.LogInformation(
"DriverInstanceBootstrapper: gen={Gen} registered={Registered} skippedUnknownType={Skipped} failedInit={Failed}",
generationId, registered, skippedUnknownType, failedInit);
return registered;
}
private void TryRegisterScheduledRecycle(string driverInstanceId, string driverType, string? resilienceJson)
{
var tier = factories.GetTier(driverType);
if (tier != DriverTier.C) return;
var options = DriverResilienceOptionsParser.ParseOrDefaults(tier, resilienceJson, out _);
if (options.RecycleIntervalSeconds is not int secs) return;
if (!_supervisors.TryGetValue(driverInstanceId, out var supervisor))
{
logger.LogWarning(
"DriverInstance {Id} ({Type}) has RecycleIntervalSeconds={Secs} in ResilienceConfig but no IDriverSupervisor registered; scheduled recycle will not fire",
driverInstanceId, driverType, secs);
return;
}
var scheduler = new ScheduledRecycleScheduler(
tier,
TimeSpan.FromSeconds(secs),
DateTime.UtcNow,
supervisor,
loggerFactory.CreateLogger<ScheduledRecycleScheduler>());
recycleHost.AddScheduler(scheduler);
logger.LogInformation(
"Scheduled recycle armed for Tier C driver {Id} ({Type}) — interval {Interval}, first fire at {Next:o}",
driverInstanceId, driverType, TimeSpan.FromSeconds(secs), scheduler.NextRecycleUtc);
}
}

View File

@@ -0,0 +1,71 @@
using System.Collections.Concurrent;
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
namespace ZB.MOM.WW.OtOpcUa.Server.History;
/// <summary>
/// Default <see cref="IHistoryRouter"/> implementation.
/// </summary>
public sealed class HistoryRouter : IHistoryRouter
{
private readonly ConcurrentDictionary<string, IHistorianDataSource> _registry =
new(StringComparer.OrdinalIgnoreCase);
private bool _disposed;
/// <inheritdoc />
public void Register(string fullReferencePrefix, IHistorianDataSource source)
{
ObjectDisposedException.ThrowIf(_disposed, this);
ArgumentNullException.ThrowIfNull(fullReferencePrefix);
ArgumentNullException.ThrowIfNull(source);
if (!_registry.TryAdd(fullReferencePrefix, source))
{
throw new InvalidOperationException(
$"A historian data source is already registered for prefix '{fullReferencePrefix}'.");
}
}
/// <inheritdoc />
public IHistorianDataSource? Resolve(string fullReference)
{
ObjectDisposedException.ThrowIf(_disposed, this);
ArgumentNullException.ThrowIfNull(fullReference);
// Longest-prefix match. Sources are typically a handful per server, so a linear
// scan is fine and avoids building a trie for a low-cardinality registry.
IHistorianDataSource? best = null;
var bestPrefixLength = -1;
foreach (var (prefix, source) in _registry)
{
if (fullReference.StartsWith(prefix, StringComparison.OrdinalIgnoreCase)
&& prefix.Length > bestPrefixLength)
{
best = source;
bestPrefixLength = prefix.Length;
}
}
return best;
}
/// <summary>
/// Disposes every registered source and prevents further registrations or
/// resolutions. Sources may not all be disposable — null-safe disposal pattern.
/// </summary>
public void Dispose()
{
if (_disposed) return;
_disposed = true;
foreach (var source in _registry.Values)
{
try { source.Dispose(); }
catch { /* best-effort — server shutdown should not throw on a misbehaving source */ }
}
_registry.Clear();
}
}

View File

@@ -0,0 +1,37 @@
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
namespace ZB.MOM.WW.OtOpcUa.Server.History;
/// <summary>
/// Server-level routing of OPC UA HistoryRead service calls to a registered
/// <see cref="IHistorianDataSource"/>. One router per server instance; sources are
/// registered at startup keyed by a driver-side full-reference prefix (typically the
/// driver instance id).
/// </summary>
/// <remarks>
/// <para>
/// The router decouples history availability from the driver lifecycle: a driver
/// can restart (or be temporarily disconnected) without taking history offline,
/// and a single historian can serve nodes from multiple drivers.
/// </para>
/// <para>
/// Resolution is by longest-prefix match so a per-driver source registered under
/// <c>"galaxy"</c> wins over a fallback registered under empty string.
/// </para>
/// </remarks>
public interface IHistoryRouter : IDisposable
{
/// <summary>
/// Resolves a full reference to its registered data source, or null when no source
/// covers it.
/// </summary>
IHistorianDataSource? Resolve(string fullReference);
/// <summary>
/// Registers a data source for full references that start with
/// <paramref name="fullReferencePrefix"/>. Throws when the prefix is already
/// registered — duplicate registrations indicate a startup-config bug rather than
/// a runtime concern.
/// </summary>
void Register(string fullReferencePrefix, IHistorianDataSource source);
}

View File

@@ -0,0 +1,59 @@
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
using ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client;
namespace ZB.MOM.WW.OtOpcUa.Server.History;
/// <summary>
/// Hosted service that registers the configured <see cref="WonderwareHistorianClient"/>
/// as a source on the server-level <see cref="IHistoryRouter"/> at startup. Per-namespace
/// prefix is the driver instance id the operator binds the historian to (typically
/// "galaxy"); future per-area or per-equipment overrides can register under longer prefixes.
/// </summary>
/// <remarks>
/// PR 3.W only wires this when <c>Historian:Wonderware:Enabled=true</c> in config. The
/// hosted service does its work in <see cref="StartAsync"/> and stays passive afterward;
/// <see cref="StopAsync"/> is a no-op since router disposal happens through the singleton's
/// own DI lifecycle.
/// </remarks>
public sealed class WonderwareHistorianBootstrap : IHostedService
{
private readonly IHistoryRouter _router;
private readonly WonderwareHistorianClient _client;
private readonly string _prefix;
private readonly ILogger<WonderwareHistorianBootstrap> _logger;
public WonderwareHistorianBootstrap(
IHistoryRouter router,
WonderwareHistorianClient client,
string fullReferencePrefix,
ILogger<WonderwareHistorianBootstrap> logger)
{
_router = router ?? throw new ArgumentNullException(nameof(router));
_client = client ?? throw new ArgumentNullException(nameof(client));
_prefix = fullReferencePrefix ?? throw new ArgumentNullException(nameof(fullReferencePrefix));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
public Task StartAsync(CancellationToken cancellationToken)
{
try
{
_router.Register(_prefix, (IHistorianDataSource)_client);
_logger.LogInformation(
"Wonderware historian sidecar registered as IHistoryRouter source under prefix '{Prefix}'",
_prefix);
}
catch (InvalidOperationException ex)
{
// Prefix already registered (e.g. server restart without DI rebuild). Tolerate
// — the existing registration is the same singleton instance and stays valid.
_logger.LogWarning(ex,
"Wonderware historian source already registered for prefix '{Prefix}' — leaving existing entry", _prefix);
}
return Task.CompletedTask;
}
public Task StopAsync(CancellationToken cancellationToken) => Task.CompletedTask;
}

View File

@@ -0,0 +1,143 @@
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
using ZB.MOM.WW.OtOpcUa.Configuration;
using ZB.MOM.WW.OtOpcUa.Configuration.Entities;
using ZB.MOM.WW.OtOpcUa.Configuration.Enums;
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
using ZB.MOM.WW.OtOpcUa.Core.Hosting;
namespace ZB.MOM.WW.OtOpcUa.Server;
/// <summary>
/// Walks every registered driver once per heartbeat interval, asks each
/// <see cref="IHostConnectivityProbe"/>-capable driver for its current
/// <see cref="HostConnectivityStatus"/> list, and upserts one
/// <see cref="DriverHostStatus"/> row per (NodeId, DriverInstanceId, HostName) into the
/// central config DB. Powers the Admin UI's per-host drill-down page (LMX follow-up #7).
/// </summary>
/// <remarks>
/// <para>
/// Polling rather than event-driven: simpler, and matches the cadence the Admin UI
/// consumes. An event-subscription optimization (push on <c>OnHostStatusChanged</c> for
/// immediate reflection) is a straightforward follow-up but adds lifecycle complexity
/// — drivers can be registered after the publisher starts, and subscribing to each
/// one's event on register + unsubscribing on unregister requires DriverHost to expose
/// lifecycle events it doesn't today.
/// </para>
/// <para>
/// <see cref="DriverHostStatus.LastSeenUtc"/> advances every heartbeat so the Admin UI
/// can flag stale rows from a crashed Server process independent of
/// <see cref="DriverHostStatus.State"/> — a Faulted publisher that stops heartbeating
/// stays Faulted in the DB but its LastSeenUtc ages out, which is the signal
/// operators actually want.
/// </para>
/// <para>
/// If the DB is unreachable on a given tick, the publisher logs and moves on — it
/// does not retry or buffer. The next heartbeat picks up the current-state snapshot,
/// which is more useful than replaying stale transitions after a long outage.
/// </para>
/// </remarks>
public sealed class HostStatusPublisher(
DriverHost driverHost,
NodeOptions nodeOptions,
IServiceScopeFactory scopeFactory,
ILogger<HostStatusPublisher> logger) : BackgroundService
{
internal static readonly TimeSpan HeartbeatInterval = TimeSpan.FromSeconds(10);
protected override async Task ExecuteAsync(CancellationToken stoppingToken)
{
// Wait a short moment at startup so NodeBootstrap's RegisterAsync calls have had a
// chance to land. First tick runs immediately after so a freshly-started Server
// surfaces its host topology in the Admin UI without waiting a full interval.
try { await Task.Delay(TimeSpan.FromSeconds(2), stoppingToken); }
catch (OperationCanceledException) { return; }
while (!stoppingToken.IsCancellationRequested)
{
try { await PublishOnceAsync(stoppingToken); }
catch (OperationCanceledException) { return; }
catch (Exception ex)
{
// Never take down the Server on a publisher failure. Log and continue —
// stale-row detection on the Admin side will surface the outage.
logger.LogWarning(ex, "Host-status publisher tick failed — will retry next heartbeat");
}
try { await Task.Delay(HeartbeatInterval, stoppingToken); }
catch (OperationCanceledException) { return; }
}
}
internal async Task PublishOnceAsync(CancellationToken ct)
{
var driverIds = driverHost.RegisteredDriverIds;
if (driverIds.Count == 0) return;
var now = DateTime.UtcNow;
using var scope = scopeFactory.CreateScope();
var db = scope.ServiceProvider.GetRequiredService<OtOpcUaConfigDbContext>();
foreach (var driverId in driverIds)
{
var driver = driverHost.GetDriver(driverId);
if (driver is not IHostConnectivityProbe probe) continue;
IReadOnlyList<HostConnectivityStatus> statuses;
try { statuses = probe.GetHostStatuses(); }
catch (Exception ex)
{
logger.LogWarning(ex, "Driver {DriverId} GetHostStatuses threw — skipping this tick", driverId);
continue;
}
foreach (var status in statuses)
{
await UpsertAsync(db, driverId, status, now, ct);
}
}
await db.SaveChangesAsync(ct);
}
private async Task UpsertAsync(OtOpcUaConfigDbContext db, string driverId,
HostConnectivityStatus status, DateTime now, CancellationToken ct)
{
var mapped = MapState(status.State);
var existing = await db.DriverHostStatuses.SingleOrDefaultAsync(r =>
r.NodeId == nodeOptions.NodeId
&& r.DriverInstanceId == driverId
&& r.HostName == status.HostName, ct);
if (existing is null)
{
db.DriverHostStatuses.Add(new DriverHostStatus
{
NodeId = nodeOptions.NodeId,
DriverInstanceId = driverId,
HostName = status.HostName,
State = mapped,
StateChangedUtc = status.LastChangedUtc,
LastSeenUtc = now,
});
return;
}
existing.LastSeenUtc = now;
if (existing.State != mapped)
{
existing.State = mapped;
existing.StateChangedUtc = status.LastChangedUtc;
}
}
internal static DriverHostState MapState(HostState state) => state switch
{
HostState.Running => DriverHostState.Running,
HostState.Stopped => DriverHostState.Stopped,
HostState.Faulted => DriverHostState.Faulted,
_ => DriverHostState.Unknown,
};
}

View File

@@ -0,0 +1,160 @@
using Microsoft.Data.SqlClient;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
using ZB.MOM.WW.OtOpcUa.Server.Redundancy;
namespace ZB.MOM.WW.OtOpcUa.Server.Hosting;
/// <summary>
/// Phase 6.3 A.2 + Phase 6.1 Stream D follow-up — polls
/// <c>sp_GetCurrentGenerationForCluster</c> on a cadence and, when a newer generation
/// is detected, wraps the apply in an <see cref="ApplyLeaseRegistry"/> lease
/// (flipping ServiceLevel to <see cref="ServiceLevelBand.PrimaryMidApply"/>) and
/// refreshes the <see cref="RedundancyCoordinator"/> so operator role-swaps take
/// effect without a process restart.
/// </summary>
/// <remarks>
/// <para>
/// Before this service shipped, the Server only ever saw the generation in force
/// at process start (<see cref="SealedBootstrap"/>). Peer-published generations
/// silently accumulated in the shared config DB; the running node kept serving
/// the generation it had sealed on boot until the operator restarted it.
/// </para>
/// <para>
/// Closes the Phase 6.3 D.1 design hole around <c>PrimaryMidApply</c>: the
/// <c>coordinator.BeginApplyLease(...)</c> wrap now encloses an actual apply
/// (the coordinator refresh + future subscriber fan-out). Lease dispose fires
/// on every exit path — success, exception, cancellation — so
/// <c>ApplyLeaseRegistry</c> can never pin a crashed refresh at
/// PrimaryMidApply.
/// </para>
/// <para>
/// Deliberately narrow scope: refreshes <see cref="RedundancyCoordinator"/>
/// only. Driver re-init, virtual-tag re-bind, script-engine reload, etc. remain
/// as follow-up wiring — add subscribers to this service's apply path as those
/// components grow hot-reload support. The lease wrap is the right seam for
/// those subscribers to hook.
/// </para>
/// </remarks>
public sealed class GenerationRefreshHostedService(
NodeOptions options,
ApplyLeaseRegistry leases,
RedundancyCoordinator coordinator,
ILogger<GenerationRefreshHostedService> logger,
TimeSpan? tickInterval = null,
Func<CancellationToken, Task<long?>>? currentGenerationQuery = null) : BackgroundService
{
private readonly Func<CancellationToken, Task<long?>> _generationQuery = currentGenerationQuery
?? new Func<CancellationToken, Task<long?>>(ct => DefaultQueryCurrentGenerationAsync(options, logger, ct));
/// <summary>
/// How often the service polls <c>sp_GetCurrentGenerationForCluster</c>. Default 5 s —
/// low enough that operator publishes take effect promptly, high enough that the
/// overhead on the central DB is negligible even across a 100-node fleet.
/// </summary>
public TimeSpan TickInterval { get; } = tickInterval ?? TimeSpan.FromSeconds(5);
/// <summary>
/// Newest generation the service has applied. Exposed for diagnostics +
/// <see cref="TickCount"/> style health surfaces. <c>null</c> before the first
/// successful poll.
/// </summary>
public long? LastAppliedGenerationId { get; private set; }
/// <summary>Successful ticks — whether or not a generation change was detected.</summary>
public int TickCount { get; private set; }
/// <summary>Ticks that observed a generation change and ran a refresh.</summary>
public int RefreshCount { get; private set; }
protected override async Task ExecuteAsync(CancellationToken stoppingToken)
{
logger.LogInformation(
"GenerationRefreshHostedService running — polling every {Tick}s",
TickInterval.TotalSeconds);
while (!stoppingToken.IsCancellationRequested)
{
try
{
await TickAsync(stoppingToken).ConfigureAwait(false);
}
catch (Exception ex) when (ex is not OperationCanceledException)
{
logger.LogWarning(ex, "GenerationRefreshHostedService tick failed");
}
try { await Task.Delay(TickInterval, stoppingToken).ConfigureAwait(false); }
catch (OperationCanceledException) { break; }
}
}
// internal for tests — single-tick entry point.
internal async Task TickAsync(CancellationToken cancellationToken)
{
var current = await _generationQuery(cancellationToken).ConfigureAwait(false);
TickCount++;
if (current is null) return;
if (LastAppliedGenerationId is long last && current == last)
{
return; // no change
}
logger.LogInformation(
"Generation change detected — {Previous} → {Current}; applying",
LastAppliedGenerationId?.ToString() ?? "(none)", current);
// Lease wraps the apply window: ServiceLevelCalculator reads
// ApplyLeaseRegistry.IsApplyInProgress and returns PrimaryMidApply (200) while any
// lease is open. Publisher ticks in parallel (1s cadence) will observe the band
// transition and push it onto the OPC UA Server.ServiceLevel node.
var publishRequestId = Guid.NewGuid();
await using (leases.BeginApplyLease(current.Value, publishRequestId))
{
await coordinator.RefreshAsync(cancellationToken).ConfigureAwait(false);
// Future: fire a domain event that driver hosts / virtual-tag engine /
// scripted-alarm engine subscribe to. For now the topology refresh is the
// only thing we rewire — everything else still requires a process restart.
}
LastAppliedGenerationId = current;
RefreshCount++;
}
/// <summary>
/// Default generation-query implementation — reads via
/// <c>sp_GetCurrentGenerationForCluster</c>. Returns <c>null</c> when no generation
/// has been published yet, or when the DB call fails (logged at Warning; next tick
/// retries). Tests inject a stub <see cref="Func{CancellationToken, Task}"/> via the
/// <c>currentGenerationQuery</c> constructor parameter instead.
/// </summary>
private static async Task<long?> DefaultQueryCurrentGenerationAsync(
NodeOptions options,
ILogger logger,
CancellationToken cancellationToken)
{
try
{
await using var conn = new SqlConnection(options.ConfigDbConnectionString);
await conn.OpenAsync(cancellationToken).ConfigureAwait(false);
await using var cmd = conn.CreateCommand();
cmd.CommandText = "EXEC dbo.sp_GetCurrentGenerationForCluster @NodeId=@n, @ClusterId=@c";
cmd.Parameters.AddWithValue("@n", options.NodeId);
cmd.Parameters.AddWithValue("@c", options.ClusterId);
await using var reader = await cmd.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
{
return null;
}
return reader.GetInt64(0);
}
catch (Exception ex) when (ex is not OperationCanceledException)
{
logger.LogWarning(ex, "sp_GetCurrentGenerationForCluster failed — will retry");
return null;
}
}
}

View File

@@ -0,0 +1,103 @@
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
using ZB.MOM.WW.OtOpcUa.Server.Redundancy;
namespace ZB.MOM.WW.OtOpcUa.Server.Hosting;
/// <summary>
/// Phase 6.3 Stream B.1 — HTTP peer-probe loop. Polls every configured peer's
/// <c>/healthz</c> endpoint on a fast cadence (default 2 s) with a short timeout
/// (default 1 s) and writes the result to <see cref="PeerReachabilityTracker"/>.
/// </summary>
/// <remarks>
/// <para>
/// Fast-fail layer — the UA probe short-circuits when HTTP says dead, so a failing
/// peer is detected within ~2 s without paying the cost of a full OPC UA session
/// setup on every tick.
/// </para>
/// <para>
/// Writes preserve the last UA-health bit so a transient HTTP blip doesn't stomp the
/// authoritative UA reading until the next UA tick. <see cref="PeerReachability"/>
/// is a record; we compose a new one per update.
/// </para>
/// <para>
/// Probe target is derived as <c>http://{peer.Host}:{peer.DashboardPort}/healthz</c>.
/// The server's own health-endpoints host serves <c>/healthz</c> on the dashboard
/// port, so this is symmetric with what peers expect to be probed.
/// </para>
/// </remarks>
public sealed class PeerHttpProbeLoop(
RedundancyCoordinator coordinator,
PeerReachabilityTracker tracker,
IHttpClientFactory httpClientFactory,
ILogger<PeerHttpProbeLoop> logger,
PeerProbeOptions? options = null) : BackgroundService
{
private readonly PeerProbeOptions _options = options ?? new PeerProbeOptions();
internal const string HttpClientName = "PeerHttpProbe";
protected override async Task ExecuteAsync(CancellationToken stoppingToken)
{
logger.LogInformation(
"PeerHttpProbeLoop running — probe every {Interval}ms, timeout {Timeout}ms",
_options.HttpProbeInterval.TotalMilliseconds, _options.HttpProbeTimeout.TotalMilliseconds);
while (!stoppingToken.IsCancellationRequested)
{
try
{
await TickAsync(stoppingToken).ConfigureAwait(false);
}
catch (Exception ex) when (ex is not OperationCanceledException)
{
logger.LogWarning(ex, "PeerHttpProbeLoop tick failed");
}
try { await Task.Delay(_options.HttpProbeInterval, stoppingToken).ConfigureAwait(false); }
catch (OperationCanceledException) { break; }
}
}
// internal for tests — lets a unit test drive a single tick synchronously without the loop.
internal async Task TickAsync(CancellationToken cancellationToken)
{
var topology = coordinator.Current;
if (topology is null || topology.Peers.Count == 0) return;
// Probe every peer in parallel — one slow peer shouldn't block the cadence for others.
var probes = topology.Peers.Select(p => ProbeAsync(p, cancellationToken)).ToArray();
await Task.WhenAll(probes).ConfigureAwait(false);
}
private async Task ProbeAsync(RedundancyPeer peer, CancellationToken cancellationToken)
{
var url = $"http://{peer.Host}:{peer.DashboardPort}/healthz";
var healthy = false;
try
{
using var client = httpClientFactory.CreateClient(HttpClientName);
client.Timeout = _options.HttpProbeTimeout;
using var response = await client.GetAsync(url, cancellationToken).ConfigureAwait(false);
healthy = response.IsSuccessStatusCode;
}
catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
{
return; // shutdown — drop the result rather than writing a false-unhealthy
}
catch (Exception ex) when (ex is HttpRequestException or TaskCanceledException or OperationCanceledException)
{
// Any transport-level failure counts as unhealthy — connection refused, timeout,
// DNS fail, TLS fail. Swallow + mark unhealthy; don't log every tick, only when
// state transitions.
healthy = false;
}
var previous = tracker.Get(peer.NodeId);
if (previous.HttpHealthy != healthy)
{
logger.LogInformation("Peer {NodeId} HTTP probe {Transition} ({Url})",
peer.NodeId, healthy ? "Healthy" : "Unhealthy", url);
}
tracker.Update(peer.NodeId, previous with { HttpHealthy = healthy });
}
}

View File

@@ -0,0 +1,27 @@
namespace ZB.MOM.WW.OtOpcUa.Server.Hosting;
/// <summary>
/// Configuration for the Phase 6.3 Stream B peer-probe HostedServices
/// (<see cref="PeerHttpProbeLoop"/> + <see cref="PeerUaProbeLoop"/>). Drives cadence +
/// timeout for the two-layer probe model. Defaults match the spec in
/// <c>docs/v2/implementation/phase-6-3-redundancy-runtime.md</c> §Stream B.
/// </summary>
/// <remarks>
/// HTTP layer is the fast-fail at 2 s / 1 s timeout; UA layer is authoritative at 10 s /
/// 5 s timeout. The UA probe short-circuits when the HTTP probe last reported the peer
/// unhealthy, to avoid burning TCP sessions on a known-dead endpoint.
/// </remarks>
public sealed class PeerProbeOptions
{
/// <summary>How often <see cref="PeerHttpProbeLoop"/> ticks. Default 2 s.</summary>
public TimeSpan HttpProbeInterval { get; init; } = TimeSpan.FromSeconds(2);
/// <summary>Per-request timeout for the HTTP <c>/healthz</c> probe. Default 1 s.</summary>
public TimeSpan HttpProbeTimeout { get; init; } = TimeSpan.FromSeconds(1);
/// <summary>How often <see cref="PeerUaProbeLoop"/> ticks. Default 10 s.</summary>
public TimeSpan UaProbeInterval { get; init; } = TimeSpan.FromSeconds(10);
/// <summary>Per-request timeout for the OPC UA endpoint discovery probe. Default 5 s.</summary>
public TimeSpan UaProbeTimeout { get; init; } = TimeSpan.FromSeconds(5);
}

View File

@@ -0,0 +1,133 @@
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
using Opc.Ua;
using ZB.MOM.WW.OtOpcUa.Server.Redundancy;
namespace ZB.MOM.WW.OtOpcUa.Server.Hosting;
/// <summary>
/// Phase 6.3 Stream B.2 — OPC UA peer-probe loop. Opens a minimal discovery session to
/// each peer's OPC UA endpoint on a slow cadence (default 10 s) and records
/// <see cref="PeerReachability.UaHealthy"/> in the tracker.
/// </summary>
/// <remarks>
/// <para>
/// Authoritative layer — the OPC UA discovery call verifies the endpoint actually
/// serves UA traffic (not just that the host OS answers a TCP connect on 4840).
/// If the peer passes UA discovery, it can serve real client sessions.
/// </para>
/// <para>
/// Short-circuits when the HTTP probe (<see cref="PeerHttpProbeLoop"/>) last marked
/// the peer unhealthy — no point burning a full TCP+OPC UA handshake on a peer the
/// fast-fail probe already says is dead. In that case <see cref="PeerReachability.UaHealthy"/>
/// is cleared (stale-UA-state protection) so a sustained HTTP outage doesn't leave
/// an ancient UaHealthy=true reading feeding the ServiceLevel calculator.
/// </para>
/// <para>
/// Implementation uses <c>DiscoveryClient.GetEndpoints</c> rather than opening a
/// full authenticated <c>Session</c> — the discovery endpoint is server-side cheap
/// (no session state), needs no certificate trust, and is specifically designed for
/// availability pinging. Timeout bounded by <see cref="PeerProbeOptions.UaProbeTimeout"/>.
/// </para>
/// </remarks>
public sealed class PeerUaProbeLoop(
RedundancyCoordinator coordinator,
PeerReachabilityTracker tracker,
ILogger<PeerUaProbeLoop> logger,
PeerProbeOptions? options = null,
Func<string, TimeSpan, CancellationToken, Task<bool>>? endpointProbe = null) : BackgroundService
{
private readonly PeerProbeOptions _options = options ?? new PeerProbeOptions();
private readonly Func<string, TimeSpan, CancellationToken, Task<bool>> _endpointProbe
= endpointProbe ?? DefaultEndpointProbeAsync;
protected override async Task ExecuteAsync(CancellationToken stoppingToken)
{
logger.LogInformation(
"PeerUaProbeLoop running — probe every {Interval}ms, timeout {Timeout}ms",
_options.UaProbeInterval.TotalMilliseconds, _options.UaProbeTimeout.TotalMilliseconds);
while (!stoppingToken.IsCancellationRequested)
{
try
{
await TickAsync(stoppingToken).ConfigureAwait(false);
}
catch (Exception ex) when (ex is not OperationCanceledException)
{
logger.LogWarning(ex, "PeerUaProbeLoop tick failed");
}
try { await Task.Delay(_options.UaProbeInterval, stoppingToken).ConfigureAwait(false); }
catch (OperationCanceledException) { break; }
}
}
// internal for tests — single-tick entry point.
internal async Task TickAsync(CancellationToken cancellationToken)
{
var topology = coordinator.Current;
if (topology is null || topology.Peers.Count == 0) return;
var probes = topology.Peers.Select(p => ProbeAsync(p, cancellationToken)).ToArray();
await Task.WhenAll(probes).ConfigureAwait(false);
}
private async Task ProbeAsync(RedundancyPeer peer, CancellationToken cancellationToken)
{
var previous = tracker.Get(peer.NodeId);
// Short-circuit: don't waste a UA handshake when HTTP says the peer is down. Clear
// the UA bit so the publisher doesn't see a stale "UA still healthy" reading.
if (!previous.HttpHealthy)
{
if (previous.UaHealthy)
{
tracker.Update(peer.NodeId, previous with { UaHealthy = false });
logger.LogInformation("Peer {NodeId} UA probe cleared (HTTP unhealthy)", peer.NodeId);
}
return;
}
var endpoint = $"opc.tcp://{peer.Host}:{peer.OpcUaPort}";
var healthy = await _endpointProbe(endpoint, _options.UaProbeTimeout, cancellationToken).ConfigureAwait(false);
if (previous.UaHealthy != healthy)
{
logger.LogInformation("Peer {NodeId} UA probe {Transition} ({Endpoint})",
peer.NodeId, healthy ? "Healthy" : "Unhealthy", endpoint);
}
tracker.Update(peer.NodeId, previous with { UaHealthy = healthy });
}
/// <summary>
/// Default probe — <c>DiscoveryClient.GetEndpoints</c> against the peer's OPC UA
/// endpoint. Lightweight (no session, no certificate trust). Returns <c>true</c>
/// iff the call returns at least one advertised endpoint within the supplied
/// timeout; any transport, protocol, or timeout failure counts as unhealthy.
/// </summary>
internal static async Task<bool> DefaultEndpointProbeAsync(
string endpointUrl, TimeSpan timeout, CancellationToken cancellationToken)
{
try
{
using var linked = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
linked.CancelAfter(timeout);
var config = EndpointConfiguration.Create();
config.OperationTimeout = (int)timeout.TotalMilliseconds;
using var discoveryClient = DiscoveryClient.Create(new Uri(endpointUrl), config);
var endpoints = await Task.Run(() => discoveryClient.GetEndpoints(null), linked.Token).ConfigureAwait(false);
return endpoints is { Count: > 0 };
}
catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
{
return false;
}
catch
{
return false;
}
}
}

View File

@@ -0,0 +1,130 @@
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
using ZB.MOM.WW.OtOpcUa.Server.OpcUa;
using ZB.MOM.WW.OtOpcUa.Server.Redundancy;
namespace ZB.MOM.WW.OtOpcUa.Server.Hosting;
/// <summary>
/// Phase 6.3 Stream C (task #147) glue — drives <see cref="RedundancyStatePublisher"/> on
/// a periodic tick and pushes the resulting ServiceLevel / ServerUriArray /
/// RedundancySupport values onto the OPC UA Server node via
/// <see cref="ServerRedundancyNodeWriter"/>.
/// </summary>
/// <remarks>
/// <para>
/// The OPC UA <c>ServerObject</c> exists only after <c>StandardServer.OnServerStarted</c>
/// has run, which is inside <see cref="OpcUaApplicationHost.StartAsync"/>. This hosted
/// service polls for <c>host.Server?.CurrentInstance</c> to become non-null before
/// binding the writer — the server boot sequence doesn't expose a "ready" event.
/// </para>
/// <para>
/// Tick cadence is 1 s by default. The publisher is edge-triggered internally so a
/// no-change tick is cheap; the writer is also idempotent so we can safely apply the
/// same values every tick without generating spurious OPC UA notifications.
/// </para>
/// </remarks>
public sealed class RedundancyPublisherHostedService(
OpcUaApplicationHost host,
RedundancyStatePublisher publisher,
RedundancyCoordinator coordinator,
ILogger<RedundancyPublisherHostedService> logger,
ILoggerFactory loggerFactory) : BackgroundService
{
public TimeSpan TickInterval { get; init; } = TimeSpan.FromSeconds(1);
public TimeSpan ServerReadyPollInterval { get; init; } = TimeSpan.FromMilliseconds(250);
protected override async Task ExecuteAsync(CancellationToken stoppingToken)
{
// 0. Load topology from the shared config DB. RefreshAsync (not InitializeAsync)
// so an invariant violation degrades to ServiceLevelBand.InvalidTopology rather
// than crashing the hosted service — operator visibility beats fail-fast here.
await coordinator.RefreshAsync(stoppingToken).ConfigureAwait(false);
// 1. Wait for OPC UA server's ServerObject to materialize.
var writer = await WaitForServerReadyAsync(stoppingToken).ConfigureAwait(false);
if (writer is null) return; // cancelled before startup completed
// 2. Subscribe writer to publisher events — edge-triggered ServiceLevel +
// ServerUriArray updates from the publisher fan out onto the Server node.
publisher.OnStateChanged += OnServiceLevelChanged;
publisher.OnServerUriArrayChanged += OnServerUriArrayChanged;
// 3. One-time RedundancySupport from the coordinator's current topology. If the
// topology isn't loaded yet, we'll retry on the first compute-publish tick.
ApplyRedundancySupportIfKnown(writer);
logger.LogInformation(
"RedundancyPublisherHostedService running — tick every {Tick}ms",
TickInterval.TotalMilliseconds);
try
{
while (!stoppingToken.IsCancellationRequested)
{
try
{
publisher.ComputeAndPublish();
ApplyRedundancySupportIfKnown(writer); // cheap + idempotent
}
catch (Exception ex) when (ex is not OperationCanceledException)
{
logger.LogWarning(ex, "RedundancyStatePublisher tick failed");
}
try { await Task.Delay(TickInterval, stoppingToken).ConfigureAwait(false); }
catch (OperationCanceledException) { break; }
}
}
finally
{
publisher.OnStateChanged -= OnServiceLevelChanged;
publisher.OnServerUriArrayChanged -= OnServerUriArrayChanged;
}
void OnServiceLevelChanged(ServiceLevelSnapshot snap) => writer.ApplyServiceLevel(snap.Value);
void OnServerUriArrayChanged(IReadOnlyList<string> uris) => writer.ApplyServerUriArray(uris);
}
private async Task<ServerRedundancyNodeWriter?> WaitForServerReadyAsync(CancellationToken ct)
{
// Bounded retry so a genuine failure to start doesn't pin the hosted service forever.
// 60s is generous — production boot is ~2s on this box; cert PKI + certificate-creation
// cases have been observed to take up to 15s cold.
//
// StandardServer.CurrentInstance throws BadServerHalted before OnServerStarted has run,
// rather than returning null, so we catch that specifically and retry. Other
// ServiceResultException codes (e.g. BadInternalError) are still propagated — a true
// boot failure shouldn't look like "not ready yet".
var deadline = DateTime.UtcNow.AddSeconds(60);
while (!ct.IsCancellationRequested && DateTime.UtcNow < deadline)
{
Opc.Ua.Server.IServerInternal? serverInternal = null;
try { serverInternal = host.Server?.CurrentInstance; }
catch (Opc.Ua.ServiceResultException ex) when (ex.StatusCode == Opc.Ua.StatusCodes.BadServerHalted)
{
// Server is mid-startup — keep polling.
}
if (serverInternal?.ServerObject is not null)
{
var writerLogger = loggerFactory.CreateLogger<ServerRedundancyNodeWriter>();
return new ServerRedundancyNodeWriter(serverInternal, writerLogger);
}
try { await Task.Delay(ServerReadyPollInterval, ct).ConfigureAwait(false); }
catch (OperationCanceledException) { return null; }
}
if (!ct.IsCancellationRequested)
logger.LogError("OPC UA ServerObject did not materialize within 60s — Phase 6.3 Stream C wiring is inactive");
return null;
}
private void ApplyRedundancySupportIfKnown(ServerRedundancyNodeWriter writer)
{
var topology = coordinator.Current;
if (topology is null) return;
writer.ApplyRedundancySupport(topology.Mode);
}
}

View File

@@ -0,0 +1,139 @@
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
using ZB.MOM.WW.OtOpcUa.Configuration;
using ZB.MOM.WW.OtOpcUa.Configuration.Entities;
using ZB.MOM.WW.OtOpcUa.Core.Resilience;
namespace ZB.MOM.WW.OtOpcUa.Server.Hosting;
/// <summary>
/// Samples <see cref="DriverResilienceStatusTracker"/> at a fixed tick + upserts each
/// <c>(DriverInstanceId, HostName)</c> snapshot into <see cref="DriverInstanceResilienceStatus"/>
/// so Admin <c>/hosts</c> can render live resilience counters across restarts.
/// </summary>
/// <remarks>
/// <para>Closes the HostedService piece of Phase 6.1 Stream E.2 flagged as a follow-up
/// when the tracker shipped in PR #82. The Admin UI column-refresh piece (red badge when
/// ConsecutiveFailures &gt; breakerThreshold / 2 + SignalR push) is still deferred to
/// the visual-compliance pass — this service owns the persistence half alone.</para>
///
/// <para>Tick interval defaults to 5 s. Persistence is best-effort: a DB outage during
/// a tick logs + continues; the next tick tries again with the latest snapshots. The
/// hosted service never crashes the app on sample failure.</para>
///
/// <para><see cref="PersistOnceAsync"/> factored as a public method so tests can drive
/// it directly, matching the <see cref="ScheduledRecycleHostedService.TickOnceAsync"/>
/// pattern for deterministic unit-test timing.</para>
/// </remarks>
public sealed class ResilienceStatusPublisherHostedService : BackgroundService
{
private readonly DriverResilienceStatusTracker _tracker;
private readonly IDbContextFactory<OtOpcUaConfigDbContext> _dbContextFactory;
private readonly ILogger<ResilienceStatusPublisherHostedService> _logger;
private readonly TimeProvider _timeProvider;
/// <summary>Tick interval — how often the tracker snapshot is persisted.</summary>
public TimeSpan TickInterval { get; }
/// <summary>Snapshot of the tick count for diagnostics + test assertions.</summary>
public int TickCount { get; private set; }
public ResilienceStatusPublisherHostedService(
DriverResilienceStatusTracker tracker,
IDbContextFactory<OtOpcUaConfigDbContext> dbContextFactory,
ILogger<ResilienceStatusPublisherHostedService> logger,
TimeProvider? timeProvider = null,
TimeSpan? tickInterval = null)
{
ArgumentNullException.ThrowIfNull(tracker);
ArgumentNullException.ThrowIfNull(dbContextFactory);
_tracker = tracker;
_dbContextFactory = dbContextFactory;
_logger = logger;
_timeProvider = timeProvider ?? TimeProvider.System;
TickInterval = tickInterval ?? TimeSpan.FromSeconds(5);
}
protected override async Task ExecuteAsync(CancellationToken stoppingToken)
{
_logger.LogInformation(
"ResilienceStatusPublisherHostedService starting — tick interval = {Interval}",
TickInterval);
while (!stoppingToken.IsCancellationRequested)
{
try
{
await Task.Delay(TickInterval, _timeProvider, stoppingToken).ConfigureAwait(false);
}
catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested)
{
break;
}
await PersistOnceAsync(stoppingToken).ConfigureAwait(false);
}
_logger.LogInformation("ResilienceStatusPublisherHostedService stopping after {TickCount} tick(s).", TickCount);
}
/// <summary>
/// Take one snapshot of the tracker + upsert each pair into the persistence table.
/// Swallows transient exceptions + logs them; never throws from a sample failure.
/// </summary>
public async Task PersistOnceAsync(CancellationToken cancellationToken)
{
TickCount++;
var snapshot = _tracker.Snapshot();
if (snapshot.Count == 0) return;
try
{
await using var db = await _dbContextFactory.CreateDbContextAsync(cancellationToken).ConfigureAwait(false);
var now = _timeProvider.GetUtcNow().UtcDateTime;
foreach (var (driverInstanceId, hostName, counters) in snapshot)
{
var existing = await db.DriverInstanceResilienceStatuses
.FirstOrDefaultAsync(x => x.DriverInstanceId == driverInstanceId && x.HostName == hostName, cancellationToken)
.ConfigureAwait(false);
if (existing is null)
{
db.DriverInstanceResilienceStatuses.Add(new DriverInstanceResilienceStatus
{
DriverInstanceId = driverInstanceId,
HostName = hostName,
LastCircuitBreakerOpenUtc = counters.LastBreakerOpenUtc,
ConsecutiveFailures = counters.ConsecutiveFailures,
CurrentBulkheadDepth = counters.CurrentInFlight,
LastRecycleUtc = counters.LastRecycleUtc,
BaselineFootprintBytes = counters.BaselineFootprintBytes,
CurrentFootprintBytes = counters.CurrentFootprintBytes,
LastSampledUtc = now,
});
}
else
{
existing.LastCircuitBreakerOpenUtc = counters.LastBreakerOpenUtc;
existing.ConsecutiveFailures = counters.ConsecutiveFailures;
existing.CurrentBulkheadDepth = counters.CurrentInFlight;
existing.LastRecycleUtc = counters.LastRecycleUtc;
existing.BaselineFootprintBytes = counters.BaselineFootprintBytes;
existing.CurrentFootprintBytes = counters.CurrentFootprintBytes;
existing.LastSampledUtc = now;
}
}
await db.SaveChangesAsync(cancellationToken).ConfigureAwait(false);
}
catch (OperationCanceledException) { throw; }
catch (Exception ex)
{
_logger.LogWarning(ex,
"ResilienceStatusPublisher persistence tick failed; next tick will retry with latest snapshots.");
}
}
}

View File

@@ -0,0 +1,117 @@
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
using ZB.MOM.WW.OtOpcUa.Core.Stability;
namespace ZB.MOM.WW.OtOpcUa.Server.Hosting;
/// <summary>
/// Drives one or more <see cref="ScheduledRecycleScheduler"/> instances on a fixed tick
/// cadence. Closes Phase 6.1 Stream B.4 by turning the shipped-as-pure-logic scheduler
/// into a running background feature.
/// </summary>
/// <remarks>
/// <para>Registered as a singleton in Program.cs. Each Tier C driver instance that wants a
/// scheduled recycle registers its scheduler via
/// <see cref="AddScheduler(ScheduledRecycleScheduler)"/> at startup. The hosted service
/// wakes every <see cref="TickInterval"/> (default 1 min) and calls
/// <see cref="ScheduledRecycleScheduler.TickAsync"/> on each registered scheduler.</para>
///
/// <para>Scheduler registration is closed after <see cref="ExecuteAsync"/> starts — callers
/// must register before the host starts, typically during DI setup. Adding a scheduler
/// mid-flight throws to avoid confusing "some ticks saw my scheduler, some didn't" races.</para>
/// </remarks>
public sealed class ScheduledRecycleHostedService : BackgroundService
{
private readonly List<ScheduledRecycleScheduler> _schedulers = [];
private readonly ILogger<ScheduledRecycleHostedService> _logger;
private readonly TimeProvider _timeProvider;
private bool _started;
/// <summary>How often <see cref="ScheduledRecycleScheduler.TickAsync"/> fires on each registered scheduler.</summary>
public TimeSpan TickInterval { get; }
public ScheduledRecycleHostedService(
ILogger<ScheduledRecycleHostedService> logger,
TimeProvider? timeProvider = null,
TimeSpan? tickInterval = null)
{
_logger = logger;
_timeProvider = timeProvider ?? TimeProvider.System;
TickInterval = tickInterval ?? TimeSpan.FromMinutes(1);
}
/// <summary>Register a scheduler to drive. Must be called before the host starts.</summary>
public void AddScheduler(ScheduledRecycleScheduler scheduler)
{
ArgumentNullException.ThrowIfNull(scheduler);
if (_started)
throw new InvalidOperationException(
"Cannot register a ScheduledRecycleScheduler after the hosted service has started. " +
"Register all schedulers during DI configuration / startup.");
_schedulers.Add(scheduler);
}
/// <summary>Snapshot of the current tick count — diagnostics only.</summary>
public int TickCount { get; private set; }
/// <summary>Snapshot of the number of registered schedulers — diagnostics only.</summary>
public int SchedulerCount => _schedulers.Count;
public override Task StartAsync(CancellationToken cancellationToken)
{
_started = true;
return base.StartAsync(cancellationToken);
}
protected override async Task ExecuteAsync(CancellationToken stoppingToken)
{
_logger.LogInformation(
"ScheduledRecycleHostedService starting — {Count} scheduler(s), tick interval = {Interval}",
_schedulers.Count, TickInterval);
while (!stoppingToken.IsCancellationRequested)
{
try
{
await Task.Delay(TickInterval, _timeProvider, stoppingToken).ConfigureAwait(false);
}
catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested)
{
break;
}
await TickOnceAsync(stoppingToken).ConfigureAwait(false);
}
_logger.LogInformation("ScheduledRecycleHostedService stopping after {TickCount} tick(s).", TickCount);
}
/// <summary>
/// Execute one scheduler tick against every registered scheduler. Factored out of the
/// <see cref="ExecuteAsync"/> loop so tests can drive it directly without needing to
/// synchronize with <see cref="Task.Delay(TimeSpan, TimeProvider, CancellationToken)"/>.
/// </summary>
public async Task TickOnceAsync(CancellationToken cancellationToken)
{
var now = _timeProvider.GetUtcNow().UtcDateTime;
TickCount++;
foreach (var scheduler in _schedulers)
{
try
{
var fired = await scheduler.TickAsync(now, cancellationToken).ConfigureAwait(false);
if (fired)
_logger.LogInformation("Scheduled recycle fired at {Now:o}; next = {Next:o}",
now, scheduler.NextRecycleUtc);
}
catch (OperationCanceledException) { throw; }
catch (Exception ex)
{
// A single scheduler fault must not take down the rest — log + continue.
_logger.LogError(ex,
"ScheduledRecycleScheduler tick failed at {Now:o}; continuing to other schedulers.", now);
}
}
}
}

View File

@@ -0,0 +1,64 @@
using Microsoft.Data.SqlClient;
using Microsoft.Extensions.Logging;
using ZB.MOM.WW.OtOpcUa.Configuration.LocalCache;
namespace ZB.MOM.WW.OtOpcUa.Server;
/// <summary>
/// Bootstraps a node: fetches the current generation from the central DB via
/// <c>sp_GetCurrentGenerationForCluster</c>. If the DB is unreachable and a LiteDB cache entry
/// exists, falls back to cached config per decision #79 (degraded-but-running).
/// </summary>
public sealed class NodeBootstrap(
NodeOptions options,
ILocalConfigCache localCache,
ILogger<NodeBootstrap> logger)
{
public async Task<BootstrapResult> LoadCurrentGenerationAsync(CancellationToken ct)
{
try
{
await using var conn = new SqlConnection(options.ConfigDbConnectionString);
await conn.OpenAsync(ct);
await using var cmd = conn.CreateCommand();
cmd.CommandText = "EXEC dbo.sp_GetCurrentGenerationForCluster @NodeId=@n, @ClusterId=@c";
cmd.Parameters.AddWithValue("@n", options.NodeId);
cmd.Parameters.AddWithValue("@c", options.ClusterId);
await using var reader = await cmd.ExecuteReaderAsync(ct);
if (!await reader.ReadAsync(ct))
{
logger.LogWarning("Cluster {Cluster} has no Published generation yet", options.ClusterId);
return BootstrapResult.EmptyFromDb();
}
var generationId = reader.GetInt64(0);
logger.LogInformation("Bootstrapped from central DB: generation {GenerationId}", generationId);
return BootstrapResult.FromDb(generationId);
}
catch (Exception ex) when (ex is SqlException or InvalidOperationException or TimeoutException)
{
logger.LogWarning(ex, "Central DB unreachable; trying LiteDB cache fallback (decision #79)");
var cached = await localCache.GetMostRecentAsync(options.ClusterId, ct);
if (cached is null)
throw new BootstrapException(
"Central DB unreachable and no local cache available — cannot bootstrap.", ex);
logger.LogWarning("Bootstrapping from cache: generation {GenerationId} cached at {At}",
cached.GenerationId, cached.CachedAt);
return BootstrapResult.FromCache(cached.GenerationId);
}
}
}
public sealed record BootstrapResult(long? GenerationId, BootstrapSource Source)
{
public static BootstrapResult FromDb(long g) => new(g, BootstrapSource.CentralDb);
public static BootstrapResult FromCache(long g) => new(g, BootstrapSource.LocalCache);
public static BootstrapResult EmptyFromDb() => new(null, BootstrapSource.CentralDb);
}
public enum BootstrapSource { CentralDb, LocalCache }
public sealed class BootstrapException(string message, Exception inner) : Exception(message, inner);

View File

@@ -0,0 +1,28 @@
using ZB.MOM.WW.OtOpcUa.Server.Security;
namespace ZB.MOM.WW.OtOpcUa.Server;
/// <summary>
/// Bootstrap configuration read from <c>appsettings.json</c> (decision #18) — the minimum a
/// node needs to reach the central config DB and identify itself. Everything else comes from
/// the DB after bootstrap succeeds.
/// </summary>
public sealed class NodeOptions
{
public const string SectionName = "Node";
/// <summary>Stable node ID matching <c>ClusterNode.NodeId</c> in the central config DB.</summary>
public required string NodeId { get; init; }
/// <summary>Cluster this node belongs to.</summary>
public required string ClusterId { get; init; }
/// <summary>SQL Server connection string for the central config DB.</summary>
public required string ConfigDbConnectionString { get; init; }
/// <summary>Path to the LiteDB local cache file.</summary>
public string LocalCachePath { get; init; } = "config_cache.db";
/// <summary>Phase 6.2 authorization pipeline config. Disabled by default.</summary>
public AuthorizationOptions Authorization { get; init; } = new();
}

View File

@@ -0,0 +1,247 @@
using System.Net;
using System.Text;
using System.Text.Json;
using Microsoft.Extensions.Logging;
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
using ZB.MOM.WW.OtOpcUa.Core.Hosting;
using ZB.MOM.WW.OtOpcUa.Core.Observability;
using ZB.MOM.WW.OtOpcUa.Driver.Modbus;
namespace ZB.MOM.WW.OtOpcUa.Server.Observability;
/// <summary>
/// Standalone <see cref="HttpListener"/> host for <c>/healthz</c> and <c>/readyz</c>
/// separate from the OPC UA binding. Per <c>docs/v2/implementation/phase-6-1-resilience-
/// and-observability.md</c> §Stream C.1.
/// </summary>
/// <remarks>
/// Binds to <c>http://localhost:4841</c> by default — loopback avoids the Windows URL-ACL
/// elevation requirement that binding to <c>http://+:4841</c> (wildcard) would impose.
/// When a deployment needs remote probing, a reverse proxy or explicit netsh urlacl grant
/// is the expected path; documented in <c>docs/v2/Server-Deployment.md</c> in a follow-up.
/// </remarks>
public sealed class HealthEndpointsHost : IAsyncDisposable
{
private readonly string _prefix;
private readonly DriverHost _driverHost;
private readonly Func<bool> _configDbHealthy;
private readonly Func<bool> _usingStaleConfig;
private readonly ILogger<HealthEndpointsHost> _logger;
private readonly HttpListener _listener = new();
private readonly DateTime _startedUtc = DateTime.UtcNow;
private CancellationTokenSource? _cts;
private Task? _acceptLoop;
private bool _disposed;
public HealthEndpointsHost(
DriverHost driverHost,
ILogger<HealthEndpointsHost> logger,
Func<bool>? configDbHealthy = null,
Func<bool>? usingStaleConfig = null,
string prefix = "http://localhost:4841/")
{
_driverHost = driverHost;
_logger = logger;
_configDbHealthy = configDbHealthy ?? (() => true);
_usingStaleConfig = usingStaleConfig ?? (() => false);
_prefix = prefix.EndsWith('/') ? prefix : prefix + "/";
_listener.Prefixes.Add(_prefix);
}
public void Start()
{
_listener.Start();
_cts = new CancellationTokenSource();
_acceptLoop = Task.Run(() => AcceptLoopAsync(_cts.Token));
_logger.LogInformation("Health endpoints listening on {Prefix}", _prefix);
}
private async Task AcceptLoopAsync(CancellationToken ct)
{
while (!ct.IsCancellationRequested)
{
HttpListenerContext ctx;
try
{
ctx = await _listener.GetContextAsync().ConfigureAwait(false);
}
catch (HttpListenerException) when (ct.IsCancellationRequested) { break; }
catch (ObjectDisposedException) { break; }
_ = Task.Run(() => HandleAsync(ctx), ct);
}
}
private async Task HandleAsync(HttpListenerContext ctx)
{
try
{
var path = ctx.Request.Url?.AbsolutePath ?? "/";
switch (path)
{
case "/healthz":
await WriteHealthzAsync(ctx).ConfigureAwait(false);
break;
case "/readyz":
await WriteReadyzAsync(ctx).ConfigureAwait(false);
break;
default:
// #154 — driver-diagnostics path family. URL shape:
// /diagnostics/drivers/{driverInstanceId}/modbus/auto-prohibited
// Driver-agnostic at the URL level so future driver types (S7, AbCip,
// FOCAS) can add their own per-type subpaths.
if (path.StartsWith("/diagnostics/drivers/", StringComparison.Ordinal))
await WriteDriverDiagnosticsAsync(ctx, path).ConfigureAwait(false);
else
ctx.Response.StatusCode = 404;
break;
}
}
catch (Exception ex)
{
_logger.LogWarning(ex, "Health endpoint handler failure");
try { ctx.Response.StatusCode = 500; } catch { /* ignore */ }
}
finally
{
try { ctx.Response.Close(); } catch { /* ignore */ }
}
}
private async Task WriteHealthzAsync(HttpListenerContext ctx)
{
var configHealthy = _configDbHealthy();
var staleConfig = _usingStaleConfig();
// /healthz is 200 when process alive + (config DB reachable OR cache-warm).
// Stale-config still serves 200 so the process isn't flagged dead when the DB
// blips; the body surfaces the stale flag for operators.
var healthy = configHealthy || staleConfig;
ctx.Response.StatusCode = healthy ? 200 : 503;
var body = JsonSerializer.Serialize(new
{
status = healthy ? "healthy" : "unhealthy",
uptimeSeconds = (int)(DateTime.UtcNow - _startedUtc).TotalSeconds,
configDbReachable = configHealthy,
usingStaleConfig = staleConfig,
});
await WriteBodyAsync(ctx, body).ConfigureAwait(false);
}
private async Task WriteReadyzAsync(HttpListenerContext ctx)
{
var snapshots = BuildSnapshots();
var verdict = DriverHealthReport.Aggregate(snapshots);
ctx.Response.StatusCode = DriverHealthReport.HttpStatus(verdict);
var body = JsonSerializer.Serialize(new
{
verdict = verdict.ToString(),
uptimeSeconds = (int)(DateTime.UtcNow - _startedUtc).TotalSeconds,
drivers = snapshots.Select(d => new
{
id = d.DriverInstanceId,
state = d.State.ToString(),
detail = d.DetailMessage,
}).ToArray(),
degradedDrivers = snapshots
.Where(d => d.State == DriverState.Degraded || d.State == DriverState.Reconnecting)
.Select(d => d.DriverInstanceId)
.ToArray(),
});
await WriteBodyAsync(ctx, body).ConfigureAwait(false);
}
private IReadOnlyList<DriverHealthSnapshot> BuildSnapshots()
{
var list = new List<DriverHealthSnapshot>();
foreach (var id in _driverHost.RegisteredDriverIds)
{
var driver = _driverHost.GetDriver(id);
if (driver is null) continue;
var health = driver.GetHealth();
list.Add(new DriverHealthSnapshot(driver.DriverInstanceId, health.State, health.LastError));
}
return list;
}
/// <summary>
/// #154 — driver-diagnostics endpoint family. Routes
/// <c>/diagnostics/drivers/{driverId}/modbus/auto-prohibited</c> to the live
/// <see cref="ModbusDriver"/> instance's <see cref="ModbusDriver.GetAutoProhibitedRanges"/>.
/// 404 when the driver instance doesn't exist; 400 when it exists but isn't a Modbus
/// driver (the per-type endpoint is wrong for this row).
/// </summary>
private async Task WriteDriverDiagnosticsAsync(HttpListenerContext ctx, string path)
{
// Path shape: /diagnostics/drivers/{id}/modbus/auto-prohibited
var segments = path.Split('/', StringSplitOptions.RemoveEmptyEntries);
if (segments.Length < 4 || segments[0] != "diagnostics" || segments[1] != "drivers")
{
ctx.Response.StatusCode = 404;
return;
}
var driverId = segments[2];
var driver = _driverHost.GetDriver(driverId);
if (driver is null)
{
ctx.Response.StatusCode = 404;
await WriteBodyAsync(ctx, JsonSerializer.Serialize(new { error = $"Driver '{driverId}' not found" })).ConfigureAwait(false);
return;
}
// Per-driver-type subpath dispatch. Today only Modbus is wired; future drivers add
// their own segments[3] cases.
if (segments.Length >= 5 && segments[3] == "modbus" && segments[4] == "auto-prohibited")
{
if (driver is not ModbusDriver modbus)
{
ctx.Response.StatusCode = 400;
await WriteBodyAsync(ctx, JsonSerializer.Serialize(new { error = $"Driver '{driverId}' is not a Modbus driver (type: {driver.DriverType})" })).ConfigureAwait(false);
return;
}
var ranges = modbus.GetAutoProhibitedRanges();
ctx.Response.StatusCode = 200;
await WriteBodyAsync(ctx, JsonSerializer.Serialize(new
{
driverInstanceId = driverId,
count = ranges.Count,
ranges = ranges.Select(r => new
{
unitId = r.UnitId,
region = r.Region.ToString(),
startAddress = r.StartAddress,
endAddress = r.EndAddress,
lastProbedUtc = r.LastProbedUtc,
bisectionPending = r.BisectionPending,
}).ToArray(),
})).ConfigureAwait(false);
return;
}
ctx.Response.StatusCode = 404;
}
private static async Task WriteBodyAsync(HttpListenerContext ctx, string body)
{
var bytes = Encoding.UTF8.GetBytes(body);
ctx.Response.ContentType = "application/json; charset=utf-8";
ctx.Response.ContentLength64 = bytes.LongLength;
await ctx.Response.OutputStream.WriteAsync(bytes).ConfigureAwait(false);
}
public async ValueTask DisposeAsync()
{
if (_disposed) return;
_disposed = true;
_cts?.Cancel();
try { _listener.Stop(); } catch { /* ignore */ }
if (_acceptLoop is not null)
{
try { await _acceptLoop.ConfigureAwait(false); } catch { /* ignore */ }
}
_listener.Close();
_cts?.Dispose();
}
}

View File

@@ -0,0 +1,60 @@
using ZB.MOM.WW.OtOpcUa.Core.OpcUa;
namespace ZB.MOM.WW.OtOpcUa.Server.OpcUa;
/// <summary>
/// Holds pre-loaded <see cref="EquipmentNamespaceContent"/> snapshots keyed by
/// <c>DriverInstanceId</c>. Populated once during <see cref="OpcUaServerService"/> startup
/// (after <see cref="NodeBootstrap"/> resolves the generation) so the synchronous lookup
/// delegate on <see cref="OpcUaApplicationHost"/> can serve the walker from memory without
/// blocking on async DB I/O mid-dispatch.
/// </summary>
/// <remarks>
/// <para>The registry is intentionally a shared mutable singleton with set-once-per-bootstrap
/// semantics rather than an immutable map passed by value — the composition in Program.cs
/// builds <see cref="OpcUaApplicationHost"/> before <see cref="NodeBootstrap"/> runs, so the
/// registry must exist at DI-compose time but be empty until the generation is known. A
/// driver registered after the initial populate pass simply returns null from
/// <see cref="Get"/> + the wire-in falls back to the "no UNS content, let DiscoverAsync own
/// it" path that PR #155 established.</para>
/// </remarks>
public sealed class DriverEquipmentContentRegistry
{
private readonly Dictionary<string, EquipmentNamespaceContent> _content =
new(StringComparer.OrdinalIgnoreCase);
private readonly Lock _lock = new();
public EquipmentNamespaceContent? Get(string driverInstanceId)
{
lock (_lock)
{
return _content.TryGetValue(driverInstanceId, out var c) ? c : null;
}
}
public void Set(string driverInstanceId, EquipmentNamespaceContent content)
{
lock (_lock)
{
_content[driverInstanceId] = content;
}
}
public int Count
{
get { lock (_lock) { return _content.Count; } }
}
/// <summary>
/// Snapshot the current driver → content map. Returns a copy so callers can iterate
/// without holding the lock. Used at authorization bootstrap to merge all namespaces
/// into a single <see cref="Security.NodeScopeResolver"/> path index.
/// </summary>
public IReadOnlyDictionary<string, EquipmentNamespaceContent> Snapshot()
{
lock (_lock)
{
return new Dictionary<string, EquipmentNamespaceContent>(_content, StringComparer.OrdinalIgnoreCase);
}
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,86 @@
using Microsoft.EntityFrameworkCore;
using ZB.MOM.WW.OtOpcUa.Configuration;
using ZB.MOM.WW.OtOpcUa.Configuration.Entities;
using ZB.MOM.WW.OtOpcUa.Core.OpcUa;
namespace ZB.MOM.WW.OtOpcUa.Server.OpcUa;
/// <summary>
/// Loads the <see cref="EquipmentNamespaceContent"/> snapshot the
/// <see cref="EquipmentNodeWalker"/> consumes, scoped to a single
/// (driverInstanceId, generationId) pair. Joins the four row sets the walker expects:
/// UnsAreas for the driver's cluster, UnsLines under those areas, Equipment bound to
/// this driver + its lines, and Tags bound to this driver + its equipment — all at the
/// supplied generation.
/// </summary>
/// <remarks>
/// <para>The walker is driver-instance-scoped (decisions #116#121 put the UNS in the
/// Equipment-kind namespace owned by one driver instance at a time), so this loader is
/// too — a single call returns one driver's worth of rows, never the whole fleet.</para>
///
/// <para>Returns <c>null</c> when the driver instance has no Equipment rows at the
/// supplied generation. The wire-in in <see cref="OpcUaApplicationHost"/> treats null as
/// "this driver has no UNS content, skip the walker and let DiscoverAsync own the whole
/// address space" — the backward-compat path for drivers whose namespace kind is not
/// Equipment (Modbus / AB CIP / TwinCAT / FOCAS).</para>
/// </remarks>
public sealed class EquipmentNamespaceContentLoader
{
private readonly OtOpcUaConfigDbContext _db;
public EquipmentNamespaceContentLoader(OtOpcUaConfigDbContext db)
{
_db = db;
}
/// <summary>
/// Load the walker-shaped snapshot for <paramref name="driverInstanceId"/> at
/// <paramref name="generationId"/>. Returns <c>null</c> when the driver has no
/// Equipment rows at that generation.
/// </summary>
public async Task<EquipmentNamespaceContent?> LoadAsync(
string driverInstanceId, long generationId, CancellationToken ct)
{
var equipment = await _db.Equipment
.AsNoTracking()
.Where(e => e.DriverInstanceId == driverInstanceId && e.GenerationId == generationId && e.Enabled)
.ToListAsync(ct).ConfigureAwait(false);
if (equipment.Count == 0)
return null;
// Filter UNS tree to only the lines + areas that host at least one Equipment bound to
// this driver — skips loading unrelated UNS branches from the cluster. LinesByArea
// grouping is driven off the Equipment rows so an empty line (no equipment) doesn't
// pull a pointless folder into the walker output.
var lineIds = equipment.Select(e => e.UnsLineId).Distinct(StringComparer.OrdinalIgnoreCase).ToArray();
var lines = await _db.UnsLines
.AsNoTracking()
.Where(l => l.GenerationId == generationId && lineIds.Contains(l.UnsLineId))
.ToListAsync(ct).ConfigureAwait(false);
var areaIds = lines.Select(l => l.UnsAreaId).Distinct(StringComparer.OrdinalIgnoreCase).ToArray();
var areas = await _db.UnsAreas
.AsNoTracking()
.Where(a => a.GenerationId == generationId && areaIds.Contains(a.UnsAreaId))
.ToListAsync(ct).ConfigureAwait(false);
// Tags belonging to this driver at this generation. Walker skips Tags with null
// EquipmentId (those are SystemPlatform-kind Galaxy tags per decision #120) but we
// load them anyway so the same rowset can drive future non-Equipment-kind walks
// without re-hitting the DB. Filtering here is a future optimization; today the
// per-tag cost is bounded by driver scope.
var tags = await _db.Tags
.AsNoTracking()
.Where(t => t.DriverInstanceId == driverInstanceId && t.GenerationId == generationId)
.ToListAsync(ct).ConfigureAwait(false);
return new EquipmentNamespaceContent(
Areas: areas,
Lines: lines,
Equipment: equipment,
Tags: tags);
}
}

View File

@@ -0,0 +1,351 @@
using Microsoft.Extensions.Logging;
using Opc.Ua;
using Opc.Ua.Configuration;
using ZB.MOM.WW.OtOpcUa.Configuration.LocalCache;
using ZB.MOM.WW.OtOpcUa.Core.Hosting;
using ZB.MOM.WW.OtOpcUa.Core.OpcUa;
using ZB.MOM.WW.OtOpcUa.Core.Resilience;
using ZB.MOM.WW.OtOpcUa.Server.Alarms;
using ZB.MOM.WW.OtOpcUa.Server.History;
using ZB.MOM.WW.OtOpcUa.Server.Observability;
using ZB.MOM.WW.OtOpcUa.Server.Security;
namespace ZB.MOM.WW.OtOpcUa.Server.OpcUa;
/// <summary>
/// Wraps <see cref="ApplicationInstance"/> to bring the OPC UA server online — builds an
/// <see cref="ApplicationConfiguration"/> programmatically (no external XML file), ensures
/// the application certificate exists in the PKI store (auto-generates self-signed on first
/// run), starts the server, then walks each <see cref="DriverNodeManager"/> and invokes
/// <see cref="GenericDriverNodeManager.BuildAddressSpaceAsync"/> against it so the driver's
/// discovery streams into the already-running server's address space.
/// </summary>
public sealed class OpcUaApplicationHost : IAsyncDisposable
{
private readonly OpcUaServerOptions _options;
private readonly DriverHost _driverHost;
private readonly IUserAuthenticator _authenticator;
private readonly DriverResiliencePipelineBuilder _pipelineBuilder;
private AuthorizationGate? _authzGate;
private NodeScopeResolver? _scopeResolver;
private readonly StaleConfigFlag? _staleConfigFlag;
private readonly Func<string, ZB.MOM.WW.OtOpcUa.Core.Abstractions.DriverTier>? _tierLookup;
private readonly Func<string, string?>? _resilienceConfigLookup;
private readonly Func<string, ZB.MOM.WW.OtOpcUa.Core.OpcUa.EquipmentNamespaceContent?>? _equipmentContentLookup;
// Phase 7 Stream G follow-up (task #239). When composed with the VirtualTagEngine +
// ScriptedAlarmEngine sources these route node reads to the engines instead of the
// driver. Null = Phase 7 engines not enabled for this deployment (identical to pre-
// Phase-7 behaviour). Late-bindable via SetPhase7Sources because the engines need
// the bootstrapped generation id before they can compose, which is only known after
// the host has been DI-constructed (task #246).
private ZB.MOM.WW.OtOpcUa.Core.Abstractions.IReadable? _virtualReadable;
private ZB.MOM.WW.OtOpcUa.Core.Abstractions.IReadable? _scriptedAlarmReadable;
// PR 1+2.W — server-level singletons. Threaded through to OtOpcUaServer + every
// DriverNodeManager. Default null preserves existing test construction sites that
// don't opt into the new server-side history routing or alarm-condition state machine.
private readonly IHistoryRouter? _historyRouter;
private readonly AlarmConditionService? _alarmConditionService;
private readonly ILoggerFactory _loggerFactory;
private readonly ILogger<OpcUaApplicationHost> _logger;
private ApplicationInstance? _application;
private OtOpcUaServer? _server;
private HealthEndpointsHost? _healthHost;
private bool _disposed;
public OpcUaApplicationHost(OpcUaServerOptions options, DriverHost driverHost,
IUserAuthenticator authenticator, ILoggerFactory loggerFactory, ILogger<OpcUaApplicationHost> logger,
DriverResiliencePipelineBuilder? pipelineBuilder = null,
AuthorizationGate? authzGate = null,
NodeScopeResolver? scopeResolver = null,
StaleConfigFlag? staleConfigFlag = null,
Func<string, ZB.MOM.WW.OtOpcUa.Core.Abstractions.DriverTier>? tierLookup = null,
Func<string, string?>? resilienceConfigLookup = null,
Func<string, ZB.MOM.WW.OtOpcUa.Core.OpcUa.EquipmentNamespaceContent?>? equipmentContentLookup = null,
ZB.MOM.WW.OtOpcUa.Core.Abstractions.IReadable? virtualReadable = null,
ZB.MOM.WW.OtOpcUa.Core.Abstractions.IReadable? scriptedAlarmReadable = null,
IHistoryRouter? historyRouter = null,
AlarmConditionService? alarmConditionService = null)
{
_options = options;
_driverHost = driverHost;
_authenticator = authenticator;
_pipelineBuilder = pipelineBuilder ?? new DriverResiliencePipelineBuilder();
_authzGate = authzGate;
_scopeResolver = scopeResolver;
_staleConfigFlag = staleConfigFlag;
_tierLookup = tierLookup;
_resilienceConfigLookup = resilienceConfigLookup;
_equipmentContentLookup = equipmentContentLookup;
_virtualReadable = virtualReadable;
_scriptedAlarmReadable = scriptedAlarmReadable;
_historyRouter = historyRouter;
_alarmConditionService = alarmConditionService;
_loggerFactory = loggerFactory;
_logger = logger;
}
public OtOpcUaServer? Server => _server;
/// <summary>
/// Late-bind the Phase 7 engine-backed <c>IReadable</c> sources. Must be
/// called BEFORE <see cref="StartAsync"/> — once the OPC UA server starts, the
/// <see cref="OtOpcUaServer"/> ctor captures the field values + per-node
/// <see cref="DriverNodeManager"/>s are constructed. Calling this after start has
/// no effect on already-materialized node managers.
/// </summary>
public void SetPhase7Sources(
ZB.MOM.WW.OtOpcUa.Core.Abstractions.IReadable? virtualReadable,
ZB.MOM.WW.OtOpcUa.Core.Abstractions.IReadable? scriptedAlarmReadable)
{
if (_server is not null)
throw new InvalidOperationException(
"Phase 7 sources must be set before OpcUaApplicationHost.StartAsync; the OtOpcUaServer + DriverNodeManagers have already captured the previous values.");
_virtualReadable = virtualReadable;
_scriptedAlarmReadable = scriptedAlarmReadable;
}
/// <summary>
/// Late-bind the Phase 6.2 authorization gate + node-scope resolver. Must be called
/// BEFORE <see cref="StartAsync"/> — once the OPC UA server starts the
/// <see cref="OtOpcUaServer"/> + per-namespace <see cref="DriverNodeManager"/>s
/// capture these fields and later rebinding has no effect on already-materialized
/// managers. Call with <c>null</c> for either parameter to leave the corresponding
/// pipeline inert.
/// </summary>
public void SetAuthorization(AuthorizationGate? gate, NodeScopeResolver? resolver)
{
if (_server is not null)
throw new InvalidOperationException(
"Authorization must be set before OpcUaApplicationHost.StartAsync; the OtOpcUaServer + DriverNodeManagers have already captured the previous values.");
_authzGate = gate;
_scopeResolver = resolver;
}
/// <summary>
/// Builds the <see cref="ApplicationConfiguration"/>, validates/creates the application
/// certificate, constructs + starts the <see cref="OtOpcUaServer"/>, then drives
/// <see cref="GenericDriverNodeManager.BuildAddressSpaceAsync"/> per registered driver so
/// the address space is populated before the first client connects.
/// </summary>
public async Task StartAsync(CancellationToken ct)
{
_application = new ApplicationInstance
{
ApplicationName = _options.ApplicationName,
ApplicationType = ApplicationType.Server,
ApplicationConfiguration = BuildConfiguration(),
};
var hasCert = await _application.CheckApplicationInstanceCertificate(silent: true, minimumKeySize: CertificateFactory.DefaultKeySize).ConfigureAwait(false);
if (!hasCert)
throw new InvalidOperationException(
$"OPC UA application certificate could not be validated or created in {_options.PkiStoreRoot}");
_server = new OtOpcUaServer(_driverHost, _authenticator, _pipelineBuilder, _loggerFactory,
authzGate: _authzGate, scopeResolver: _scopeResolver,
tierLookup: _tierLookup, resilienceConfigLookup: _resilienceConfigLookup,
virtualReadable: _virtualReadable, scriptedAlarmReadable: _scriptedAlarmReadable,
anonymousRoles: _options.AnonymousRoles,
historyRouter: _historyRouter, alarmConditionService: _alarmConditionService);
await _application.Start(_server).ConfigureAwait(false);
_logger.LogInformation("OPC UA server started — endpoint={Endpoint} driverCount={Count}",
_options.EndpointUrl, _server.DriverNodeManagers.Count);
// Phase 6.1 Stream C: health endpoints on :4841 (loopback by default — see
// HealthEndpointsHost remarks for the Windows URL-ACL tradeoff).
if (_options.HealthEndpointsEnabled)
{
_healthHost = new HealthEndpointsHost(
_driverHost,
_loggerFactory.CreateLogger<HealthEndpointsHost>(),
usingStaleConfig: _staleConfigFlag is null ? null : () => _staleConfigFlag.IsStale,
prefix: _options.HealthEndpointsPrefix);
_healthHost.Start();
}
// Drive each driver's discovery through its node manager. The node manager IS the
// IAddressSpaceBuilder; GenericDriverNodeManager captures alarm-condition sinks into
// its internal map and wires OnAlarmEvent → sink routing.
//
// ADR-001 Option A — when an EquipmentNamespaceContent is supplied for an
// Equipment-kind driver, run the EquipmentNodeWalker BEFORE the driver's DiscoverAsync
// so the UNS folder skeleton (Area/Line/Equipment) + Identification sub-folders +
// the five identifier properties (decision #121) are in place. DiscoverAsync then
// streams the driver's native shape on top; Tag rows bound to Equipment already
// materialized via the walker don't get duplicated because the driver's DiscoverAsync
// output is authoritative for its own native references only.
foreach (var nodeManager in _server.DriverNodeManagers)
{
var driverId = nodeManager.Driver.DriverInstanceId;
try
{
if (_equipmentContentLookup is not null)
{
var content = _equipmentContentLookup(driverId);
if (content is not null)
{
ZB.MOM.WW.OtOpcUa.Core.OpcUa.EquipmentNodeWalker.Walk(nodeManager, content);
_logger.LogInformation(
"UNS walker populated {Areas} area(s), {Lines} line(s), {Equipment} equipment, {Tags} tag(s) for driver {Driver}",
content.Areas.Count, content.Lines.Count, content.Equipment.Count, content.Tags.Count, driverId);
}
}
var generic = new GenericDriverNodeManager(nodeManager.Driver);
await generic.BuildAddressSpaceAsync(nodeManager, ct).ConfigureAwait(false);
_logger.LogInformation("Address space populated for driver {Driver}", driverId);
}
catch (Exception ex)
{
// Per decision #12: driver exceptions isolate — log and keep the server serving
// the other drivers' subtrees. Re-building this one takes a Reinitialize call.
_logger.LogError(ex, "Discovery failed for driver {Driver}; subtree faulted", driverId);
}
}
}
private ApplicationConfiguration BuildConfiguration()
{
Directory.CreateDirectory(_options.PkiStoreRoot);
var cfg = new ApplicationConfiguration
{
ApplicationName = _options.ApplicationName,
ApplicationUri = _options.ApplicationUri,
ApplicationType = ApplicationType.Server,
ProductUri = "urn:OtOpcUa:Server",
SecurityConfiguration = new SecurityConfiguration
{
ApplicationCertificate = new CertificateIdentifier
{
StoreType = CertificateStoreType.Directory,
StorePath = Path.Combine(_options.PkiStoreRoot, "own"),
SubjectName = "CN=" + _options.ApplicationName,
},
TrustedIssuerCertificates = new CertificateTrustList
{
StoreType = CertificateStoreType.Directory,
StorePath = Path.Combine(_options.PkiStoreRoot, "issuers"),
},
TrustedPeerCertificates = new CertificateTrustList
{
StoreType = CertificateStoreType.Directory,
StorePath = Path.Combine(_options.PkiStoreRoot, "trusted"),
},
RejectedCertificateStore = new CertificateTrustList
{
StoreType = CertificateStoreType.Directory,
StorePath = Path.Combine(_options.PkiStoreRoot, "rejected"),
},
AutoAcceptUntrustedCertificates = _options.AutoAcceptUntrustedClientCertificates,
AddAppCertToTrustedStore = true,
},
TransportConfigurations = new TransportConfigurationCollection(),
TransportQuotas = new TransportQuotas { OperationTimeout = 15000 },
ServerConfiguration = new ServerConfiguration
{
BaseAddresses = new StringCollection { _options.EndpointUrl },
SecurityPolicies = BuildSecurityPolicies(),
UserTokenPolicies = BuildUserTokenPolicies(),
MinRequestThreadCount = 5,
MaxRequestThreadCount = 100,
MaxQueuedRequestCount = 200,
},
TraceConfiguration = new TraceConfiguration(),
};
cfg.Validate(ApplicationType.Server).GetAwaiter().GetResult();
if (cfg.SecurityConfiguration.AutoAcceptUntrustedCertificates)
{
cfg.CertificateValidator.CertificateValidation += (_, e) =>
{
if (e.Error.StatusCode == StatusCodes.BadCertificateUntrusted)
e.Accept = true;
};
}
return cfg;
}
private ServerSecurityPolicyCollection BuildSecurityPolicies()
{
var policies = new ServerSecurityPolicyCollection
{
// Keep the None policy present so legacy clients can discover + browse. Locked-down
// deployments remove this by setting Ldap.Enabled=true + dropping None here; left in
// for PR 19 so the PR 17 test harness continues to pass unchanged.
new ServerSecurityPolicy
{
SecurityMode = MessageSecurityMode.None,
SecurityPolicyUri = SecurityPolicies.None,
},
};
if (_options.SecurityProfile == OpcUaSecurityProfile.Basic256Sha256SignAndEncrypt)
{
policies.Add(new ServerSecurityPolicy
{
SecurityMode = MessageSecurityMode.SignAndEncrypt,
SecurityPolicyUri = SecurityPolicies.Basic256Sha256,
});
}
return policies;
}
private UserTokenPolicyCollection BuildUserTokenPolicies()
{
var tokens = new UserTokenPolicyCollection
{
new UserTokenPolicy(UserTokenType.Anonymous)
{
PolicyId = "Anonymous",
SecurityPolicyUri = SecurityPolicies.None,
},
};
if (_options.SecurityProfile == OpcUaSecurityProfile.Basic256Sha256SignAndEncrypt
&& _options.Ldap.Enabled)
{
tokens.Add(new UserTokenPolicy(UserTokenType.UserName)
{
PolicyId = "UserName",
// Passwords must ride an encrypted channel — scope this token to Basic256Sha256
// so the stack rejects any attempt to send UserName over the None endpoint.
SecurityPolicyUri = SecurityPolicies.Basic256Sha256,
});
}
return tokens;
}
public async ValueTask DisposeAsync()
{
if (_disposed) return;
_disposed = true;
try
{
_server?.Stop();
}
catch (Exception ex)
{
_logger.LogWarning(ex, "OPC UA server stop threw during dispose");
}
if (_healthHost is not null)
{
try { await _healthHost.DisposeAsync().ConfigureAwait(false); }
catch (Exception ex) { _logger.LogWarning(ex, "Health endpoints host dispose threw"); }
}
await Task.CompletedTask;
}
}

View File

@@ -0,0 +1,99 @@
using ZB.MOM.WW.OtOpcUa.Server.Security;
namespace ZB.MOM.WW.OtOpcUa.Server.OpcUa;
/// <summary>
/// OPC UA transport security profile selector. Controls which <c>ServerSecurityPolicy</c>
/// entries the endpoint advertises + which token types the <c>UserTokenPolicies</c> permits.
/// </summary>
public enum OpcUaSecurityProfile
{
/// <summary>Anonymous only on <c>SecurityPolicies.None</c> — dev-only, no signing or encryption.</summary>
None,
/// <summary>
/// <c>Basic256Sha256 SignAndEncrypt</c> with <c>UserName</c> and <c>Anonymous</c> token
/// policies. Clients must present a valid application certificate + user credentials.
/// </summary>
Basic256Sha256SignAndEncrypt,
}
/// <summary>
/// OPC UA server endpoint + application-identity configuration. Bound from the
/// <c>OpcUaServer</c> section of <c>appsettings.json</c>. PR 17 minimum-viable scope: no LDAP,
/// no security profiles beyond None — those wire in alongside a future deployment-policy PR
/// that reads from the central config DB instead of appsettings.
/// </summary>
public sealed class OpcUaServerOptions
{
public const string SectionName = "OpcUaServer";
/// <summary>
/// Fully-qualified endpoint URI clients connect to. Use <c>0.0.0.0</c> to bind all
/// interfaces; the stack rewrites to the machine's hostname for the returned endpoint
/// description at GetEndpoints time.
/// </summary>
public string EndpointUrl { get; init; } = "opc.tcp://0.0.0.0:4840/OtOpcUa";
/// <summary>Human-readable application name surfaced in the endpoint description.</summary>
public string ApplicationName { get; init; } = "OtOpcUa Server";
/// <summary>Stable application URI — must match the subjectAltName of the app cert.</summary>
public string ApplicationUri { get; init; } = "urn:OtOpcUa:Server";
/// <summary>
/// Directory where the OPC UA stack stores the application certificate + trusted /
/// rejected cert folders. Defaults to <c>%ProgramData%\OtOpcUa\pki</c>; the stack
/// creates the directory tree on first run and generates a self-signed cert.
/// </summary>
public string PkiStoreRoot { get; init; } =
System.IO.Path.Combine(
Environment.GetFolderPath(Environment.SpecialFolder.CommonApplicationData),
"OtOpcUa", "pki");
/// <summary>
/// When true, the stack auto-trusts client certs on first connect. Dev-default = true,
/// production deployments should flip this to false and manually trust clients via the
/// Admin UI.
/// </summary>
public bool AutoAcceptUntrustedClientCertificates { get; init; } = true;
/// <summary>
/// Whether to start the Phase 6.1 Stream C <c>/healthz</c> + <c>/readyz</c> HTTP listener.
/// Defaults to <c>true</c>; set false in embedded deployments that don't need HTTP
/// (e.g. tests that only exercise the OPC UA surface).
/// </summary>
public bool HealthEndpointsEnabled { get; init; } = true;
/// <summary>
/// URL prefix the health endpoints bind to. Default <c>http://localhost:4841/</c> — loopback
/// avoids Windows URL-ACL elevation. Production deployments that need remote probing should
/// either reverse-proxy or use <c>http://+:4841/</c> with netsh urlacl granted.
/// </summary>
public string HealthEndpointsPrefix { get; init; } = "http://localhost:4841/";
/// <summary>
/// Security profile advertised on the endpoint. Default <see cref="OpcUaSecurityProfile.None"/>
/// preserves the PR 17 endpoint shape; set to <see cref="OpcUaSecurityProfile.Basic256Sha256SignAndEncrypt"/>
/// for production deployments with LDAP-backed UserName auth.
/// </summary>
public OpcUaSecurityProfile SecurityProfile { get; init; } = OpcUaSecurityProfile.None;
/// <summary>
/// LDAP binding for UserName token validation. Only consulted when the active
/// <see cref="SecurityProfile"/> advertises a UserName token policy. When
/// <c>LdapOptions.Enabled = false</c>, UserName token attempts are rejected.
/// </summary>
public LdapOptions Ldap { get; init; } = new();
/// <summary>
/// Roles granted to anonymous OPC UA sessions. Default empty — anonymous clients can
/// read <c>FreeAccess</c> attributes but cannot write <c>Operate</c>/<c>Tune</c>/
/// <c>Configure</c> tags (<see cref="WriteAuthzPolicy"/> rejects the empty role set).
/// Dev + smoke-test deployments that need anonymous writes populate this with the
/// role names they want, e.g. <c>["WriteOperate"]</c> to match v1's anonymous-can-
/// operate default. Production deployments leave it empty + route operators through
/// UserName auth.
/// </summary>
public IReadOnlyList<string> AnonymousRoles { get; init; } = [];
}

View File

@@ -0,0 +1,204 @@
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using Opc.Ua;
using Opc.Ua.Server;
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
using ZB.MOM.WW.OtOpcUa.Core.Hosting;
using ZB.MOM.WW.OtOpcUa.Core.OpcUa;
using ZB.MOM.WW.OtOpcUa.Core.Resilience;
using ZB.MOM.WW.OtOpcUa.Server.Alarms;
using ZB.MOM.WW.OtOpcUa.Server.History;
using ZB.MOM.WW.OtOpcUa.Server.Security;
namespace ZB.MOM.WW.OtOpcUa.Server.OpcUa;
/// <summary>
/// <see cref="StandardServer"/> subclass that wires one <see cref="DriverNodeManager"/> per
/// registered driver from <see cref="DriverHost"/>. Anonymous endpoint on
/// <c>opc.tcp://0.0.0.0:4840</c>, no security — PR 16 minimum-viable scope; LDAP + security
/// profiles are deferred to their own PR on top of this.
/// </summary>
public sealed class OtOpcUaServer : StandardServer
{
private readonly DriverHost _driverHost;
private readonly IUserAuthenticator _authenticator;
private readonly DriverResiliencePipelineBuilder _pipelineBuilder;
private readonly AuthorizationGate? _authzGate;
private readonly NodeScopeResolver? _scopeResolver;
private readonly Func<string, DriverTier>? _tierLookup;
private readonly Func<string, string?>? _resilienceConfigLookup;
// Phase 7 Stream G follow-up wiring (task #239). Shared across every DriverNodeManager
// instantiated by this server so virtual-tag reads and scripted-alarm reads from any
// driver's address-space subtree route to the same engine. When null (no Phase 7
// engines composed for this deployment) DriverNodeManager falls back to driver-only
// dispatch — identical to pre-Phase-7 behaviour.
private readonly IReadable? _virtualReadable;
private readonly IReadable? _scriptedAlarmReadable;
// PR 1+2.W — server-level singletons shared across every DriverNodeManager.
// Null when the deployment hasn't opted into the new server-side history routing /
// server-side alarm-condition state machine; DriverNodeManager falls back to the
// legacy per-driver IHistoryProvider + IAlarmSource paths in that case.
private readonly IHistoryRouter? _historyRouter;
private readonly AlarmConditionService? _alarmConditionService;
/// <summary>
/// Roles granted to anonymous sessions. When non-empty, <see cref="OnImpersonateUser"/>
/// wraps <c>AnonymousIdentityToken</c> in a <see cref="RoleBasedIdentity"/> carrying
/// these roles so <see cref="DriverNodeManager"/>'s write-authz check passes for
/// matching classifications. Empty (the default) preserves the pre-existing behaviour
/// of rejecting anonymous writes at <c>Operate</c> or higher.
/// </summary>
private readonly IReadOnlyList<string> _anonymousRoles;
private readonly ILoggerFactory _loggerFactory;
private readonly List<DriverNodeManager> _driverNodeManagers = new();
public OtOpcUaServer(
DriverHost driverHost,
IUserAuthenticator authenticator,
DriverResiliencePipelineBuilder pipelineBuilder,
ILoggerFactory loggerFactory,
AuthorizationGate? authzGate = null,
NodeScopeResolver? scopeResolver = null,
Func<string, DriverTier>? tierLookup = null,
Func<string, string?>? resilienceConfigLookup = null,
IReadable? virtualReadable = null,
IReadable? scriptedAlarmReadable = null,
IReadOnlyList<string>? anonymousRoles = null,
IHistoryRouter? historyRouter = null,
AlarmConditionService? alarmConditionService = null)
{
_driverHost = driverHost;
_authenticator = authenticator;
_pipelineBuilder = pipelineBuilder;
_authzGate = authzGate;
_scopeResolver = scopeResolver;
_tierLookup = tierLookup;
_resilienceConfigLookup = resilienceConfigLookup;
_virtualReadable = virtualReadable;
_scriptedAlarmReadable = scriptedAlarmReadable;
_anonymousRoles = anonymousRoles ?? [];
_historyRouter = historyRouter;
_alarmConditionService = alarmConditionService;
_loggerFactory = loggerFactory;
}
/// <summary>
/// Read-only snapshot of the driver node managers materialized at server start. Used by
/// the generic-driver-node-manager-driven discovery flow after the server starts — the
/// host walks each entry and invokes
/// <c>GenericDriverNodeManager.BuildAddressSpaceAsync(manager)</c> passing the manager
/// as its own <see cref="IAddressSpaceBuilder"/>.
/// </summary>
public IReadOnlyList<DriverNodeManager> DriverNodeManagers => _driverNodeManagers;
protected override MasterNodeManager CreateMasterNodeManager(IServerInternal server, ApplicationConfiguration configuration)
{
foreach (var driverId in _driverHost.RegisteredDriverIds)
{
var driver = _driverHost.GetDriver(driverId);
if (driver is null) continue;
var logger = _loggerFactory.CreateLogger<DriverNodeManager>();
// Per-driver resilience options: tier comes from lookup (Phase 6.1 Stream B.1
// DriverTypeRegistry in the prod wire-up) or falls back to Tier A. ResilienceConfig
// JSON comes from the DriverInstance row via the optional lookup Func; parser
// layers JSON overrides on top of tier defaults (Phase 6.1 Stream A.2).
var tier = _tierLookup?.Invoke(driver.DriverType) ?? DriverTier.A;
var resilienceJson = _resilienceConfigLookup?.Invoke(driver.DriverInstanceId);
var options = DriverResilienceOptionsParser.ParseOrDefaults(tier, resilienceJson, out var diag);
if (diag is not null)
logger.LogWarning("ResilienceConfig parse diagnostic for driver {DriverId}: {Diag}", driver.DriverInstanceId, diag);
var invoker = new CapabilityInvoker(_pipelineBuilder, driver.DriverInstanceId, () => options, driver.DriverType);
var manager = new DriverNodeManager(server, configuration, driver, invoker, logger,
authzGate: _authzGate, scopeResolver: _scopeResolver,
virtualReadable: _virtualReadable, scriptedAlarmReadable: _scriptedAlarmReadable,
historyRouter: _historyRouter, alarmService: _alarmConditionService);
// The router stays empty after PR 1+2.W — DriverNodeManager's internal
// LegacyDriverHistoryAdapter handles every driver that still implements
// IHistoryProvider. PR 3.W will register the Wonderware sidecar as a router
// source; PR 7.2 retires the legacy fallback entirely.
_driverNodeManagers.Add(manager);
}
return new MasterNodeManager(server, configuration, null, _driverNodeManagers.ToArray());
}
protected override void OnServerStarted(IServerInternal server)
{
base.OnServerStarted(server);
// Hook UserName / Anonymous token validation here. Anonymous passes through; UserName
// is validated against the IUserAuthenticator (LDAP in production). Rejected identities
// throw ServiceResultException which the stack translates to Bad_IdentityTokenInvalid.
server.SessionManager.ImpersonateUser += OnImpersonateUser;
}
private void OnImpersonateUser(Session session, ImpersonateEventArgs args)
{
switch (args.NewIdentity)
{
case AnonymousIdentityToken:
args.Identity = _anonymousRoles.Count == 0
? new UserIdentity() // anonymous, no roles — production default
: new RoleBasedIdentity("(anonymous)", "Anonymous", _anonymousRoles, ldapGroups: []);
return;
case UserNameIdentityToken user:
{
var result = _authenticator.AuthenticateAsync(
user.UserName, user.DecryptedPassword, CancellationToken.None)
.GetAwaiter().GetResult();
if (!result.Success)
{
throw ServiceResultException.Create(
StatusCodes.BadUserAccessDenied,
"Invalid username or password ({0})", result.Error ?? "no detail");
}
args.Identity = new RoleBasedIdentity(user.UserName, result.DisplayName, result.Roles, result.Groups);
return;
}
default:
throw ServiceResultException.Create(
StatusCodes.BadIdentityTokenInvalid,
"Unsupported user identity token type: {0}", args.NewIdentity?.GetType().Name ?? "null");
}
}
/// <summary>
/// Tiny UserIdentity carrier that preserves the resolved roles + LDAP groups so downstream
/// node managers can gate writes/reads via <c>session.Identity</c>. Implements both
/// <see cref="IRoleBearer"/> (control-plane: WriteAuthzPolicy + Admin role mapping) and
/// <see cref="ILdapGroupsBearer"/> (data-plane: <see cref="AuthorizationGate"/> evaluator).
/// Anonymous identity (no roles configured) still uses the stack's default UserIdentity.
/// </summary>
private sealed class RoleBasedIdentity : UserIdentity, IRoleBearer, ILdapGroupsBearer
{
public IReadOnlyList<string> Roles { get; }
public IReadOnlyList<string> LdapGroups { get; }
public string? Display { get; }
public RoleBasedIdentity(string userName, string? displayName, IReadOnlyList<string> roles, IReadOnlyList<string> ldapGroups)
: base(userName, "")
{
Display = displayName;
Roles = roles;
LdapGroups = ldapGroups;
}
}
protected override ServerProperties LoadServerProperties() => new()
{
ManufacturerName = "OtOpcUa",
ProductName = "OtOpcUa.Server",
ProductUri = "urn:OtOpcUa:Server",
SoftwareVersion = "2.0.0",
BuildNumber = "0",
BuildDate = DateTime.UtcNow,
};
}

View File

@@ -0,0 +1,122 @@
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
using ZB.MOM.WW.OtOpcUa.Core.Hosting;
using ZB.MOM.WW.OtOpcUa.Server.OpcUa;
using ZB.MOM.WW.OtOpcUa.Server.Phase7;
using ZB.MOM.WW.OtOpcUa.Server.Security;
namespace ZB.MOM.WW.OtOpcUa.Server;
/// <summary>
/// BackgroundService that owns the OPC UA server lifecycle (decision #30, replacing TopShelf).
/// Bootstraps config, starts the <see cref="DriverHost"/>, starts the OPC UA server via
/// <see cref="OpcUaApplicationHost"/>, drives each driver's discovery into the address space,
/// runs until stopped.
/// </summary>
public sealed class OpcUaServerService(
NodeBootstrap bootstrap,
DriverHost driverHost,
OpcUaApplicationHost applicationHost,
DriverEquipmentContentRegistry equipmentContentRegistry,
DriverInstanceBootstrapper driverBootstrapper,
Phase7Composer phase7Composer,
AuthorizationBootstrap authorizationBootstrap,
IServiceScopeFactory scopeFactory,
ILogger<OpcUaServerService> logger) : BackgroundService
{
protected override async Task ExecuteAsync(CancellationToken stoppingToken)
{
logger.LogInformation("OtOpcUa.Server starting");
var result = await bootstrap.LoadCurrentGenerationAsync(stoppingToken);
logger.LogInformation("Bootstrap complete: source={Source} generation={Gen}", result.Source, result.GenerationId);
// ADR-001 Option A — populate per-driver Equipment namespace snapshots into the
// registry before StartAsync walks the address space. The walker on the OPC UA side
// reads synchronously from the registry; pre-loading here means the hot path stays
// non-blocking + each driver pays at most one Config-DB query at bootstrap time.
// Skipped when no generation is Published yet — the fleet boots into a UNS-less
// address space until the first publish, then the registry fills on next restart.
if (result.GenerationId is { } gen)
{
// Task #248 — register IDriver instances from the published DriverInstance
// rows BEFORE the equipment-content load + Phase 7 compose, so the rest of
// the pipeline sees a populated DriverHost. Without this step Phase 7's
// CachedTagUpstreamSource has no upstream feed + virtual-tag scripts read
// BadNodeIdUnknown for every tag path (gap surfaced by task #240 smoke).
await driverBootstrapper.RegisterDriversFromGenerationAsync(gen, stoppingToken);
await PopulateEquipmentContentAsync(gen, stoppingToken);
// Phase 7 follow-up #246 — load Script + VirtualTag + ScriptedAlarm rows,
// compose VirtualTagEngine + ScriptedAlarmEngine, start the driver-bridge
// feed. SetPhase7Sources MUST run before applicationHost.StartAsync because
// OtOpcUaServer + DriverNodeManager construction captures the field values
// — late binding after server start is rejected with InvalidOperationException.
// No-op when the generation has no virtual tags or scripted alarms.
var phase7 = await phase7Composer.PrepareAsync(gen, stoppingToken);
applicationHost.SetPhase7Sources(phase7.VirtualReadable, phase7.ScriptedAlarmReadable);
// Phase 6.2 Stream C wiring — build the AuthorizationGate + NodeScopeResolver
// from the published generation's NodeAcl rows and the populated equipment
// registry. No-op when Node:Authorization:Enabled=false. Must run before
// StartAsync: OtOpcUaServer + DriverNodeManager construction captures the
// field values on the application host.
var (authzGate, scopeResolver) = await authorizationBootstrap
.BuildAsync(gen, stoppingToken).ConfigureAwait(false);
applicationHost.SetAuthorization(authzGate, scopeResolver);
}
await applicationHost.StartAsync(stoppingToken);
logger.LogInformation("OtOpcUa.Server running. Hosted drivers: {Count}", driverHost.RegisteredDriverIds.Count);
try
{
await Task.Delay(Timeout.InfiniteTimeSpan, stoppingToken);
}
catch (OperationCanceledException)
{
logger.LogInformation("OtOpcUa.Server stopping");
}
}
public override async Task StopAsync(CancellationToken cancellationToken)
{
await base.StopAsync(cancellationToken);
// Dispose Phase 7 first so the bridge stops feeding the cache + the engines
// stop firing alarm/historian events before the OPC UA server tears down its
// node managers. Otherwise an in-flight cascade could try to push through a
// disposed source and surface as a noisy shutdown warning.
await phase7Composer.DisposeAsync();
await applicationHost.DisposeAsync();
await driverHost.DisposeAsync();
}
/// <summary>
/// Pre-load an <c>EquipmentNamespaceContent</c> snapshot for each registered driver at
/// the bootstrapped generation. Null results (driver has no Equipment rows —
/// Modbus/AB CIP/TwinCAT/FOCAS today per decisions #116#121) are skipped: the walker
/// wire-in sees Get(driverId) return null + falls back to DiscoverAsync-owns-it.
/// Opens one scope so the scoped <c>OtOpcUaConfigDbContext</c> is shared across all
/// per-driver queries rather than paying scope-setup overhead per driver.
/// </summary>
private async Task PopulateEquipmentContentAsync(long generationId, CancellationToken ct)
{
using var scope = scopeFactory.CreateScope();
var loader = scope.ServiceProvider.GetRequiredService<EquipmentNamespaceContentLoader>();
var loaded = 0;
foreach (var driverId in driverHost.RegisteredDriverIds)
{
var content = await loader.LoadAsync(driverId, generationId, ct).ConfigureAwait(false);
if (content is null) continue;
equipmentContentRegistry.Set(driverId, content);
loaded++;
}
logger.LogInformation(
"Equipment namespace snapshots loaded for {Count}/{Total} driver(s) at generation {Gen}",
loaded, driverHost.RegisteredDriverIds.Count, generationId);
}
}

View File

@@ -0,0 +1,84 @@
using System.Collections.Concurrent;
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
namespace ZB.MOM.WW.OtOpcUa.Server.Phase7;
/// <summary>
/// Production <c>ITagUpstreamSource</c> for the Phase 7 engines (implements both the
/// Core.VirtualTags and Core.ScriptedAlarms variants — identical shape, distinct
/// namespaces). Per the interface docstring, reads are synchronous — user scripts
/// call <c>ctx.GetTag</c> inline — so we serve from a last-known-value cache that
/// the driver-bridge populates asynchronously via <see cref="Push"/>.
/// </summary>
/// <remarks>
/// <para>
/// <see cref="Push"/> is called by the driver-bridge (wiring added by task #244)
/// every time a driver's <c>ISubscribable.OnDataChange</c> fires. Subscribers
/// registered via <see cref="SubscribeTag"/> are notified synchronously on the
/// calling thread — the VirtualTagEngine + ScriptedAlarmEngine handle their own
/// async hand-off via <c>SemaphoreSlim</c>.
/// </para>
/// <para>
/// Reads of a path that has never been <see cref="Push"/>-ed return
/// <see cref="UpstreamNotConfigured"/>-quality — which scripts see as
/// <c>ctx.GetTag("...").StatusCode == BadNodeIdUnknown</c> and can branch on.
/// </para>
/// </remarks>
public sealed class CachedTagUpstreamSource
: Core.VirtualTags.ITagUpstreamSource,
Core.ScriptedAlarms.ITagUpstreamSource
{
private readonly ConcurrentDictionary<string, DataValueSnapshot> _values = new(StringComparer.Ordinal);
private readonly ConcurrentDictionary<string, List<Action<string, DataValueSnapshot>>> _observers
= new(StringComparer.Ordinal);
public DataValueSnapshot ReadTag(string path)
{
if (string.IsNullOrEmpty(path)) throw new ArgumentException("path required", nameof(path));
return _values.TryGetValue(path, out var snap)
? snap
: new DataValueSnapshot(null, UpstreamNotConfigured, null, DateTime.UtcNow);
}
public IDisposable SubscribeTag(string path, Action<string, DataValueSnapshot> observer)
{
if (string.IsNullOrEmpty(path)) throw new ArgumentException("path required", nameof(path));
ArgumentNullException.ThrowIfNull(observer);
var list = _observers.GetOrAdd(path, _ => []);
lock (list) list.Add(observer);
return new Unsub(this, path, observer);
}
/// <summary>
/// Driver-bridge write path — called when a driver delivers a value change for
/// <paramref name="path"/>. Updates the cache + fans out to every observer.
/// Safe for concurrent callers; observers fire on the caller's thread.
/// </summary>
public void Push(string path, DataValueSnapshot snapshot)
{
if (string.IsNullOrEmpty(path)) throw new ArgumentException("path required", nameof(path));
ArgumentNullException.ThrowIfNull(snapshot);
_values[path] = snapshot;
if (!_observers.TryGetValue(path, out var list)) return;
Action<string, DataValueSnapshot>[] snapshotList;
lock (list) snapshotList = list.ToArray();
foreach (var observer in snapshotList) observer(path, snapshot);
}
/// <summary>Mirror of OPC UA <c>StatusCodes.BadNodeIdUnknown</c> without pulling the OPC stack dependency.</summary>
public const uint UpstreamNotConfigured = 0x80340000;
private sealed class Unsub(CachedTagUpstreamSource owner, string path, Action<string, DataValueSnapshot> observer) : IDisposable
{
private bool _disposed;
public void Dispose()
{
if (_disposed) return;
_disposed = true;
if (owner._observers.TryGetValue(path, out var list))
lock (list) list.Remove(observer);
}
}
}

View File

@@ -0,0 +1,146 @@
using Microsoft.Extensions.Logging;
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
namespace ZB.MOM.WW.OtOpcUa.Server.Phase7;
/// <summary>
/// Phase 7 follow-up (task #244). Subscribes to live driver <see cref="ISubscribable"/>
/// surfaces for every input path the Phase 7 engines care about + pushes incoming
/// <see cref="DataChangeEventArgs.Snapshot"/>s into <see cref="CachedTagUpstreamSource"/>
/// so <c>ctx.GetTag</c> reads see the freshest driver value.
/// </summary>
/// <remarks>
/// <para>
/// Each <see cref="DriverFeed"/> declares a driver + the path-to-fullRef map for the
/// attributes that driver provides. The bridge groups by driver so each <see cref="ISubscribable"/>
/// gets one <c>SubscribeAsync</c> call with a batched fullRef list — drivers that
/// poll under the hood (Modbus, AB CIP, S7) consolidate the polls; drivers with
/// native subscriptions (Galaxy, OPC UA Client, TwinCAT) get a single watch list.
/// </para>
/// <para>
/// Because driver fullRefs are opaque + driver-specific (Galaxy
/// <c>"DelmiaReceiver_001.Temp"</c>, Modbus <c>"40001"</c>, AB CIP
/// <c>"Temperature[0]"</c>), the bridge keeps a per-feed reverse map from fullRef
/// back to UNS path. <c>OnDataChange</c> fires keyed by fullRef; the bridge
/// translates to the script-side path before calling <see cref="CachedTagUpstreamSource.Push"/>.
/// </para>
/// <para>
/// Lifecycle: construct → <see cref="StartAsync"/> with the feeds → keep alive
/// alongside the engines → <see cref="DisposeAsync"/> unsubscribes from every
/// driver + unhooks the OnDataChange handlers. Driver subscriptions don't leak
/// even on abnormal shutdown because the disposal awaits each
/// <c>UnsubscribeAsync</c>.
/// </para>
/// </remarks>
public sealed class DriverSubscriptionBridge : IAsyncDisposable
{
private readonly CachedTagUpstreamSource _sink;
private readonly ILogger<DriverSubscriptionBridge> _logger;
private readonly List<ActiveSubscription> _active = [];
private bool _started;
private bool _disposed;
public DriverSubscriptionBridge(
CachedTagUpstreamSource sink,
ILogger<DriverSubscriptionBridge> logger)
{
_sink = sink ?? throw new ArgumentNullException(nameof(sink));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
/// <summary>
/// Subscribe each feed's driver to its declared fullRefs + wire push-to-cache.
/// Idempotent guard rejects double-start. Throws on the first subscribe failure
/// so misconfiguration surfaces fast — partial-subscribe state doesn't linger.
/// </summary>
public async Task StartAsync(IEnumerable<DriverFeed> feeds, CancellationToken ct)
{
ArgumentNullException.ThrowIfNull(feeds);
if (_disposed) throw new ObjectDisposedException(nameof(DriverSubscriptionBridge));
if (_started) throw new InvalidOperationException("DriverSubscriptionBridge already started");
_started = true;
foreach (var feed in feeds)
{
if (feed.PathToFullRef.Count == 0) continue;
// Reverse map for OnDataChange dispatch — driver fires keyed by FullReference,
// we push keyed by the script-side path.
var fullRefToPath = feed.PathToFullRef
.ToDictionary(kv => kv.Value, kv => kv.Key, StringComparer.Ordinal);
var fullRefs = feed.PathToFullRef.Values.Distinct(StringComparer.Ordinal).ToList();
EventHandler<DataChangeEventArgs> handler = (_, e) =>
{
if (fullRefToPath.TryGetValue(e.FullReference, out var unsPath))
_sink.Push(unsPath, e.Snapshot);
};
feed.Driver.OnDataChange += handler;
try
{
// OTOPCUA0001 suppression — the analyzer flags ISubscribable calls outside
// CapabilityInvoker. This bridge IS the lifecycle-coordinator for Phase 7
// subscriptions: it runs once at engine compose, doesn't hot-path per
// script evaluation (the engines read from the cache instead), and surfaces
// any subscribe failure by aborting bridge start. Wrapping in the per-call
// resilience pipeline would add nothing — there's no caller to retry on
// behalf of, and the breaker/bulkhead semantics belong to actual driver Read
// dispatch, which still goes through CapabilityInvoker via DriverNodeManager.
#pragma warning disable OTOPCUA0001
var handle = await feed.Driver.SubscribeAsync(fullRefs, feed.PublishingInterval, ct).ConfigureAwait(false);
#pragma warning restore OTOPCUA0001
_active.Add(new ActiveSubscription(feed.Driver, handle, handler));
_logger.LogInformation(
"Phase 7 bridge subscribed {Count} attribute(s) from driver {Driver} (handle {Handle})",
fullRefs.Count, feed.Driver.GetType().Name, handle.DiagnosticId);
}
catch
{
feed.Driver.OnDataChange -= handler;
throw;
}
}
}
public async ValueTask DisposeAsync()
{
if (_disposed) return;
_disposed = true;
foreach (var sub in _active)
{
sub.Driver.OnDataChange -= sub.Handler;
try
{
#pragma warning disable OTOPCUA0001 // bridge lifecycle — see StartAsync suppression rationale
await sub.Driver.UnsubscribeAsync(sub.Handle, CancellationToken.None).ConfigureAwait(false);
#pragma warning restore OTOPCUA0001
}
catch (Exception ex)
{
_logger.LogWarning(ex,
"Driver {Driver} UnsubscribeAsync threw on bridge dispose (handle {Handle})",
sub.Driver.GetType().Name, sub.Handle.DiagnosticId);
}
}
_active.Clear();
}
private sealed record ActiveSubscription(
ISubscribable Driver,
ISubscriptionHandle Handle,
EventHandler<DataChangeEventArgs> Handler);
}
/// <summary>
/// One driver's contribution to the Phase 7 bridge — the driver's <see cref="ISubscribable"/>
/// surface plus the path-to-fullRef map the bridge uses to translate driver-side
/// <see cref="DataChangeEventArgs.FullReference"/> back to script-side paths.
/// </summary>
/// <param name="Driver">The driver's subscribable surface (every shipped driver implements <see cref="ISubscribable"/>).</param>
/// <param name="PathToFullRef">UNS path the script uses → driver-opaque fullRef. Empty map = nothing to subscribe (skipped).</param>
/// <param name="PublishingInterval">Forwarded to the driver's <see cref="ISubscribable.SubscribeAsync"/>.</param>
public sealed record DriverFeed(
ISubscribable Driver,
IReadOnlyDictionary<string, string> PathToFullRef,
TimeSpan PublishingInterval);

View File

@@ -0,0 +1,266 @@
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using ZB.MOM.WW.OtOpcUa.Configuration;
using ZB.MOM.WW.OtOpcUa.Configuration.Entities;
using ZB.MOM.WW.OtOpcUa.Core.AlarmHistorian;
using ZB.MOM.WW.OtOpcUa.Core.Hosting;
using ZB.MOM.WW.OtOpcUa.Core.OpcUa;
using ZB.MOM.WW.OtOpcUa.Core.ScriptedAlarms;
using ZB.MOM.WW.OtOpcUa.Server.OpcUa;
namespace ZB.MOM.WW.OtOpcUa.Server.Phase7;
/// <summary>
/// Phase 7 follow-up (task #246) — orchestrates the runtime composition of virtual
/// tags + scripted alarms + the historian sink + the driver-bridge that feeds the
/// engines. Called by <see cref="OpcUaServerService"/> after the bootstrap generation
/// loads + before <see cref="OpcUaApplicationHost.StartAsync"/>.
/// </summary>
/// <remarks>
/// <para>
/// <see cref="PrepareAsync"/> reads Script / VirtualTag / ScriptedAlarm rows from
/// the central config DB at the bootstrapped generation, instantiates a
/// <see cref="CachedTagUpstreamSource"/>, runs <see cref="Phase7EngineComposer.Compose"/>,
/// starts a <see cref="DriverSubscriptionBridge"/> per registered driver feeding
/// <see cref="EquipmentNamespaceContent"/>'s tag rows into the cache, and returns
/// the engine-backed <see cref="Core.Abstractions.IReadable"/> sources for
/// <see cref="OpcUaApplicationHost.SetPhase7Sources"/>.
/// </para>
/// <para>
/// <see cref="DisposeAsync"/> tears down the bridge first (so no more events
/// arrive at the cache), then the engines (so cascades + timer ticks stop), then
/// the SQLite sink (which flushes any in-flight drain). Lifetime is owned by the
/// host; <see cref="OpcUaServerService.StopAsync"/> calls dispose during graceful
/// shutdown.
/// </para>
/// </remarks>
public sealed class Phase7Composer : IAsyncDisposable
{
private readonly IServiceScopeFactory _scopeFactory;
private readonly DriverHost _driverHost;
private readonly DriverEquipmentContentRegistry _equipmentRegistry;
private readonly IAlarmHistorianSink _historianSink;
private readonly IAlarmHistorianWriter? _injectedWriter;
private readonly ILoggerFactory _loggerFactory;
private readonly Serilog.ILogger _scriptLogger;
private readonly ILogger<Phase7Composer> _logger;
private DriverSubscriptionBridge? _bridge;
private Phase7ComposedSources _sources = Phase7ComposedSources.Empty;
// Sink we constructed in PrepareAsync (vs. the injected fallback). Held so
// DisposeAsync can flush + tear down the SQLite drain timer.
private SqliteStoreAndForwardSink? _ownedSink;
private bool _disposed;
public Phase7Composer(
IServiceScopeFactory scopeFactory,
DriverHost driverHost,
DriverEquipmentContentRegistry equipmentRegistry,
IAlarmHistorianSink historianSink,
ILoggerFactory loggerFactory,
Serilog.ILogger scriptLogger,
ILogger<Phase7Composer> logger,
IAlarmHistorianWriter? injectedWriter = null)
{
_scopeFactory = scopeFactory ?? throw new ArgumentNullException(nameof(scopeFactory));
_driverHost = driverHost ?? throw new ArgumentNullException(nameof(driverHost));
_equipmentRegistry = equipmentRegistry ?? throw new ArgumentNullException(nameof(equipmentRegistry));
_historianSink = historianSink ?? throw new ArgumentNullException(nameof(historianSink));
_injectedWriter = injectedWriter;
_loggerFactory = loggerFactory ?? throw new ArgumentNullException(nameof(loggerFactory));
_scriptLogger = scriptLogger ?? throw new ArgumentNullException(nameof(scriptLogger));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
public Phase7ComposedSources Sources => _sources;
public async Task<Phase7ComposedSources> PrepareAsync(long generationId, CancellationToken ct)
{
if (_disposed) throw new ObjectDisposedException(nameof(Phase7Composer));
// Load the three Phase 7 row sets in one DB scope.
List<Script> scripts;
List<VirtualTag> virtualTags;
List<ScriptedAlarm> scriptedAlarms;
using (var scope = _scopeFactory.CreateScope())
{
var db = scope.ServiceProvider.GetRequiredService<OtOpcUaConfigDbContext>();
scripts = await db.Scripts.AsNoTracking()
.Where(s => s.GenerationId == generationId).ToListAsync(ct).ConfigureAwait(false);
virtualTags = await db.VirtualTags.AsNoTracking()
.Where(v => v.GenerationId == generationId && v.Enabled).ToListAsync(ct).ConfigureAwait(false);
scriptedAlarms = await db.ScriptedAlarms.AsNoTracking()
.Where(a => a.GenerationId == generationId && a.Enabled).ToListAsync(ct).ConfigureAwait(false);
}
if (virtualTags.Count == 0 && scriptedAlarms.Count == 0)
{
_logger.LogInformation("Phase 7: no virtual tags or scripted alarms in generation {Gen}; engines dormant", generationId);
return Phase7ComposedSources.Empty;
}
var upstream = new CachedTagUpstreamSource();
// Phase 7 follow-up #247 — if any registered driver implements IAlarmHistorianWriter
// (today: GalaxyProxyDriver), wrap it in a SqliteStoreAndForwardSink at
// %ProgramData%/OtOpcUa/alarm-historian-queue.db with the 2s drain cadence the
// sink's docstring recommends. Otherwise fall back to the injected sink (Null in
// the default registration).
var historianSink = ResolveHistorianSink();
_sources = Phase7EngineComposer.Compose(
scripts: scripts,
virtualTags: virtualTags,
scriptedAlarms: scriptedAlarms,
upstream: upstream,
alarmStateStore: new InMemoryAlarmStateStore(),
historianSink: historianSink,
rootScriptLogger: _scriptLogger,
loggerFactory: _loggerFactory);
_logger.LogInformation(
"Phase 7: composed engines from generation {Gen} — {Vt} virtual tag(s), {Al} scripted alarm(s), {Sc} script(s)",
generationId, virtualTags.Count, scriptedAlarms.Count, scripts.Count);
// Build driver feeds from each registered driver's EquipmentNamespaceContent + start
// the bridge. Drivers without populated content (Galaxy SystemPlatform-kind, drivers
// whose Equipment rows haven't been published yet) contribute an empty feed which
// the bridge silently skips.
_bridge = new DriverSubscriptionBridge(upstream, _loggerFactory.CreateLogger<DriverSubscriptionBridge>());
var feeds = BuildDriverFeeds(_driverHost, _equipmentRegistry);
await _bridge.StartAsync(feeds, ct).ConfigureAwait(false);
return _sources;
}
/// <summary>
/// Resolution order for the alarm-historian writer:
/// <list type="number">
/// <item><description>Any registered driver that implements <see cref="IAlarmHistorianWriter"/> (today: none — Galaxy used to via the legacy GalaxyProxyDriver).</description></item>
/// <item><description>The DI-registered <see cref="IAlarmHistorianWriter"/> (PR B.4 — the WonderwareHistorianClient sidecar writer when <c>Historian:Wonderware:Enabled=true</c>).</description></item>
/// <item><description><c>null</c> — caller falls back to the injected <see cref="IAlarmHistorianSink"/> (NullAlarmHistorianSink in the default registration).</description></item>
/// </list>
/// Driver-provided writers win over the DI-registered sidecar so a future
/// GalaxyDriver-as-IAlarmHistorianWriter takes the write path directly,
/// preserving the v1 invariant where a driver that natively owns the
/// historian client doesn't bounce through the sidecar IPC.
/// </summary>
internal static IAlarmHistorianWriter? SelectAlarmHistorianWriter(
DriverHost driverHost,
IAlarmHistorianWriter? injectedWriter,
out string? selectedSourceDescription)
{
foreach (var driverId in driverHost.RegisteredDriverIds)
{
if (driverHost.GetDriver(driverId) is IAlarmHistorianWriter w)
{
selectedSourceDescription = $"driver:{driverId}";
return w;
}
}
if (injectedWriter is not null)
{
selectedSourceDescription = $"di:{injectedWriter.GetType().Name}";
return injectedWriter;
}
selectedSourceDescription = null;
return null;
}
private IAlarmHistorianSink ResolveHistorianSink()
{
var writer = SelectAlarmHistorianWriter(_driverHost, _injectedWriter, out var sourceDescription);
if (writer is null)
{
_logger.LogInformation(
"Phase 7 historian sink: no driver or DI-registered IAlarmHistorianWriter — using {Sink}",
_historianSink.GetType().Name);
return _historianSink;
}
_logger.LogInformation(
"Phase 7 historian sink: IAlarmHistorianWriter resolved from {Source} — SqliteStoreAndForwardSink active",
sourceDescription);
var queueRoot = Environment.GetFolderPath(Environment.SpecialFolder.CommonApplicationData);
if (string.IsNullOrEmpty(queueRoot)) queueRoot = Path.GetTempPath();
var queueDir = Path.Combine(queueRoot, "OtOpcUa");
Directory.CreateDirectory(queueDir);
var queuePath = Path.Combine(queueDir, "alarm-historian-queue.db");
var sinkLogger = _loggerFactory.CreateLogger<SqliteStoreAndForwardSink>();
// SqliteStoreAndForwardSink wants a Serilog logger for warn-on-eviction emissions;
// bridge the Microsoft logger via Serilog's null-safe path until the sink's
// dependency surface is reshaped (covered as part of release-readiness).
var serilogShim = _scriptLogger.ForContext("HistorianQueuePath", queuePath);
_ownedSink = new SqliteStoreAndForwardSink(
databasePath: queuePath,
writer: writer,
logger: serilogShim);
_ownedSink.StartDrainLoop(TimeSpan.FromSeconds(2));
return _ownedSink;
}
/// <summary>
/// For each registered driver that exposes <see cref="Core.Abstractions.ISubscribable"/>,
/// build a UNS-path → driver-fullRef map from its EquipmentNamespaceContent.
/// Path convention: <c>/{areaName}/{lineName}/{equipmentName}/{tagName}</c> matching
/// what the EquipmentNodeWalker emits into the OPC UA browse tree, so script literals
/// written against the operator-visible tree work without translation.
/// </summary>
internal static IReadOnlyList<DriverFeed> BuildDriverFeeds(
DriverHost driverHost, DriverEquipmentContentRegistry equipmentRegistry)
{
var feeds = new List<DriverFeed>();
foreach (var driverId in driverHost.RegisteredDriverIds)
{
var driver = driverHost.GetDriver(driverId);
if (driver is not Core.Abstractions.ISubscribable subscribable) continue;
var content = equipmentRegistry.Get(driverId);
if (content is null) continue;
var pathToFullRef = MapPathsToFullRefs(content);
if (pathToFullRef.Count == 0) continue;
feeds.Add(new DriverFeed(subscribable, pathToFullRef, TimeSpan.FromSeconds(1)));
}
return feeds;
}
internal static IReadOnlyDictionary<string, string> MapPathsToFullRefs(EquipmentNamespaceContent content)
{
var result = new Dictionary<string, string>(StringComparer.Ordinal);
var areaById = content.Areas.ToDictionary(a => a.UnsAreaId, StringComparer.OrdinalIgnoreCase);
var lineById = content.Lines.ToDictionary(l => l.UnsLineId, StringComparer.OrdinalIgnoreCase);
var equipmentById = content.Equipment.ToDictionary(e => e.EquipmentId, StringComparer.OrdinalIgnoreCase);
foreach (var tag in content.Tags)
{
if (string.IsNullOrEmpty(tag.EquipmentId)) continue;
if (!equipmentById.TryGetValue(tag.EquipmentId!, out var eq)) continue;
if (!lineById.TryGetValue(eq.UnsLineId, out var line)) continue;
if (!areaById.TryGetValue(line.UnsAreaId, out var area)) continue;
var path = $"/{area.Name}/{line.Name}/{eq.Name}/{tag.Name}";
result[path] = tag.TagConfig; // duplicate-path collisions naturally win-last; UI publish-validation rules out duplicate names
}
return result;
}
public async ValueTask DisposeAsync()
{
if (_disposed) return;
_disposed = true;
if (_bridge is not null) await _bridge.DisposeAsync().ConfigureAwait(false);
foreach (var d in _sources.Disposables)
{
try { d.Dispose(); }
catch (Exception ex) { _logger.LogWarning(ex, "Phase 7 disposable threw during shutdown"); }
}
// Owned SQLite sink: dispose first so the drain timer stops + final batch flushes
// before we release the writer-bearing driver via DriverHost.DisposeAsync upstream.
_ownedSink?.Dispose();
if (_historianSink is IDisposable disposableSink) disposableSink.Dispose();
}
}

View File

@@ -0,0 +1,208 @@
using Microsoft.Extensions.Logging;
using ZB.MOM.WW.OtOpcUa.Configuration.Entities;
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
using ZB.MOM.WW.OtOpcUa.Core.AlarmHistorian;
using ZB.MOM.WW.OtOpcUa.Core.Scripting;
using ZB.MOM.WW.OtOpcUa.Core.ScriptedAlarms;
using ZB.MOM.WW.OtOpcUa.Core.VirtualTags;
namespace ZB.MOM.WW.OtOpcUa.Server.Phase7;
/// <summary>
/// Phase 7 follow-up (task #243) — maps the generation's <see cref="Script"/> /
/// <see cref="VirtualTag"/> / <see cref="ScriptedAlarm"/> rows into the runtime
/// definitions <see cref="VirtualTagEngine"/> + <see cref="ScriptedAlarmEngine"/>
/// expect, builds the engine instances, and returns the <see cref="IReadable"/>
/// sources plus an <see cref="IAlarmSource"/> for the <c>DriverNodeManager</c>
/// wiring added by task #239.
/// </summary>
/// <remarks>
/// <para>
/// Empty Phase 7 config (no virtual tags + no scripted alarms) is a valid state:
/// <see cref="Compose"/> returns a <see cref="Phase7ComposedSources"/> with null
/// sources so Program.cs can pass them through to <c>OpcUaApplicationHost</c>
/// unchanged — deployments without scripts behave exactly as they did before
/// Phase 7.
/// </para>
/// <para>
/// The caller owns the returned <see cref="Phase7ComposedSources.Disposables"/>
/// and must dispose them on shutdown. Engine cascades + timer ticks run off
/// background threads until then.
/// </para>
/// </remarks>
public static class Phase7EngineComposer
{
public static Phase7ComposedSources Compose(
IReadOnlyList<Script> scripts,
IReadOnlyList<VirtualTag> virtualTags,
IReadOnlyList<ScriptedAlarm> scriptedAlarms,
CachedTagUpstreamSource upstream,
IAlarmStateStore alarmStateStore,
IAlarmHistorianSink historianSink,
Serilog.ILogger rootScriptLogger,
ILoggerFactory loggerFactory)
{
ArgumentNullException.ThrowIfNull(scripts);
ArgumentNullException.ThrowIfNull(virtualTags);
ArgumentNullException.ThrowIfNull(scriptedAlarms);
ArgumentNullException.ThrowIfNull(upstream);
ArgumentNullException.ThrowIfNull(alarmStateStore);
ArgumentNullException.ThrowIfNull(historianSink);
ArgumentNullException.ThrowIfNull(rootScriptLogger);
ArgumentNullException.ThrowIfNull(loggerFactory);
if (virtualTags.Count == 0 && scriptedAlarms.Count == 0)
return Phase7ComposedSources.Empty;
var scriptById = scripts
.Where(s => s.Enabled())
.ToDictionary(s => s.ScriptId, StringComparer.Ordinal);
var scriptLoggerFactory = new ScriptLoggerFactory(rootScriptLogger);
var disposables = new List<IDisposable>();
// Engines take Serilog.ILogger — each engine gets its own so rolling-file emissions
// stay keyed to the right source in the scripts-*.log.
VirtualTagSource? vtSource = null;
if (virtualTags.Count > 0)
{
var vtDefs = ProjectVirtualTags(virtualTags, scriptById).ToList();
var vtEngine = new VirtualTagEngine(upstream, scriptLoggerFactory, rootScriptLogger);
vtEngine.Load(vtDefs);
vtSource = new VirtualTagSource(vtEngine);
disposables.Add(vtEngine);
}
IReadable? alarmReadable = null;
if (scriptedAlarms.Count > 0)
{
var alarmDefs = ProjectScriptedAlarms(scriptedAlarms, scriptById).ToList();
var alarmEngine = new ScriptedAlarmEngine(upstream, alarmStateStore, scriptLoggerFactory, rootScriptLogger);
// Wire alarm emissions to the historian sink (Stream D). Fire-and-forget because
// the sink's EnqueueAsync is already non-blocking from the producer's view.
var engineLogger = loggerFactory.CreateLogger("Phase7HistorianRouter");
alarmEngine.OnEvent += (_, e) => _ = RouteToHistorianAsync(e, historianSink, engineLogger);
alarmEngine.LoadAsync(alarmDefs, CancellationToken.None).GetAwaiter().GetResult();
var alarmSource = new ScriptedAlarmSource(alarmEngine);
// Task #245 — expose each alarm's current Active state as IReadable so OPC UA
// variable reads on Source=ScriptedAlarm nodes return the live predicate truth
// instead of BadNotFound. ScriptedAlarmSource stays registered as IAlarmSource
// for the event stream; the IReadable is a separate adapter over the same engine.
alarmReadable = new ScriptedAlarmReadable(alarmEngine);
disposables.Add(alarmEngine);
disposables.Add(alarmSource);
}
return new Phase7ComposedSources(vtSource, alarmReadable, disposables);
}
internal static IEnumerable<VirtualTagDefinition> ProjectVirtualTags(
IReadOnlyList<VirtualTag> rows, IReadOnlyDictionary<string, Script> scriptById)
{
foreach (var row in rows)
{
if (!row.Enabled) continue;
if (!scriptById.TryGetValue(row.ScriptId, out var script))
throw new InvalidOperationException(
$"VirtualTag '{row.VirtualTagId}' references unknown / disabled Script '{row.ScriptId}' in this generation");
yield return new VirtualTagDefinition(
Path: row.VirtualTagId,
DataType: ParseDataType(row.DataType),
ScriptSource: script.SourceCode,
ChangeTriggered: row.ChangeTriggered,
TimerInterval: row.TimerIntervalMs.HasValue
? TimeSpan.FromMilliseconds(row.TimerIntervalMs.Value)
: null,
Historize: row.Historize);
}
}
internal static IEnumerable<ScriptedAlarmDefinition> ProjectScriptedAlarms(
IReadOnlyList<ScriptedAlarm> rows, IReadOnlyDictionary<string, Script> scriptById)
{
foreach (var row in rows)
{
if (!row.Enabled) continue;
if (!scriptById.TryGetValue(row.PredicateScriptId, out var script))
throw new InvalidOperationException(
$"ScriptedAlarm '{row.ScriptedAlarmId}' references unknown / disabled predicate Script '{row.PredicateScriptId}'");
yield return new ScriptedAlarmDefinition(
AlarmId: row.ScriptedAlarmId,
EquipmentPath: row.EquipmentId,
AlarmName: row.Name,
Kind: ParseAlarmKind(row.AlarmType),
Severity: MapSeverity(row.Severity),
MessageTemplate: row.MessageTemplate,
PredicateScriptSource: script.SourceCode,
HistorizeToAveva: row.HistorizeToAveva,
Retain: row.Retain);
}
}
private static DriverDataType ParseDataType(string raw) =>
Enum.TryParse<DriverDataType>(raw, ignoreCase: true, out var parsed) ? parsed : DriverDataType.String;
private static AlarmKind ParseAlarmKind(string raw) => raw switch
{
"AlarmCondition" => AlarmKind.AlarmCondition,
"LimitAlarm" => AlarmKind.LimitAlarm,
"DiscreteAlarm" => AlarmKind.DiscreteAlarm,
"OffNormalAlarm" => AlarmKind.OffNormalAlarm,
_ => throw new InvalidOperationException($"Unknown AlarmType '{raw}' — DB check constraint should have caught this"),
};
// OPC UA Part 9 severity bands (1..1000) → AlarmSeverity enum. Matches the same
// banding the AB CIP ALMA projection + OpcUaClient MapSeverity use.
private static AlarmSeverity MapSeverity(int s) => s switch
{
<= 250 => AlarmSeverity.Low,
<= 500 => AlarmSeverity.Medium,
<= 750 => AlarmSeverity.High,
_ => AlarmSeverity.Critical,
};
private static async Task RouteToHistorianAsync(
ScriptedAlarmEvent e, IAlarmHistorianSink sink, Microsoft.Extensions.Logging.ILogger log)
{
try
{
var historianEvent = new AlarmHistorianEvent(
AlarmId: e.AlarmId,
EquipmentPath: e.EquipmentPath,
AlarmName: e.AlarmName,
AlarmTypeName: e.Kind.ToString(),
Severity: e.Severity,
EventKind: e.Emission.ToString(),
Message: e.Message,
User: e.Condition.LastAckUser ?? "system",
Comment: e.Condition.LastAckComment,
TimestampUtc: e.TimestampUtc);
await sink.EnqueueAsync(historianEvent, CancellationToken.None).ConfigureAwait(false);
}
catch (Exception ex)
{
log.LogWarning(ex, "Historian enqueue failed for alarm {AlarmId}/{Emission}", e.AlarmId, e.Emission);
}
}
}
/// <summary>What <see cref="Phase7EngineComposer.Compose"/> returns.</summary>
/// <param name="VirtualReadable">Non-null when virtual tags were composed; pass to <c>OpcUaApplicationHost.virtualReadable</c>.</param>
/// <param name="ScriptedAlarmReadable">Non-null when scripted alarms were composed; pass to <c>OpcUaApplicationHost.scriptedAlarmReadable</c>.</param>
/// <param name="Disposables">Engine + source instances the caller owns. Dispose on shutdown.</param>
public sealed record Phase7ComposedSources(
IReadable? VirtualReadable,
IReadable? ScriptedAlarmReadable,
IReadOnlyList<IDisposable> Disposables)
{
public static readonly Phase7ComposedSources Empty =
new(null, null, Array.Empty<IDisposable>());
}
internal static class ScriptEnabledExtensions
{
// Script has no explicit Enabled column; every row in the generation is a live script.
public static bool Enabled(this Script _) => true;
}

View File

@@ -0,0 +1,58 @@
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
using ZB.MOM.WW.OtOpcUa.Core.ScriptedAlarms;
namespace ZB.MOM.WW.OtOpcUa.Server.Phase7;
/// <summary>
/// <see cref="IReadable"/> adapter exposing each scripted alarm's current
/// <see cref="AlarmActiveState"/> as an OPC UA boolean. Phase 7 follow-up (task #245).
/// </summary>
/// <remarks>
/// <para>
/// Paired with the <see cref="NodeSourceKind.ScriptedAlarm"/> dispatch in
/// <c>DriverNodeManager.OnReadValue</c>. Full-reference lookup is the
/// <c>ScriptedAlarmId</c> the walker wrote into <c>DriverAttributeInfo.FullName</c>
/// when emitting the alarm variable node.
/// </para>
/// <para>
/// Unknown alarm ids return <c>BadNodeIdUnknown</c> so misconfiguration surfaces
/// instead of silently reading <c>false</c>. Alarms whose predicate has never
/// been evaluated (brand new, before the engine's first cascade tick) report
/// <see cref="AlarmActiveState.Inactive"/> via <see cref="AlarmConditionState.Fresh"/>,
/// which matches the Part 9 initial-state semantics.
/// </para>
/// </remarks>
public sealed class ScriptedAlarmReadable : IReadable
{
/// <summary>OPC UA <c>StatusCodes.BadNodeIdUnknown</c> — kept local so we don't pull the OPC stack.</summary>
private const uint BadNodeIdUnknown = 0x80340000;
private readonly ScriptedAlarmEngine _engine;
public ScriptedAlarmReadable(ScriptedAlarmEngine engine)
{
_engine = engine ?? throw new ArgumentNullException(nameof(engine));
}
public Task<IReadOnlyList<DataValueSnapshot>> ReadAsync(
IReadOnlyList<string> fullReferences, CancellationToken cancellationToken)
{
ArgumentNullException.ThrowIfNull(fullReferences);
var now = DateTime.UtcNow;
var results = new DataValueSnapshot[fullReferences.Count];
for (var i = 0; i < fullReferences.Count; i++)
{
var alarmId = fullReferences[i];
var state = _engine.GetState(alarmId);
if (state is null)
{
results[i] = new DataValueSnapshot(null, BadNodeIdUnknown, null, now);
continue;
}
var active = state.Active == AlarmActiveState.Active;
results[i] = new DataValueSnapshot(active, 0u, now, now);
}
return Task.FromResult<IReadOnlyList<DataValueSnapshot>>(results);
}
}

View File

@@ -0,0 +1,252 @@
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
using Serilog;
using Serilog.Formatting.Compact;
using ZB.MOM.WW.OtOpcUa.Configuration;
using ZB.MOM.WW.OtOpcUa.Configuration.LocalCache;
using ZB.MOM.WW.OtOpcUa.Core.Hosting;
using ZB.MOM.WW.OtOpcUa.Core.AlarmHistorian;
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
using ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client;
using ZB.MOM.WW.OtOpcUa.Driver.AbCip;
using ZB.MOM.WW.OtOpcUa.Driver.AbLegacy;
using ZB.MOM.WW.OtOpcUa.Driver.FOCAS;
using ZB.MOM.WW.OtOpcUa.Driver.Modbus;
using ZB.MOM.WW.OtOpcUa.Driver.S7;
using ZB.MOM.WW.OtOpcUa.Driver.TwinCAT;
using ZB.MOM.WW.OtOpcUa.Server;
using ZB.MOM.WW.OtOpcUa.Server.Alarms;
using ZB.MOM.WW.OtOpcUa.Server.History;
using ZB.MOM.WW.OtOpcUa.Server.Hosting;
using ZB.MOM.WW.OtOpcUa.Server.OpcUa;
using ZB.MOM.WW.OtOpcUa.Server.Phase7;
using ZB.MOM.WW.OtOpcUa.Server.Redundancy;
using ZB.MOM.WW.OtOpcUa.Server.Security;
var builder = Host.CreateApplicationBuilder(args);
// Per Phase 6.1 Stream C.3: SIEMs (Splunk, Datadog) ingest the JSON file without a
// regex parser. Plain-text rolling file stays on by default for human readability;
// JSON file is opt-in via appsetting `Serilog:WriteJson = true`.
var writeJson = builder.Configuration.GetValue<bool>("Serilog:WriteJson");
var loggerBuilder = new LoggerConfiguration()
.ReadFrom.Configuration(builder.Configuration)
.Enrich.FromLogContext()
.WriteTo.Console()
.WriteTo.File("logs/otopcua-.log", rollingInterval: RollingInterval.Day);
if (writeJson)
{
loggerBuilder = loggerBuilder.WriteTo.File(
new CompactJsonFormatter(),
"logs/otopcua-.json.log",
rollingInterval: RollingInterval.Day);
}
Log.Logger = loggerBuilder.CreateLogger();
builder.Services.AddSerilog();
builder.Services.AddWindowsService(o => o.ServiceName = "OtOpcUa");
var nodeSection = builder.Configuration.GetSection(NodeOptions.SectionName);
var options = new NodeOptions
{
NodeId = nodeSection.GetValue<string>("NodeId")
?? throw new InvalidOperationException("Node:NodeId not configured"),
ClusterId = nodeSection.GetValue<string>("ClusterId")
?? throw new InvalidOperationException("Node:ClusterId not configured"),
ConfigDbConnectionString = nodeSection.GetValue<string>("ConfigDbConnectionString")
?? throw new InvalidOperationException("Node:ConfigDbConnectionString not configured"),
LocalCachePath = nodeSection.GetValue<string>("LocalCachePath") ?? "config_cache.db",
};
var opcUaSection = builder.Configuration.GetSection(OpcUaServerOptions.SectionName);
var ldapSection = opcUaSection.GetSection("Ldap");
var ldapOptions = new LdapOptions
{
Enabled = ldapSection.GetValue<bool?>("Enabled") ?? false,
Server = ldapSection.GetValue<string>("Server") ?? "localhost",
Port = ldapSection.GetValue<int?>("Port") ?? 3893,
UseTls = ldapSection.GetValue<bool?>("UseTls") ?? false,
AllowInsecureLdap = ldapSection.GetValue<bool?>("AllowInsecureLdap") ?? true,
SearchBase = ldapSection.GetValue<string>("SearchBase") ?? "dc=lmxopcua,dc=local",
ServiceAccountDn = ldapSection.GetValue<string>("ServiceAccountDn") ?? string.Empty,
ServiceAccountPassword = ldapSection.GetValue<string>("ServiceAccountPassword") ?? string.Empty,
GroupToRole = ldapSection.GetSection("GroupToRole").Get<Dictionary<string, string>>() ?? new(StringComparer.OrdinalIgnoreCase),
};
var opcUaOptions = new OpcUaServerOptions
{
EndpointUrl = opcUaSection.GetValue<string>("EndpointUrl") ?? "opc.tcp://0.0.0.0:4840/OtOpcUa",
ApplicationName = opcUaSection.GetValue<string>("ApplicationName") ?? "OtOpcUa Server",
ApplicationUri = opcUaSection.GetValue<string>("ApplicationUri") ?? "urn:OtOpcUa:Server",
PkiStoreRoot = opcUaSection.GetValue<string>("PkiStoreRoot")
?? Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.CommonApplicationData), "OtOpcUa", "pki"),
AutoAcceptUntrustedClientCertificates = opcUaSection.GetValue<bool?>("AutoAcceptUntrustedClientCertificates") ?? true,
SecurityProfile = Enum.TryParse<OpcUaSecurityProfile>(opcUaSection.GetValue<string>("SecurityProfile"), true, out var p)
? p : OpcUaSecurityProfile.None,
Ldap = ldapOptions,
AnonymousRoles = opcUaSection.GetSection("AnonymousRoles").Get<string[]>() ?? [],
};
builder.Services.AddSingleton(options);
builder.Services.AddSingleton(opcUaOptions);
builder.Services.AddSingleton(ldapOptions);
builder.Services.AddSingleton<IUserAuthenticator>(sp => ldapOptions.Enabled
? new LdapUserAuthenticator(ldapOptions, sp.GetRequiredService<ILogger<LdapUserAuthenticator>>())
: new DenyAllUserAuthenticator());
builder.Services.AddSingleton<ILocalConfigCache>(_ => new LiteDbConfigCache(options.LocalCachePath));
builder.Services.AddSingleton<DriverHost>();
builder.Services.AddSingleton<NodeBootstrap>();
// Task #248 — driver-instance bootstrap pipeline. DriverFactoryRegistry is the
// type-name → factory map; each driver project's static Register call pre-loads
// its factory so the bootstrapper can materialise DriverInstance rows from the
// central DB into live IDriver instances.
builder.Services.AddSingleton<DriverFactoryRegistry>(_ =>
{
var registry = new DriverFactoryRegistry();
// Galaxy access flows through the in-process GalaxyDriver (DriverType =
// "GalaxyMxGateway") talking gRPC to the mxaccessgw worker. The legacy
// out-of-process GalaxyProxyDriver retired in PR 7.2 once the parity matrix
// (docs/v2/Galaxy.ParityMatrix.md) verified equivalence.
ZB.MOM.WW.OtOpcUa.Driver.Galaxy.GalaxyDriverFactoryExtensions.Register(registry);
FocasDriverFactoryExtensions.Register(registry);
ModbusDriverFactoryExtensions.Register(registry);
AbCipDriverFactoryExtensions.Register(registry);
AbLegacyDriverFactoryExtensions.Register(registry);
S7DriverFactoryExtensions.Register(registry);
TwinCATDriverFactoryExtensions.Register(registry);
return registry;
});
builder.Services.AddSingleton<DriverInstanceBootstrapper>();
// Phase 6.1 Stream B.4 (task #137) — ScheduledRecycleHostedService. Empty scheduler
// list by default; DriverInstanceBootstrapper calls AddScheduler for any Tier C driver
// whose ResilienceConfig carries a RecycleIntervalSeconds AND has an IDriverSupervisor
// registered in DI. Registered as singleton so DriverInstanceBootstrapper can inject
// the same instance that the BackgroundService loop drives.
builder.Services.AddSingleton<ScheduledRecycleHostedService>();
builder.Services.AddHostedService(sp => sp.GetRequiredService<ScheduledRecycleHostedService>());
// ADR-001 Option A wiring — the registry is the handoff between OpcUaServerService's
// bootstrap-time population pass + OpcUaApplicationHost's StartAsync walker invocation.
// DriverEquipmentContentRegistry.Get is the equipmentContentLookup delegate that PR #155
// added to OpcUaApplicationHost's ctor seam.
builder.Services.AddSingleton<DriverEquipmentContentRegistry>();
builder.Services.AddScoped<EquipmentNamespaceContentLoader>();
// Phase 6.2 Stream C wiring — constructs AuthorizationGate + NodeScopeResolver from the
// published generation's NodeAcl rows + per-driver EquipmentNamespaceContent. Gated by
// NodeOptions.Authorization.Enabled (default false) so existing deployments don't flip
// to ACL enforcement accidentally on upgrade.
builder.Services.AddSingleton<AuthorizationBootstrap>();
// PR 1+2.W — server-level history routing + alarm-condition state machine. Singletons
// shared across every DriverNodeManager. The alarm service runs the Active /
// Acknowledged / Inactive state machine for any driver that declares alarms via
// AlarmConditionInfo's sub-attribute refs.
builder.Services.AddSingleton<IHistoryRouter, HistoryRouter>();
builder.Services.AddSingleton<AlarmConditionService>();
// PR 3.W — Wonderware historian sidecar wiring. Reads Historian:Wonderware:* from
// configuration; when Enabled=true, registers the .NET 10 client as both an
// IHistorianDataSource (via IHistoryRouter under the configured driver instance
// prefix; defaults to "galaxy") and an IAlarmHistorianWriter (consumed by the
// SqliteStoreAndForwardSink drain worker once task #248 wires it). Disabled
// deployments fall back to DriverNodeManager's legacy IHistoryProvider adapter
// for the read path and NullAlarmHistorianSink for the write path — keeping the
// sidecar fully optional until the legacy paths retire in PR 7.2.
var wonderwareSection = builder.Configuration.GetSection("Historian:Wonderware");
var wonderwareEnabled = wonderwareSection.GetValue("Enabled", false);
if (wonderwareEnabled)
{
var wonderwarePrefix = wonderwareSection.GetValue("DriverInstancePrefix", "galaxy")
?? throw new InvalidOperationException("Historian:Wonderware:DriverInstancePrefix must be a string when configured.");
var wonderwareOptions = new WonderwareHistorianClientOptions(
PipeName: wonderwareSection.GetValue<string>("PipeName")
?? throw new InvalidOperationException("Historian:Wonderware:PipeName must be set when Enabled=true."),
SharedSecret: wonderwareSection.GetValue<string>("SharedSecret")
?? throw new InvalidOperationException("Historian:Wonderware:SharedSecret must be set when Enabled=true."),
PeerName: wonderwareSection.GetValue("PeerName", $"OtOpcUa-{options.NodeId}") ?? "OtOpcUa",
ConnectTimeout: TimeSpan.FromSeconds(wonderwareSection.GetValue("ConnectTimeoutSeconds", 10)),
CallTimeout: TimeSpan.FromSeconds(wonderwareSection.GetValue("CallTimeoutSeconds", 30)));
builder.Services.AddSingleton(wonderwareOptions);
builder.Services.AddSingleton<WonderwareHistorianClient>();
builder.Services.AddSingleton<IAlarmHistorianWriter>(sp => sp.GetRequiredService<WonderwareHistorianClient>());
builder.Services.AddHostedService(sp => new WonderwareHistorianBootstrap(
sp.GetRequiredService<IHistoryRouter>(),
sp.GetRequiredService<WonderwareHistorianClient>(),
wonderwarePrefix,
sp.GetRequiredService<ILogger<WonderwareHistorianBootstrap>>()));
}
builder.Services.AddSingleton<OpcUaApplicationHost>(sp =>
{
var registry = sp.GetRequiredService<DriverEquipmentContentRegistry>();
return new OpcUaApplicationHost(
sp.GetRequiredService<OpcUaServerOptions>(),
sp.GetRequiredService<DriverHost>(),
sp.GetRequiredService<IUserAuthenticator>(),
sp.GetRequiredService<ILoggerFactory>(),
sp.GetRequiredService<ILogger<OpcUaApplicationHost>>(),
equipmentContentLookup: registry.Get,
historyRouter: sp.GetRequiredService<IHistoryRouter>(),
alarmConditionService: sp.GetRequiredService<AlarmConditionService>());
});
builder.Services.AddHostedService<OpcUaServerService>();
// Central-config DB access for the host-status publisher (LMX follow-up #7). Scoped context
// so per-heartbeat change-tracking stays isolated; publisher opens one scope per tick.
builder.Services.AddDbContext<OtOpcUaConfigDbContext>(opt =>
opt.UseSqlServer(options.ConfigDbConnectionString));
// Additional pooled factory so Phase 6.3 RedundancyCoordinator (singleton) can create its
// own scoped DbContext for topology loading without fighting the scoped HostStatusPublisher.
builder.Services.AddDbContextFactory<OtOpcUaConfigDbContext>(opt =>
opt.UseSqlServer(options.ConfigDbConnectionString));
builder.Services.AddHostedService<HostStatusPublisher>();
// Phase 6.3 Stream C (task #147) — ServiceLevel + ServerUriArray + RedundancySupport node
// wiring. Coordinator holds topology; publisher computes ServiceLevel byte + ServerUriArray;
// hosted service ticks publisher + pushes values onto the Server object via the node writer.
builder.Services.AddSingleton(sp => new RedundancyCoordinator(
sp.GetRequiredService<IDbContextFactory<OtOpcUaConfigDbContext>>(),
sp.GetRequiredService<ILogger<RedundancyCoordinator>>(),
options.NodeId, options.ClusterId));
builder.Services.AddSingleton<ApplyLeaseRegistry>();
builder.Services.AddSingleton<RecoveryStateManager>();
builder.Services.AddSingleton<PeerReachabilityTracker>();
builder.Services.AddSingleton(sp => new RedundancyStatePublisher(
sp.GetRequiredService<RedundancyCoordinator>(),
sp.GetRequiredService<ApplyLeaseRegistry>(),
sp.GetRequiredService<RecoveryStateManager>(),
sp.GetRequiredService<PeerReachabilityTracker>()));
builder.Services.AddHostedService<RedundancyPublisherHostedService>();
// Phase 6.3 Stream B — two-layer peer-probe loops populating PeerReachabilityTracker.
// Without these the publisher sees PeerReachability.Unknown for every peer and degrades
// to the Isolated-Primary band (230) even when the peer is up. Safe default but not the
// full non-transparent-redundancy UX.
builder.Services.AddSingleton<PeerProbeOptions>();
builder.Services.AddHttpClient(PeerHttpProbeLoop.HttpClientName);
builder.Services.AddHostedService<PeerHttpProbeLoop>();
builder.Services.AddHostedService<PeerUaProbeLoop>();
// Phase 6.3 A.2 + 6.1 Stream D — periodic generation refresh. Detects peer-published
// generations, opens an ApplyLeaseRegistry lease during the refresh window (so the
// publisher surfaces PrimaryMidApply=200 instead of sitting at PrimaryHealthy=255
// through the apply), and calls coordinator.RefreshAsync to pick up topology changes.
builder.Services.AddHostedService<GenerationRefreshHostedService>();
// Phase 7 follow-up #246 — historian sink + engine composer. NullAlarmHistorianSink
// is the default until the Galaxy.Host SqliteStoreAndForwardSink writer adapter
// lands (task #248). The composer reads Script/VirtualTag/ScriptedAlarm rows on
// generation bootstrap, builds the engines, and starts the driver-bridge feed.
builder.Services.AddSingleton<IAlarmHistorianSink>(NullAlarmHistorianSink.Instance);
builder.Services.AddSingleton(Log.Logger); // Serilog root for ScriptLoggerFactory
builder.Services.AddSingleton<Phase7Composer>();
var host = builder.Build();
await host.RunAsync();

View File

@@ -0,0 +1,85 @@
using System.Collections.Concurrent;
namespace ZB.MOM.WW.OtOpcUa.Server.Redundancy;
/// <summary>
/// Tracks in-progress publish-generation apply leases keyed on
/// <c>(ConfigGenerationId, PublishRequestId)</c>. Per decision #162 a sealed lease pattern
/// ensures <see cref="IsApplyInProgress"/> reflects every exit path (success / exception /
/// cancellation) because the IAsyncDisposable returned by <see cref="BeginApplyLease"/>
/// decrements unconditionally.
/// </summary>
/// <remarks>
/// A watchdog loop calls <see cref="PruneStale"/> periodically with the configured
/// <see cref="ApplyMaxDuration"/>; any lease older than that is force-closed so a crashed
/// publisher can't pin the node at <see cref="ServiceLevelBand.PrimaryMidApply"/>.
/// </remarks>
public sealed class ApplyLeaseRegistry
{
private readonly ConcurrentDictionary<LeaseKey, DateTime> _leases = new();
private readonly TimeProvider _timeProvider;
public TimeSpan ApplyMaxDuration { get; }
public ApplyLeaseRegistry(TimeSpan? applyMaxDuration = null, TimeProvider? timeProvider = null)
{
ApplyMaxDuration = applyMaxDuration ?? TimeSpan.FromMinutes(10);
_timeProvider = timeProvider ?? TimeProvider.System;
}
/// <summary>
/// Register a new lease. Returns an <see cref="IAsyncDisposable"/> whose disposal
/// decrements the registry; use <c>await using</c> in the caller so every exit path
/// closes the lease.
/// </summary>
public IAsyncDisposable BeginApplyLease(long generationId, Guid publishRequestId)
{
var key = new LeaseKey(generationId, publishRequestId);
_leases[key] = _timeProvider.GetUtcNow().UtcDateTime;
return new LeaseScope(this, key);
}
/// <summary>True when at least one apply lease is currently open.</summary>
public bool IsApplyInProgress => !_leases.IsEmpty;
/// <summary>Current open-lease count — diagnostics only.</summary>
public int OpenLeaseCount => _leases.Count;
/// <summary>Force-close any lease older than <see cref="ApplyMaxDuration"/>. Watchdog tick.</summary>
/// <returns>Number of leases the watchdog closed on this tick.</returns>
public int PruneStale()
{
var now = _timeProvider.GetUtcNow().UtcDateTime;
var closed = 0;
foreach (var kv in _leases)
{
if (now - kv.Value > ApplyMaxDuration && _leases.TryRemove(kv.Key, out _))
closed++;
}
return closed;
}
private void Release(LeaseKey key) => _leases.TryRemove(key, out _);
private readonly record struct LeaseKey(long GenerationId, Guid PublishRequestId);
private sealed class LeaseScope : IAsyncDisposable
{
private readonly ApplyLeaseRegistry _owner;
private readonly LeaseKey _key;
private int _disposed;
public LeaseScope(ApplyLeaseRegistry owner, LeaseKey key)
{
_owner = owner;
_key = key;
}
public ValueTask DisposeAsync()
{
if (Interlocked.Exchange(ref _disposed, 1) == 0)
_owner.Release(_key);
return ValueTask.CompletedTask;
}
}
}

View File

@@ -0,0 +1,96 @@
using ZB.MOM.WW.OtOpcUa.Configuration.Entities;
using ZB.MOM.WW.OtOpcUa.Configuration.Enums;
namespace ZB.MOM.WW.OtOpcUa.Server.Redundancy;
/// <summary>
/// Pure-function mapper from the shared config DB's <see cref="ServerCluster"/> +
/// <see cref="ClusterNode"/> rows to an immutable <see cref="RedundancyTopology"/>.
/// Validates Phase 6.3 Stream A.1 invariants and throws
/// <see cref="InvalidTopologyException"/> on violation so the coordinator can fail startup
/// fast with a clear message rather than boot into an ambiguous state.
/// </summary>
/// <remarks>
/// Stateless — the caller owns the DB round-trip + hands rows in. Keeping it pure makes
/// the invariant matrix testable without EF or SQL Server.
/// </remarks>
public static class ClusterTopologyLoader
{
/// <summary>Build a topology snapshot for the given self node. Throws on invariant violation.</summary>
public static RedundancyTopology Load(string selfNodeId, ServerCluster cluster, IReadOnlyList<ClusterNode> nodes)
{
ArgumentException.ThrowIfNullOrWhiteSpace(selfNodeId);
ArgumentNullException.ThrowIfNull(cluster);
ArgumentNullException.ThrowIfNull(nodes);
ValidateClusterShape(cluster, nodes);
ValidateUniqueApplicationUris(nodes);
ValidatePrimaryCount(cluster, nodes);
var self = nodes.FirstOrDefault(n => string.Equals(n.NodeId, selfNodeId, StringComparison.OrdinalIgnoreCase))
?? throw new InvalidTopologyException(
$"Self node '{selfNodeId}' is not a member of cluster '{cluster.ClusterId}'. " +
$"Members: {string.Join(", ", nodes.Select(n => n.NodeId))}.");
var peers = nodes
.Where(n => !string.Equals(n.NodeId, selfNodeId, StringComparison.OrdinalIgnoreCase))
.Select(n => new RedundancyPeer(
NodeId: n.NodeId,
Role: n.RedundancyRole,
Host: n.Host,
OpcUaPort: n.OpcUaPort,
DashboardPort: n.DashboardPort,
ApplicationUri: n.ApplicationUri))
.ToList();
return new RedundancyTopology(
ClusterId: cluster.ClusterId,
SelfNodeId: self.NodeId,
SelfRole: self.RedundancyRole,
Mode: cluster.RedundancyMode,
Peers: peers,
SelfApplicationUri: self.ApplicationUri);
}
private static void ValidateClusterShape(ServerCluster cluster, IReadOnlyList<ClusterNode> nodes)
{
if (nodes.Count == 0)
throw new InvalidTopologyException($"Cluster '{cluster.ClusterId}' has zero nodes.");
// Decision #83 — v2.0 caps clusters at two nodes.
if (nodes.Count > 2)
throw new InvalidTopologyException(
$"Cluster '{cluster.ClusterId}' has {nodes.Count} nodes. v2.0 supports at most 2 nodes per cluster (decision #83).");
// Every node must belong to the given cluster.
var wrongCluster = nodes.FirstOrDefault(n =>
!string.Equals(n.ClusterId, cluster.ClusterId, StringComparison.OrdinalIgnoreCase));
if (wrongCluster is not null)
throw new InvalidTopologyException(
$"Node '{wrongCluster.NodeId}' belongs to cluster '{wrongCluster.ClusterId}', not '{cluster.ClusterId}'.");
}
private static void ValidateUniqueApplicationUris(IReadOnlyList<ClusterNode> nodes)
{
var dup = nodes
.GroupBy(n => n.ApplicationUri, StringComparer.Ordinal)
.FirstOrDefault(g => g.Count() > 1);
if (dup is not null)
throw new InvalidTopologyException(
$"Nodes {string.Join(", ", dup.Select(n => n.NodeId))} share ApplicationUri '{dup.Key}'. " +
$"OPC UA Part 4 requires unique ApplicationUri per server — clients pin trust here (decision #86).");
}
private static void ValidatePrimaryCount(ServerCluster cluster, IReadOnlyList<ClusterNode> nodes)
{
// Standalone mode: any role is fine. Warm / Hot: at most one Primary per cluster.
if (cluster.RedundancyMode == RedundancyMode.None) return;
var primaries = nodes.Count(n => n.RedundancyRole == RedundancyRole.Primary);
if (primaries > 1)
throw new InvalidTopologyException(
$"Cluster '{cluster.ClusterId}' has {primaries} Primary nodes in redundancy mode {cluster.RedundancyMode}. " +
$"At most one Primary per cluster (decision #84). Runtime detects and demotes both to ServiceLevel 2 " +
$"per the 8-state matrix; startup fails fast to surface the misconfiguration earlier.");
}
}

View File

@@ -0,0 +1,42 @@
namespace ZB.MOM.WW.OtOpcUa.Server.Redundancy;
/// <summary>
/// Latest observed reachability of the peer node per the Phase 6.3 Stream B.1/B.2 two-layer
/// probe model. HTTP layer is the fast-fail; UA layer is authoritative.
/// </summary>
/// <remarks>
/// Fed into the <see cref="ServiceLevelCalculator"/> as <c>peerHttpHealthy</c> +
/// <c>peerUaHealthy</c>. The concrete probe loops (<c>PeerHttpProbeLoop</c> +
/// <c>PeerUaProbeLoop</c>) live in a Stream B runtime follow-up — this type is the
/// contract the publisher reads; probers write via
/// <see cref="PeerReachabilityTracker"/>.
/// </remarks>
public sealed record PeerReachability(bool HttpHealthy, bool UaHealthy)
{
public static readonly PeerReachability Unknown = new(false, false);
public static readonly PeerReachability FullyHealthy = new(true, true);
/// <summary>True when both probes report healthy — the <c>ServiceLevelCalculator</c>'s peerReachable gate.</summary>
public bool BothHealthy => HttpHealthy && UaHealthy;
}
/// <summary>
/// Thread-safe holder of the latest <see cref="PeerReachability"/> per peer NodeId. Probe
/// loops call <see cref="Update"/>; the <see cref="RedundancyStatePublisher"/> reads via
/// <see cref="Get"/>.
/// </summary>
public sealed class PeerReachabilityTracker
{
private readonly System.Collections.Concurrent.ConcurrentDictionary<string, PeerReachability> _byPeer =
new(StringComparer.OrdinalIgnoreCase);
public void Update(string peerNodeId, PeerReachability reachability)
{
ArgumentException.ThrowIfNullOrWhiteSpace(peerNodeId);
_byPeer[peerNodeId] = reachability ?? throw new ArgumentNullException(nameof(reachability));
}
/// <summary>Current reachability for a peer. Returns <see cref="PeerReachability.Unknown"/> when not yet probed.</summary>
public PeerReachability Get(string peerNodeId) =>
_byPeer.TryGetValue(peerNodeId, out var r) ? r : PeerReachability.Unknown;
}

View File

@@ -0,0 +1,65 @@
namespace ZB.MOM.WW.OtOpcUa.Server.Redundancy;
/// <summary>
/// Tracks the Recovering-band dwell for a node after a <c>Faulted → Healthy</c> transition.
/// Per decision #154 and Phase 6.3 Stream B.4 a node that has just returned to health stays
/// in the Recovering band (180 Primary / 30 Backup) until BOTH: (a) the configured
/// <see cref="DwellTime"/> has elapsed, AND (b) at least one successful publish-witness
/// read has been observed.
/// </summary>
/// <remarks>
/// Purely in-memory, no I/O. The coordinator feeds events into <see cref="MarkFaulted"/>,
/// <see cref="MarkRecovered"/>, and <see cref="RecordPublishWitness"/>; <see cref="IsDwellMet"/>
/// becomes true only after both conditions converge.
/// </remarks>
public sealed class RecoveryStateManager
{
private readonly TimeSpan _dwellTime;
private readonly TimeProvider _timeProvider;
/// <summary>Last time the node transitioned Faulted → Healthy. Null until first recovery.</summary>
private DateTime? _recoveredUtc;
/// <summary>True once a publish-witness read has succeeded after the last recovery.</summary>
private bool _witnessed;
public TimeSpan DwellTime => _dwellTime;
public RecoveryStateManager(TimeSpan? dwellTime = null, TimeProvider? timeProvider = null)
{
_dwellTime = dwellTime ?? TimeSpan.FromSeconds(60);
_timeProvider = timeProvider ?? TimeProvider.System;
}
/// <summary>Report that the node has entered the Faulted state.</summary>
public void MarkFaulted()
{
_recoveredUtc = null;
_witnessed = false;
}
/// <summary>Report that the node has transitioned Faulted → Healthy; dwell clock starts now.</summary>
public void MarkRecovered()
{
_recoveredUtc = _timeProvider.GetUtcNow().UtcDateTime;
_witnessed = false;
}
/// <summary>Report a successful publish-witness read.</summary>
public void RecordPublishWitness() => _witnessed = true;
/// <summary>
/// True when the dwell is considered met: either the node never faulted in the first
/// place, or both (dwell time elapsed + publish witness recorded) since the last
/// recovery. False means the coordinator should report Recovering-band ServiceLevel.
/// </summary>
public bool IsDwellMet()
{
if (_recoveredUtc is null) return true; // never faulted → dwell N/A
if (!_witnessed) return false;
var elapsed = _timeProvider.GetUtcNow().UtcDateTime - _recoveredUtc.Value;
return elapsed >= _dwellTime;
}
}

View File

@@ -0,0 +1,107 @@
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Logging;
using ZB.MOM.WW.OtOpcUa.Configuration;
using ZB.MOM.WW.OtOpcUa.Configuration.Entities;
using ZB.MOM.WW.OtOpcUa.Configuration.Enums;
namespace ZB.MOM.WW.OtOpcUa.Server.Redundancy;
/// <summary>
/// Process-singleton holder of the current <see cref="RedundancyTopology"/>. Reads the
/// shared config DB at <see cref="InitializeAsync"/> time + re-reads on
/// <see cref="RefreshAsync"/> (called after <c>sp_PublishGeneration</c> completes so
/// operator role-swaps take effect without a process restart).
/// </summary>
/// <remarks>
/// <para>Per Phase 6.3 Stream A.1-A.2. The coordinator is the source of truth for the
/// <see cref="ServiceLevelCalculator"/> inputs: role (from topology), peer reachability
/// (from peer-probe loops — Stream B.1/B.2 follow-up), apply-in-progress (from
/// <see cref="ApplyLeaseRegistry"/>), topology-valid (from invariant checks at load time
/// + runtime detection of conflicting peer claims).</para>
///
/// <para>Topology refresh is CAS-style: a new <see cref="RedundancyTopology"/> instance
/// replaces the old one atomically via <see cref="Interlocked.Exchange{T}"/>. Readers
/// always see a coherent snapshot — never a partial transition.</para>
/// </remarks>
public sealed class RedundancyCoordinator
{
private readonly IDbContextFactory<OtOpcUaConfigDbContext> _dbContextFactory;
private readonly ILogger<RedundancyCoordinator> _logger;
private readonly string _selfNodeId;
private readonly string _selfClusterId;
private RedundancyTopology? _current;
private bool _topologyValid = true;
public RedundancyCoordinator(
IDbContextFactory<OtOpcUaConfigDbContext> dbContextFactory,
ILogger<RedundancyCoordinator> logger,
string selfNodeId,
string selfClusterId)
{
ArgumentException.ThrowIfNullOrWhiteSpace(selfNodeId);
ArgumentException.ThrowIfNullOrWhiteSpace(selfClusterId);
_dbContextFactory = dbContextFactory;
_logger = logger;
_selfNodeId = selfNodeId;
_selfClusterId = selfClusterId;
}
/// <summary>Last-loaded topology; null before <see cref="InitializeAsync"/> completes.</summary>
public RedundancyTopology? Current => Volatile.Read(ref _current);
/// <summary>
/// True when the last load/refresh completed without an invariant violation; false
/// forces <see cref="ServiceLevelCalculator"/> into the <see cref="ServiceLevelBand.InvalidTopology"/>
/// band regardless of other inputs.
/// </summary>
public bool IsTopologyValid => Volatile.Read(ref _topologyValid);
/// <summary>Load the topology for the first time. Throws on invariant violation.</summary>
public async Task InitializeAsync(CancellationToken ct)
{
await RefreshInternalAsync(throwOnInvalid: true, ct).ConfigureAwait(false);
}
/// <summary>
/// Re-read the topology from the shared DB. Called after <c>sp_PublishGeneration</c>
/// completes or after an Admin-triggered role-swap. Never throws — on invariant
/// violation it logs + flips <see cref="IsTopologyValid"/> false so the calculator
/// returns <see cref="ServiceLevelBand.InvalidTopology"/> = 2.
/// </summary>
public async Task RefreshAsync(CancellationToken ct)
{
await RefreshInternalAsync(throwOnInvalid: false, ct).ConfigureAwait(false);
}
private async Task RefreshInternalAsync(bool throwOnInvalid, CancellationToken ct)
{
await using var db = await _dbContextFactory.CreateDbContextAsync(ct).ConfigureAwait(false);
var cluster = await db.ServerClusters.AsNoTracking()
.FirstOrDefaultAsync(c => c.ClusterId == _selfClusterId, ct).ConfigureAwait(false)
?? throw new InvalidTopologyException($"Cluster '{_selfClusterId}' not found in config DB.");
var nodes = await db.ClusterNodes.AsNoTracking()
.Where(n => n.ClusterId == _selfClusterId && n.Enabled)
.ToListAsync(ct).ConfigureAwait(false);
try
{
var topology = ClusterTopologyLoader.Load(_selfNodeId, cluster, nodes);
Volatile.Write(ref _current, topology);
Volatile.Write(ref _topologyValid, true);
_logger.LogInformation(
"Redundancy topology loaded: cluster={Cluster} self={Self} role={Role} mode={Mode} peers={PeerCount}",
topology.ClusterId, topology.SelfNodeId, topology.SelfRole, topology.Mode, topology.PeerCount);
}
catch (InvalidTopologyException ex)
{
Volatile.Write(ref _topologyValid, false);
_logger.LogError(ex,
"Redundancy topology invariant violation for cluster {Cluster}: {Reason}",
_selfClusterId, ex.Message);
if (throwOnInvalid) throw;
}
}
}

View File

@@ -0,0 +1,142 @@
using ZB.MOM.WW.OtOpcUa.Configuration.Enums;
namespace ZB.MOM.WW.OtOpcUa.Server.Redundancy;
/// <summary>
/// Orchestrates Phase 6.3 Stream C: feeds the <see cref="ServiceLevelCalculator"/> with the
/// current (topology, peer reachability, apply-in-progress, recovery dwell, self health)
/// inputs and emits the resulting <see cref="byte"/> + labelled <see cref="ServiceLevelBand"/>
/// to subscribers. The OPC UA <c>ServiceLevel</c> variable node consumes this via
/// <see cref="OnStateChanged"/> on every tick.
/// </summary>
/// <remarks>
/// Pure orchestration — no background timer, no OPC UA stack dep. The caller (a
/// HostedService in a future PR, or a test) drives <see cref="ComputeAndPublish"/> at
/// whatever cadence is appropriate. Each call reads the inputs + recomputes the ServiceLevel
/// byte; state is fired on the <see cref="OnStateChanged"/> event when the byte differs from
/// the last emitted value (edge-triggered). The <see cref="OnServerUriArrayChanged"/> event
/// fires whenever the topology's <c>ServerUriArray</c> content changes.
/// </remarks>
public sealed class RedundancyStatePublisher
{
private readonly RedundancyCoordinator _coordinator;
private readonly ApplyLeaseRegistry _leases;
private readonly RecoveryStateManager _recovery;
private readonly PeerReachabilityTracker _peers;
private readonly Func<bool> _selfHealthy;
private readonly Func<bool> _operatorMaintenance;
private byte _lastByte = 255; // start at Authoritative — harmless before first tick
private IReadOnlyList<string>? _lastServerUriArray;
public RedundancyStatePublisher(
RedundancyCoordinator coordinator,
ApplyLeaseRegistry leases,
RecoveryStateManager recovery,
PeerReachabilityTracker peers,
Func<bool>? selfHealthy = null,
Func<bool>? operatorMaintenance = null)
{
ArgumentNullException.ThrowIfNull(coordinator);
ArgumentNullException.ThrowIfNull(leases);
ArgumentNullException.ThrowIfNull(recovery);
ArgumentNullException.ThrowIfNull(peers);
_coordinator = coordinator;
_leases = leases;
_recovery = recovery;
_peers = peers;
_selfHealthy = selfHealthy ?? (() => true);
_operatorMaintenance = operatorMaintenance ?? (() => false);
}
/// <summary>
/// Fires with the current ServiceLevel byte + band on every call to
/// <see cref="ComputeAndPublish"/> when the byte differs from the previously-emitted one.
/// </summary>
public event Action<ServiceLevelSnapshot>? OnStateChanged;
/// <summary>
/// Fires when the cluster's ServerUriArray (self + peers) content changes — e.g. an
/// operator adds or removes a peer. Consumer is the OPC UA <c>ServerUriArray</c>
/// variable node in Stream C.2.
/// </summary>
public event Action<IReadOnlyList<string>>? OnServerUriArrayChanged;
/// <summary>Snapshot of the last-published ServiceLevel byte — diagnostics + tests.</summary>
public byte LastByte => _lastByte;
/// <summary>
/// Compute the current ServiceLevel + emit change events if anything moved. Caller
/// drives cadence — a 1 s tick in production is reasonable; tests drive it directly.
/// </summary>
public ServiceLevelSnapshot ComputeAndPublish()
{
var topology = _coordinator.Current;
if (topology is null)
{
// Not yet initialized — surface NoData so clients don't treat us as authoritative.
return Emit((byte)ServiceLevelBand.NoData, null);
}
// Aggregate peer reachability. For 2-node v2.0 clusters there is at most one peer;
// treat "all peers healthy" as the boolean input to the calculator.
var peerReachable = topology.Peers.All(p => _peers.Get(p.NodeId).BothHealthy);
var peerUaHealthy = topology.Peers.All(p => _peers.Get(p.NodeId).UaHealthy);
var peerHttpHealthy = topology.Peers.All(p => _peers.Get(p.NodeId).HttpHealthy);
var role = MapRole(topology.SelfRole);
var value = ServiceLevelCalculator.Compute(
role: role,
selfHealthy: _selfHealthy(),
peerUaHealthy: peerUaHealthy,
peerHttpHealthy: peerHttpHealthy,
applyInProgress: _leases.IsApplyInProgress,
recoveryDwellMet: _recovery.IsDwellMet(),
topologyValid: _coordinator.IsTopologyValid,
operatorMaintenance: _operatorMaintenance());
MaybeFireServerUriArray(topology);
return Emit(value, topology);
}
private static RedundancyRole MapRole(RedundancyRole role) => role switch
{
// Standalone is serving; treat as Primary for the matrix since the calculator
// already special-cases Standalone inside its Compute.
RedundancyRole.Primary => RedundancyRole.Primary,
RedundancyRole.Secondary => RedundancyRole.Secondary,
_ => RedundancyRole.Standalone,
};
private ServiceLevelSnapshot Emit(byte value, RedundancyTopology? topology)
{
var snap = new ServiceLevelSnapshot(
Value: value,
Band: ServiceLevelCalculator.Classify(value),
Topology: topology);
if (value != _lastByte)
{
_lastByte = value;
OnStateChanged?.Invoke(snap);
}
return snap;
}
private void MaybeFireServerUriArray(RedundancyTopology topology)
{
var current = topology.ServerUriArray();
if (_lastServerUriArray is null || !current.SequenceEqual(_lastServerUriArray, StringComparer.Ordinal))
{
_lastServerUriArray = current;
OnServerUriArrayChanged?.Invoke(current);
}
}
}
/// <summary>Per-tick output of <see cref="RedundancyStatePublisher.ComputeAndPublish"/>.</summary>
public sealed record ServiceLevelSnapshot(
byte Value,
ServiceLevelBand Band,
RedundancyTopology? Topology);

View File

@@ -0,0 +1,55 @@
using ZB.MOM.WW.OtOpcUa.Configuration.Enums;
namespace ZB.MOM.WW.OtOpcUa.Server.Redundancy;
/// <summary>
/// Snapshot of the cluster topology the <see cref="RedundancyCoordinator"/> holds. Read
/// once at startup + refreshed on publish-generation notification. Immutable — every
/// refresh produces a new instance so observers can compare identity-equality to detect
/// topology change.
/// </summary>
/// <remarks>
/// Per Phase 6.3 Stream A.1. Invariants enforced by the loader (see
/// <see cref="ClusterTopologyLoader"/>): at most one Primary per cluster for
/// WarmActive/Hot redundancy modes; every node has a unique ApplicationUri (OPC UA
/// Part 4 requirement — clients pin trust here); at most 2 nodes total per cluster
/// (decision #83).
/// </remarks>
public sealed record RedundancyTopology(
string ClusterId,
string SelfNodeId,
RedundancyRole SelfRole,
RedundancyMode Mode,
IReadOnlyList<RedundancyPeer> Peers,
string SelfApplicationUri)
{
/// <summary>Peer count — 0 for a standalone (single-node) cluster, 1 for v2 two-node clusters.</summary>
public int PeerCount => Peers.Count;
/// <summary>
/// ServerUriArray shape per OPC UA Part 4 §6.6.2.2 — self first, peers in stable
/// deterministic order (lexicographic by NodeId), self's ApplicationUri always at index 0.
/// </summary>
public IReadOnlyList<string> ServerUriArray() =>
new[] { SelfApplicationUri }
.Concat(Peers.OrderBy(p => p.NodeId, StringComparer.OrdinalIgnoreCase).Select(p => p.ApplicationUri))
.ToList();
}
/// <summary>One peer in the cluster (every node other than self).</summary>
/// <param name="NodeId">Peer's stable logical NodeId (e.g. <c>"LINE3-OPCUA-B"</c>).</param>
/// <param name="Role">Peer's declared redundancy role from the shared config DB.</param>
/// <param name="Host">Peer's hostname / IP — drives the health-probe target.</param>
/// <param name="OpcUaPort">Peer's OPC UA endpoint port.</param>
/// <param name="DashboardPort">Peer's dashboard / health-endpoint port.</param>
/// <param name="ApplicationUri">Peer's declared ApplicationUri (carried in <see cref="RedundancyTopology.ServerUriArray"/>).</param>
public sealed record RedundancyPeer(
string NodeId,
RedundancyRole Role,
string Host,
int OpcUaPort,
int DashboardPort,
string ApplicationUri);
/// <summary>Thrown when the loader detects a topology-invariant violation at startup or refresh.</summary>
public sealed class InvalidTopologyException(string message) : Exception(message);

View File

@@ -0,0 +1,139 @@
using Microsoft.Extensions.Logging;
using Opc.Ua;
using Opc.Ua.Server;
using ZB.MOM.WW.OtOpcUa.Configuration.Enums;
using ConfigRedundancyMode = ZB.MOM.WW.OtOpcUa.Configuration.Enums.RedundancyMode;
namespace ZB.MOM.WW.OtOpcUa.Server.Redundancy;
/// <summary>
/// Phase 6.3 Stream C (task #147) — the seam that carries the
/// <see cref="RedundancyStatePublisher"/>'s computed values onto the standard OPC UA
/// Server object nodes:
/// <list type="bullet">
/// <item><c>Server.ServiceLevel</c> (<see cref="VariableIds.Server_ServiceLevel"/>)
/// — Byte (0..255), Part 5 §6.3.34. Clients poll to pick the healthiest peer.</item>
/// <item><c>Server.ServerRedundancy.RedundancySupport</c>
/// (<see cref="VariableIds.Server_ServerRedundancy_RedundancySupport"/>)
/// — advertises Warm / Hot / Cold / None per Part 4 §6.6.2.</item>
/// <item><c>Server.ServerRedundancy.ServerUriArray</c>
/// (<see cref="VariableIds.NonTransparentRedundancyType_ServerUriArray"/>
/// when the redundancy node is upgraded to non-transparent)
/// — ApplicationUri of every node in the pair, self first.</item>
/// </list>
/// The writer is constructed once during the <c>OtOpcUaServer.OnServerStarted</c> hook;
/// callers invoke <see cref="ApplyServiceLevel"/> / <see cref="ApplyServerUriArray"/> /
/// <see cref="ApplyRedundancySupport"/> on publisher events. Each setter updates the
/// underlying <see cref="BaseVariableState.Value"/> then calls
/// <see cref="NodeState.ClearChangeMasks"/> to flush the change to subscribers.
/// </summary>
/// <remarks>
/// The writer is defensive: if the expected node shape isn't present on this particular
/// SDK build (e.g. <c>ServerUriArray</c> only exists on the
/// <c>NonTransparentRedundancyType</c> subtype and the ServerObject's default
/// <c>ServerRedundancy</c> property is the base type) the writer logs a warning once and
/// skips that specific update rather than throwing — matches the SDK's own tolerance
/// for optional address-space shape.
/// </remarks>
public sealed class ServerRedundancyNodeWriter
{
private readonly IServerInternal _server;
private readonly ILogger<ServerRedundancyNodeWriter> _logger;
private readonly object _gate = new();
private bool _warnedMissingServerUriArray;
private byte? _lastServiceLevel;
private RedundancySupport? _lastRedundancySupport;
private IReadOnlyList<string>? _lastServerUriArray;
public ServerRedundancyNodeWriter(IServerInternal server, ILogger<ServerRedundancyNodeWriter> logger)
{
ArgumentNullException.ThrowIfNull(server);
ArgumentNullException.ThrowIfNull(logger);
_server = server;
_logger = logger;
}
/// <summary>Push a new Byte value onto <c>Server.ServiceLevel</c> + notify subscribers.</summary>
public void ApplyServiceLevel(byte value)
{
var serverObject = _server.ServerObject;
if (serverObject?.ServiceLevel is not { } node) return;
lock (_gate)
{
if (_lastServiceLevel == value) return;
_lastServiceLevel = value;
node.Value = value;
node.Timestamp = DateTime.UtcNow;
node.ClearChangeMasks(_server.DefaultSystemContext, includeChildren: false);
}
}
/// <summary>
/// Map the Configuration-side <see cref="ConfigRedundancyMode"/> to OPC UA's
/// <see cref="RedundancySupport"/> enum + apply to
/// <c>Server.ServerRedundancy.RedundancySupport</c>. Called once at
/// the <c>OtOpcUaServer.OnServerStarted</c> hook — the value is effectively static per
/// deployment.
/// </summary>
public void ApplyRedundancySupport(ConfigRedundancyMode mode)
{
var serverObject = _server.ServerObject;
if (serverObject?.ServerRedundancy?.RedundancySupport is not { } node) return;
// RedundancyMode only declares None / Warm / Hot in v2.0 (non-transparent only per
// decision #85). OPC UA's RedundancySupport has more states — clamp to the three we
// support and let config-DB CHECK constraints prevent surprises.
var support = mode switch
{
ConfigRedundancyMode.Warm => RedundancySupport.Warm,
ConfigRedundancyMode.Hot => RedundancySupport.Hot,
_ => RedundancySupport.None,
};
lock (_gate)
{
if (_lastRedundancySupport == support) return;
_lastRedundancySupport = support;
node.Value = support;
node.Timestamp = DateTime.UtcNow;
node.ClearChangeMasks(_server.DefaultSystemContext, includeChildren: false);
}
}
/// <summary>
/// Push the self-first peer-URI list onto
/// <c>Server.ServerRedundancy.ServerUriArray</c>. Only applies when the SDK created
/// <c>ServerRedundancy</c> as <see cref="NonTransparentRedundancyState"/>; on the
/// base <see cref="ServerRedundancyState"/> the child is absent and we log-and-skip.
/// </summary>
public void ApplyServerUriArray(IReadOnlyList<string> serverUris)
{
ArgumentNullException.ThrowIfNull(serverUris);
var serverObject = _server.ServerObject;
if (serverObject?.ServerRedundancy is not NonTransparentRedundancyState ntr
|| ntr.ServerUriArray is not { } node)
{
if (!_warnedMissingServerUriArray)
{
_warnedMissingServerUriArray = true;
_logger.LogWarning(
"Server.ServerRedundancy is not NonTransparentRedundancyState — ServerUriArray " +
"cannot be published on this server instance. Clients will not see peer URIs " +
"on the Part 4 §6.6.2 redundancy node until the redundancy-object type is upgraded.");
}
return;
}
lock (_gate)
{
if (_lastServerUriArray is not null && _lastServerUriArray.SequenceEqual(serverUris, StringComparer.Ordinal))
return;
_lastServerUriArray = [.. serverUris];
node.Value = [.. serverUris];
node.Timestamp = DateTime.UtcNow;
node.ClearChangeMasks(_server.DefaultSystemContext, includeChildren: false);
}
}
}

View File

@@ -0,0 +1,131 @@
using ZB.MOM.WW.OtOpcUa.Configuration.Enums;
namespace ZB.MOM.WW.OtOpcUa.Server.Redundancy;
/// <summary>
/// Pure-function translator from the redundancy-state inputs (role, self health, peer
/// reachability via HTTP + UA probes, apply-in-progress flag, recovery dwell, topology
/// validity) to the OPC UA Part 5 §6.3.34 <see cref="byte"/> ServiceLevel value.
/// </summary>
/// <remarks>
/// <para>Per decision #154 the 8-state matrix avoids the reserved bands (0=Maintenance,
/// 1=NoData) for operational states. Operational values occupy 2..255 so a spec-compliant
/// client that cuts over on "&lt;3 = unhealthy" keeps working without its vendor treating
/// the server as "under maintenance" during normal runtime.</para>
///
/// <para>This class is pure — no threads, no I/O. The coordinator that owns it re-evaluates
/// on every input change and pushes the new byte through an <c>IObserver&lt;byte&gt;</c> to
/// the OPC UA ServiceLevel variable. Tests exercise the full matrix without touching a UA
/// stack.</para>
/// </remarks>
public static class ServiceLevelCalculator
{
/// <summary>Compute the ServiceLevel for the given inputs.</summary>
/// <param name="role">Role declared for this node in the shared config DB.</param>
/// <param name="selfHealthy">This node's own health (from Phase 6.1 /healthz).</param>
/// <param name="peerUaHealthy">Peer node reachable via OPC UA probe.</param>
/// <param name="peerHttpHealthy">Peer node reachable via HTTP /healthz probe.</param>
/// <param name="applyInProgress">True while this node is inside a publish-generation apply window.</param>
/// <param name="recoveryDwellMet">True once the post-fault dwell + publish-witness conditions are met.</param>
/// <param name="topologyValid">False when the cluster has detected &gt;1 Primary (InvalidTopology demotes both nodes).</param>
/// <param name="operatorMaintenance">True when operator has declared the node in maintenance.</param>
public static byte Compute(
RedundancyRole role,
bool selfHealthy,
bool peerUaHealthy,
bool peerHttpHealthy,
bool applyInProgress,
bool recoveryDwellMet,
bool topologyValid,
bool operatorMaintenance = false)
{
// Reserved bands first — they override everything per OPC UA Part 5 §6.3.34.
if (operatorMaintenance) return (byte)ServiceLevelBand.Maintenance; // 0
if (!selfHealthy) return (byte)ServiceLevelBand.NoData; // 1
if (!topologyValid) return (byte)ServiceLevelBand.InvalidTopology; // 2
// Standalone nodes have no peer — treat as authoritative when healthy.
if (role == RedundancyRole.Standalone)
return (byte)(applyInProgress ? ServiceLevelBand.PrimaryMidApply : ServiceLevelBand.AuthoritativePrimary);
var isPrimary = role == RedundancyRole.Primary;
// Apply-in-progress band dominates recovery + isolation (client should cut to peer).
if (applyInProgress)
return (byte)(isPrimary ? ServiceLevelBand.PrimaryMidApply : ServiceLevelBand.BackupMidApply);
// Post-fault recovering — hold until dwell + witness satisfied.
if (!recoveryDwellMet)
return (byte)(isPrimary ? ServiceLevelBand.RecoveringPrimary : ServiceLevelBand.RecoveringBackup);
// Peer unreachable (either probe fails) → isolated band. Per decision #154 Primary
// retains authority at 230 when isolated; Backup signals 80 "take over if asked" and
// does NOT auto-promote (non-transparent model).
var peerReachable = peerUaHealthy && peerHttpHealthy;
if (!peerReachable)
return (byte)(isPrimary ? ServiceLevelBand.IsolatedPrimary : ServiceLevelBand.IsolatedBackup);
return (byte)(isPrimary ? ServiceLevelBand.AuthoritativePrimary : ServiceLevelBand.AuthoritativeBackup);
}
/// <summary>Labels a ServiceLevel byte with its matrix band name — for logs + Admin UI.</summary>
public static ServiceLevelBand Classify(byte value) => value switch
{
(byte)ServiceLevelBand.Maintenance => ServiceLevelBand.Maintenance,
(byte)ServiceLevelBand.NoData => ServiceLevelBand.NoData,
(byte)ServiceLevelBand.InvalidTopology => ServiceLevelBand.InvalidTopology,
(byte)ServiceLevelBand.RecoveringBackup => ServiceLevelBand.RecoveringBackup,
(byte)ServiceLevelBand.BackupMidApply => ServiceLevelBand.BackupMidApply,
(byte)ServiceLevelBand.IsolatedBackup => ServiceLevelBand.IsolatedBackup,
(byte)ServiceLevelBand.AuthoritativeBackup => ServiceLevelBand.AuthoritativeBackup,
(byte)ServiceLevelBand.RecoveringPrimary => ServiceLevelBand.RecoveringPrimary,
(byte)ServiceLevelBand.PrimaryMidApply => ServiceLevelBand.PrimaryMidApply,
(byte)ServiceLevelBand.IsolatedPrimary => ServiceLevelBand.IsolatedPrimary,
(byte)ServiceLevelBand.AuthoritativePrimary => ServiceLevelBand.AuthoritativePrimary,
_ => ServiceLevelBand.Unknown,
};
}
/// <summary>
/// Named bands of the 8-state ServiceLevel matrix. Numeric values match the
/// <see cref="ServiceLevelCalculator"/> table exactly; any drift will be caught by the
/// Phase 6.3 compliance script.
/// </summary>
public enum ServiceLevelBand : byte
{
/// <summary>Operator-declared maintenance. Reserved per OPC UA Part 5 §6.3.34.</summary>
Maintenance = 0,
/// <summary>Unreachable / Faulted. Reserved per OPC UA Part 5 §6.3.34.</summary>
NoData = 1,
/// <summary>Detected-inconsistency band — &gt;1 Primary observed runtime; both nodes self-demote.</summary>
InvalidTopology = 2,
/// <summary>Backup post-fault, dwell not met.</summary>
RecoveringBackup = 30,
/// <summary>Backup inside a publish-apply window.</summary>
BackupMidApply = 50,
/// <summary>Backup with unreachable Primary — "take over if asked"; does NOT auto-promote.</summary>
IsolatedBackup = 80,
/// <summary>Backup nominal operation.</summary>
AuthoritativeBackup = 100,
/// <summary>Primary post-fault, dwell not met.</summary>
RecoveringPrimary = 180,
/// <summary>Primary inside a publish-apply window.</summary>
PrimaryMidApply = 200,
/// <summary>Primary with unreachable peer, self serving — retains authority.</summary>
IsolatedPrimary = 230,
/// <summary>Primary nominal operation.</summary>
AuthoritativePrimary = 255,
/// <summary>Sentinel for unrecognised byte values.</summary>
Unknown = 254,
}

View File

@@ -0,0 +1,100 @@
using System.Text.Json;
using Microsoft.Data.SqlClient;
using Microsoft.Extensions.Logging;
using ZB.MOM.WW.OtOpcUa.Configuration.LocalCache;
namespace ZB.MOM.WW.OtOpcUa.Server;
/// <summary>
/// Phase 6.1 Stream D consumption hook — bootstraps the node's current generation through
/// the <see cref="ResilientConfigReader"/> pipeline + writes every successful central-DB
/// read into the <see cref="GenerationSealedCache"/> so the next cache-miss path has a
/// sealed snapshot to fall back to.
/// </summary>
/// <remarks>
/// <para>Alongside the original <see cref="NodeBootstrap"/> (which uses the single-file
/// <see cref="ILocalConfigCache"/>). Program.cs can switch to this one once operators are
/// ready for the generation-sealed semantics. The original stays for backward compat
/// with the three integration tests that construct <see cref="NodeBootstrap"/> directly.</para>
///
/// <para>Closes release blocker #2 in <c>docs/v2/v2-release-readiness.md</c> — the
/// generation-sealed cache + resilient reader + stale-config flag ship as unit-tested
/// primitives in PR #81 but no production path consumed them until this wrapper.</para>
/// </remarks>
public sealed class SealedBootstrap
{
private readonly NodeOptions _options;
private readonly GenerationSealedCache _cache;
private readonly ResilientConfigReader _reader;
private readonly StaleConfigFlag _staleFlag;
private readonly ILogger<SealedBootstrap> _logger;
public SealedBootstrap(
NodeOptions options,
GenerationSealedCache cache,
ResilientConfigReader reader,
StaleConfigFlag staleFlag,
ILogger<SealedBootstrap> logger)
{
_options = options;
_cache = cache;
_reader = reader;
_staleFlag = staleFlag;
_logger = logger;
}
/// <summary>
/// Resolve the current generation for this node. Routes the central-DB fetch through
/// <see cref="ResilientConfigReader"/> (timeout → retry → fallback-to-cache) + seals a
/// fresh snapshot on every successful DB read so a future cache-miss has something to
/// serve.
/// </summary>
public async Task<BootstrapResult> LoadCurrentGenerationAsync(CancellationToken ct)
{
return await _reader.ReadAsync(
_options.ClusterId,
centralFetch: async innerCt => await FetchFromCentralAsync(innerCt).ConfigureAwait(false),
fromSnapshot: snap => BootstrapResult.FromCache(snap.GenerationId),
ct).ConfigureAwait(false);
}
private async ValueTask<BootstrapResult> FetchFromCentralAsync(CancellationToken ct)
{
await using var conn = new SqlConnection(_options.ConfigDbConnectionString);
await conn.OpenAsync(ct).ConfigureAwait(false);
await using var cmd = conn.CreateCommand();
cmd.CommandText = "EXEC dbo.sp_GetCurrentGenerationForCluster @NodeId=@n, @ClusterId=@c";
cmd.Parameters.AddWithValue("@n", _options.NodeId);
cmd.Parameters.AddWithValue("@c", _options.ClusterId);
await using var reader = await cmd.ExecuteReaderAsync(ct).ConfigureAwait(false);
if (!await reader.ReadAsync(ct).ConfigureAwait(false))
{
_logger.LogWarning("Cluster {Cluster} has no Published generation yet", _options.ClusterId);
return BootstrapResult.EmptyFromDb();
}
var generationId = reader.GetInt64(0);
_logger.LogInformation("Bootstrapped from central DB: generation {GenerationId}; sealing snapshot", generationId);
// Seal a minimal snapshot with the generation pointer. A richer snapshot that carries
// the full sp_GetGenerationContent payload lands when the bootstrap flow grows to
// consume the content during offline operation (separate follow-up — see decision #148
// and phase-6-1 Stream D.3). The pointer alone is enough for the fallback path to
// surface the last-known-good generation id + flip UsingStaleConfig.
await _cache.SealAsync(new GenerationSnapshot
{
ClusterId = _options.ClusterId,
GenerationId = generationId,
CachedAt = DateTime.UtcNow,
PayloadJson = JsonSerializer.Serialize(new { generationId, source = "sp_GetCurrentGenerationForCluster" }),
}, ct).ConfigureAwait(false);
// StaleConfigFlag bookkeeping: ResilientConfigReader.MarkFresh on the returning call
// path; we're on the fresh branch so we don't touch the flag here.
_ = _staleFlag; // held so the field isn't flagged unused
return BootstrapResult.FromDb(generationId);
}
}

View File

@@ -0,0 +1,115 @@
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Logging;
using ZB.MOM.WW.OtOpcUa.Configuration;
using ZB.MOM.WW.OtOpcUa.Core.Authorization;
using ZB.MOM.WW.OtOpcUa.Core.OpcUa;
using ZB.MOM.WW.OtOpcUa.Server.OpcUa;
namespace ZB.MOM.WW.OtOpcUa.Server.Security;
/// <summary>
/// Bootstraps the Phase 6.2 authorization pipeline for the running Server. Loads
/// <c>NodeAcl</c> rows for the current generation into a
/// <see cref="PermissionTrieCache"/>, constructs an <see cref="AuthorizationGate"/>,
/// and merges per-namespace <see cref="EquipmentNamespaceContent"/> into a single
/// full-path index for <see cref="NodeScopeResolver"/>.
/// </summary>
/// <remarks>
/// <para>
/// Called by <c>OpcUaServerService.ExecuteAsync</c> after the
/// <see cref="DriverEquipmentContentRegistry"/> has been populated but before
/// <c>OpcUaApplicationHost.StartAsync</c> runs — that's the window where the
/// config-DB state is known + the OPC UA server hasn't yet captured the gate
/// references.
/// </para>
/// <para>
/// <see cref="AuthorizationOptions.Enabled"/> gates the whole flow. When
/// <c>false</c> (default), <see cref="BuildAsync"/> returns <c>(null, null)</c>
/// and the dispatch layer short-circuits every ACL check — identical to
/// pre-Phase-6.2.
/// </para>
/// </remarks>
public sealed class AuthorizationBootstrap(
IDbContextFactory<OtOpcUaConfigDbContext> dbFactory,
DriverEquipmentContentRegistry equipmentContentRegistry,
NodeOptions nodeOptions,
ILogger<AuthorizationBootstrap> logger)
{
/// <summary>
/// Build a gate + resolver pair for the supplied <paramref name="generationId"/>.
/// Returns <c>(null, null)</c> when authorization is disabled via
/// <see cref="AuthorizationOptions.Enabled"/> or when the generation couldn't be
/// fetched — in that case the dispatch layer runs without ACL enforcement (same
/// behaviour the Server had before Phase 6.2 Stream C landed).
/// </summary>
public async Task<(AuthorizationGate?, NodeScopeResolver?)> BuildAsync(
long? generationId, CancellationToken cancellationToken)
{
if (!nodeOptions.Authorization.Enabled)
{
logger.LogInformation(
"Authorization disabled (Node:Authorization:Enabled=false); all ACL gates remain inert");
return (null, null);
}
if (generationId is not long gen)
{
logger.LogWarning(
"Authorization enabled but no Published generation available — ACL enforcement skipped until next publish");
return (null, null);
}
var gate = await BuildGateAsync(gen, cancellationToken).ConfigureAwait(false);
var resolver = BuildResolver();
logger.LogInformation(
"Authorization pipeline bootstrapped — generation {Gen}, strictMode={Strict}",
gen, nodeOptions.Authorization.StrictMode);
return (gate, resolver);
}
/// <summary>
/// Load every <see cref="Configuration.Entities.NodeAcl"/> row for
/// <paramref name="generationId"/> scoped to this node's cluster, build a
/// <see cref="PermissionTrieCache"/>, construct an <see cref="AuthorizationGate"/>.
/// </summary>
private async Task<AuthorizationGate> BuildGateAsync(long generationId, CancellationToken cancellationToken)
{
await using var ctx = await dbFactory.CreateDbContextAsync(cancellationToken).ConfigureAwait(false);
var rows = await ctx.NodeAcls
.AsNoTracking()
.Where(a => a.ClusterId == nodeOptions.ClusterId && a.GenerationId == generationId)
.ToListAsync(cancellationToken)
.ConfigureAwait(false);
var cache = new PermissionTrieCache();
cache.Install(PermissionTrieBuilder.Build(nodeOptions.ClusterId, generationId, rows));
var evaluator = new TriePermissionEvaluator(cache);
return new AuthorizationGate(evaluator, strictMode: nodeOptions.Authorization.StrictMode);
}
/// <summary>
/// Merge each registered driver's <see cref="EquipmentNamespaceContent"/> into a single
/// full-path index. Tag rows that cross-reference missing Equipment / Line / Area are
/// silently skipped (the cluster-only fallback handles them). Duplicate TagConfig
/// across namespaces is a config error — <see cref="ScopePathIndexBuilder"/> throws
/// on collision; we let that bubble so bootstrap fails fast.
/// </summary>
private NodeScopeResolver BuildResolver()
{
var merged = new Dictionary<string, NodeScope>(StringComparer.Ordinal);
foreach (var kv in equipmentContentRegistry.Snapshot())
{
// Namespace id isn't carried on EquipmentNamespaceContent directly — driverId
// serves as the namespace-stable key for ACL scope resolution.
var perNamespace = ScopePathIndexBuilder.Build(nodeOptions.ClusterId, kv.Key, kv.Value);
foreach (var entry in perNamespace)
merged[entry.Key] = entry.Value;
}
return merged.Count == 0
? new NodeScopeResolver(nodeOptions.ClusterId)
: new NodeScopeResolver(nodeOptions.ClusterId, merged);
}
}

View File

@@ -0,0 +1,86 @@
using Opc.Ua;
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
using ZB.MOM.WW.OtOpcUa.Core.Authorization;
namespace ZB.MOM.WW.OtOpcUa.Server.Security;
/// <summary>
/// Bridges the OPC UA stack's <see cref="ISystemContext.UserIdentity"/> to the
/// <see cref="IPermissionEvaluator"/> evaluator. Resolves the session's
/// <see cref="UserAuthorizationState"/> from whatever the identity claims + the stack's
/// session handle, then delegates to the evaluator and returns a single bool the
/// dispatch paths can use to short-circuit with <c>BadUserAccessDenied</c>.
/// </summary>
/// <remarks>
/// <para>This class is deliberately the single integration seam between the Server
/// project and the <c>Core.Authorization</c> evaluator. DriverNodeManager holds one
/// reference and calls <see cref="IsAllowed"/> on every Read / Write / HistoryRead /
/// Browse / Call / CreateMonitoredItems / etc. The evaluator itself stays pure — it
/// doesn't know about the OPC UA stack types.</para>
///
/// <para>Fail-open-during-transition: when the evaluator is configured with
/// <c>StrictMode = false</c>, missing cluster tries OR sessions without resolved
/// LDAP groups get <c>true</c> so existing deployments keep working while ACLs are
/// populated. Flip to strict via <c>Authorization:StrictMode = true</c> in production.</para>
/// </remarks>
public sealed class AuthorizationGate
{
private readonly IPermissionEvaluator _evaluator;
private readonly bool _strictMode;
private readonly TimeProvider _timeProvider;
public AuthorizationGate(IPermissionEvaluator evaluator, bool strictMode = false, TimeProvider? timeProvider = null)
{
_evaluator = evaluator ?? throw new ArgumentNullException(nameof(evaluator));
_strictMode = strictMode;
_timeProvider = timeProvider ?? TimeProvider.System;
}
/// <summary>True when strict authorization is enabled — no-grant = denied.</summary>
public bool StrictMode => _strictMode;
/// <summary>
/// Authorize an OPC UA operation against the session identity + scope. Returns true to
/// allow the dispatch to continue; false to surface <c>BadUserAccessDenied</c>.
/// </summary>
public bool IsAllowed(IUserIdentity? identity, OpcUaOperation operation, NodeScope scope)
{
// Anonymous / unknown identity — strict mode denies, lax mode allows so the fallback
// auth layers (WriteAuthzPolicy) still see the call.
if (identity is null) return !_strictMode;
var session = BuildSessionState(identity, scope.ClusterId);
if (session is null)
{
// Identity doesn't carry LDAP groups. In lax mode let the dispatch proceed so
// older deployments keep working; strict mode denies.
return !_strictMode;
}
var decision = _evaluator.Authorize(session, operation, scope);
if (decision.IsAllowed) return true;
return !_strictMode;
}
/// <summary>
/// Materialize a <see cref="UserAuthorizationState"/> from the session identity.
/// Returns null when the identity doesn't carry LDAP group metadata.
/// </summary>
public UserAuthorizationState? BuildSessionState(IUserIdentity identity, string clusterId)
{
if (identity is not ILdapGroupsBearer bearer || bearer.LdapGroups.Count == 0)
return null;
var sessionId = identity.DisplayName ?? Guid.NewGuid().ToString("N");
return new UserAuthorizationState
{
SessionId = sessionId,
ClusterId = clusterId,
LdapGroups = bearer.LdapGroups,
MembershipResolvedUtc = _timeProvider.GetUtcNow().UtcDateTime,
AuthGenerationId = 0,
MembershipVersion = 0,
};
}
}

View File

@@ -0,0 +1,33 @@
namespace ZB.MOM.WW.OtOpcUa.Server.Security;
/// <summary>
/// Configuration for the Phase 6.2 authorization pipeline. Bound from the
/// <c>Node:Authorization</c> section of <c>appsettings.json</c>. Defaults ship disabled
/// so upgrading from pre-Phase-6.2 doesn't accidentally start denying reads the day a
/// new build lands — operators opt in explicitly once their <c>NodeAcl</c> rows are
/// populated.
/// </summary>
/// <remarks>
/// <para>
/// <see cref="Enabled"/> is the master switch. When <c>false</c> (default),
/// the OPC UA application host constructs with
/// <c>authzGate: null, scopeResolver: null</c>; all six dispatch-layer gates
/// (Read, Write, HistoryRead, Browse, CreateMonitoredItems, Call) short-circuit
/// to pass — identical behaviour to pre-Phase-6.2.
/// </para>
/// <para>
/// When <c>true</c>, <see cref="StrictMode"/> picks between two failure modes:
/// <c>false</c> (default) grants anonymous / no-LDAP-groups identities a pass-
/// through so v1-style legacy clients keep working; <c>true</c> denies them.
/// Production deployments should flip to <c>StrictMode = true</c> once every
/// client has been validated against the new identity flow.
/// </para>
/// </remarks>
public sealed class AuthorizationOptions
{
/// <summary>Master switch. False = gate is inert; true = gate is wired into dispatch.</summary>
public bool Enabled { get; init; }
/// <summary>False = anonymous / no-groups identities pass; true = they're denied.</summary>
public bool StrictMode { get; init; }
}

View File

@@ -0,0 +1,20 @@
namespace ZB.MOM.WW.OtOpcUa.Server.Security;
/// <summary>
/// Minimal interface an <see cref="Opc.Ua.IUserIdentity"/> exposes so the Phase 6.2
/// authorization evaluator can read the session's resolved LDAP group DNs without a
/// hard dependency on any specific identity subtype. Implemented by OtOpcUaServer's
/// role-based identity; tests stub it to drive the evaluator under different group
/// memberships.
/// </summary>
/// <remarks>
/// Control/data-plane separation (decision #150): Admin UI role routing consumes
/// <see cref="IRoleBearer.Roles"/> via <c>LdapGroupRoleMapping</c>; the OPC UA data-path
/// evaluator consumes <see cref="LdapGroups"/> directly against <c>NodeAcl</c>. The two
/// are sourced from the same directory query at sign-in but never cross.
/// </remarks>
public interface ILdapGroupsBearer
{
/// <summary>Fully-qualified LDAP group DNs the user is a member of.</summary>
IReadOnlyList<string> LdapGroups { get; }
}

View File

@@ -0,0 +1,13 @@
namespace ZB.MOM.WW.OtOpcUa.Server.Security;
/// <summary>
/// Minimal interface a <see cref="Opc.Ua.IUserIdentity"/> implementation can expose so
/// <see cref="ZB.MOM.WW.OtOpcUa.Server.OpcUa.DriverNodeManager"/> can read the session's
/// resolved roles without a hard dependency on any specific identity subtype. Implemented
/// by <c>OtOpcUaServer.RoleBasedIdentity</c>; tests implement it with stub identities to
/// drive the authz policy under different role sets.
/// </summary>
public interface IRoleBearer
{
IReadOnlyList<string> Roles { get; }
}

View File

@@ -0,0 +1,30 @@
namespace ZB.MOM.WW.OtOpcUa.Server.Security;
/// <summary>
/// Validates a (username, password) pair and returns the resolved OPC UA roles for the user.
/// The Server's <c>SessionManager_ImpersonateUser</c> hook delegates here so unit tests can
/// swap in a fake authenticator without a live LDAP.
/// </summary>
public interface IUserAuthenticator
{
Task<UserAuthResult> AuthenticateAsync(string username, string password, CancellationToken ct = default);
}
/// <param name="Success">True iff the bind succeeded and roles/groups were resolved.</param>
/// <param name="DisplayName">User display name from LDAP, or null on failure.</param>
/// <param name="Roles">Mapped OPC UA role names (Admin / control-plane consumption — see decision #150).</param>
/// <param name="Groups">Raw LDAP group names the user belongs to. Phase 6.2 data-path authorization
/// (NodeAcl evaluator) keys off this list directly, not Roles. Empty for anonymous / failed binds.</param>
/// <param name="Error">Human-readable failure reason, or null on success.</param>
public sealed record UserAuthResult(
bool Success, string? DisplayName, IReadOnlyList<string> Roles, IReadOnlyList<string> Groups, string? Error);
/// <summary>
/// Always-reject authenticator used when no security config is provided. Lets the server
/// start (with only an anonymous endpoint) without throwing on UserName token attempts.
/// </summary>
public sealed class DenyAllUserAuthenticator : IUserAuthenticator
{
public Task<UserAuthResult> AuthenticateAsync(string _, string __, CancellationToken ___)
=> Task.FromResult(new UserAuthResult(false, null, [], [], "UserName token not supported"));
}

View File

@@ -0,0 +1,72 @@
namespace ZB.MOM.WW.OtOpcUa.Server.Security;
/// <summary>
/// LDAP settings for the OPC UA server's UserName token validator. Bound from
/// <c>appsettings.json</c> <c>OpcUaServer:Ldap</c>. Defaults target the GLAuth dev instance
/// (localhost:3893, <c>dc=lmxopcua,dc=local</c>) for the stock inner-loop setup. Production
/// deployments are expected to point at Active Directory; see <see cref="UserNameAttribute"/>
/// and the per-field xml-docs for the AD-specific overrides.
/// </summary>
/// <remarks>
/// <para><b>Active Directory cheat-sheet</b>:</para>
/// <list type="bullet">
/// <item><see cref="Server"/>: one of the domain controllers, or the domain FQDN (will round-robin DCs).</item>
/// <item><see cref="Port"/>: <c>389</c> (LDAP) or <c>636</c> (LDAPS); use 636 + <see cref="UseTls"/> in production.</item>
/// <item><see cref="UseTls"/>: <c>true</c>. AD increasingly rejects plain-LDAP bind under LDAP-signing enforcement.</item>
/// <item><see cref="AllowInsecureLdap"/>: <c>false</c>. Dev escape hatch only.</item>
/// <item><see cref="SearchBase"/>: <c>DC=corp,DC=example,DC=com</c> — your domain's base DN.</item>
/// <item><see cref="ServiceAccountDn"/>: a dedicated service principal with read access to user + group entries
/// (e.g. <c>CN=OpcUaSvc,OU=Service Accounts,DC=corp,DC=example,DC=com</c>). Never a privileged admin.</item>
/// <item><see cref="UserNameAttribute"/>: <c>sAMAccountName</c> (classic login name) or <c>userPrincipalName</c>
/// (user@domain form). Default is <c>uid</c> which AD does <b>not</b> populate, so this override is required.</item>
/// <item><see cref="DisplayNameAttribute"/>: <c>displayName</c> gives the human name; <c>cn</c> works too but is less rich.</item>
/// <item><see cref="GroupAttribute"/>: <c>memberOf</c> — matches AD's default. Values are full DNs
/// (<c>CN=&lt;Group&gt;,OU=...,DC=...</c>); the authenticator strips the leading <c>CN=</c> RDN value and uses
/// that as the lookup key in <see cref="GroupToRole"/>.</item>
/// <item><see cref="GroupToRole"/>: maps your AD group common-names to OPC UA roles — e.g.
/// <c>{"OPCUA-Operators" : "WriteOperate", "OPCUA-Engineers" : "WriteConfigure"}</c>.</item>
/// </list>
/// <para>
/// Nested groups are <b>not</b> expanded — AD's <c>tokenGroups</c> / <c>LDAP_MATCHING_RULE_IN_CHAIN</c>
/// membership-chain filter isn't used. Assign users directly to the role-mapped groups, or pre-flatten
/// membership in your directory. If nested expansion becomes a requirement, it's an authenticator
/// enhancement (not a config change).
/// </para>
/// </remarks>
public sealed class LdapOptions
{
public bool Enabled { get; init; } = false;
public string Server { get; init; } = "localhost";
public int Port { get; init; } = 3893;
public bool UseTls { get; init; } = false;
/// <summary>Dev-only escape hatch — must be false in production.</summary>
public bool AllowInsecureLdap { get; init; } = true;
public string SearchBase { get; init; } = "dc=lmxopcua,dc=local";
public string ServiceAccountDn { get; init; } = string.Empty;
public string ServiceAccountPassword { get; init; } = string.Empty;
public string DisplayNameAttribute { get; init; } = "cn";
public string GroupAttribute { get; init; } = "memberOf";
/// <summary>
/// LDAP attribute used to match a login name against user entries in the directory.
/// Defaults to <c>uid</c> (RFC 2307). Common overrides:
/// <list type="bullet">
/// <item><c>sAMAccountName</c> — Active Directory, classic NT-style login names (e.g. <c>jdoe</c>).</item>
/// <item><c>userPrincipalName</c> — Active Directory, email-style (e.g. <c>jdoe@corp.example.com</c>).</item>
/// <item><c>cn</c> — GLAuth + some OpenLDAP deployments where users are keyed by common-name.</item>
/// </list>
/// Used only when <see cref="ServiceAccountDn"/> is non-empty (search-then-bind path) —
/// direct-bind fallback constructs the DN as <c>cn=&lt;name&gt;,&lt;SearchBase&gt;</c>
/// regardless of this setting and is not a production-grade path against AD.
/// </summary>
public string UserNameAttribute { get; init; } = "uid";
/// <summary>
/// LDAP group → OPC UA role. Each authenticated user gets every role whose source group
/// is in their membership list. Recognized role names (CLAUDE.md): <c>ReadOnly</c> (browse
/// + read), <c>WriteOperate</c>, <c>WriteTune</c>, <c>WriteConfigure</c>, <c>AlarmAck</c>.
/// </summary>
public Dictionary<string, string> GroupToRole { get; init; } = new(StringComparer.OrdinalIgnoreCase);
}

View File

@@ -0,0 +1,151 @@
using Microsoft.Extensions.Logging;
using Novell.Directory.Ldap;
namespace ZB.MOM.WW.OtOpcUa.Server.Security;
/// <summary>
/// <see cref="IUserAuthenticator"/> that binds to the configured LDAP directory to validate
/// the (username, password) pair, then pulls group membership and maps to OPC UA roles.
/// Mirrors the bind-then-search pattern in <c>Admin.Security.LdapAuthService</c> but stays
/// in the Server project so the Server process doesn't take a cross-app dependency on Admin.
/// </summary>
public sealed class LdapUserAuthenticator(LdapOptions options, ILogger<LdapUserAuthenticator> logger)
: IUserAuthenticator
{
public async Task<UserAuthResult> AuthenticateAsync(string username, string password, CancellationToken ct = default)
{
if (!options.Enabled)
return new UserAuthResult(false, null, [], [], "LDAP authentication disabled");
if (string.IsNullOrWhiteSpace(username) || string.IsNullOrWhiteSpace(password))
return new UserAuthResult(false, null, [], [], "Credentials required");
if (!options.UseTls && !options.AllowInsecureLdap)
return new UserAuthResult(false, null, [], [],
"Insecure LDAP is disabled. Set UseTls or AllowInsecureLdap for dev/test.");
try
{
using var conn = new LdapConnection();
if (options.UseTls) conn.SecureSocketLayer = true;
await Task.Run(() => conn.Connect(options.Server, options.Port), ct);
var bindDn = await ResolveUserDnAsync(conn, username, ct);
await Task.Run(() => conn.Bind(bindDn, password), ct);
// Rebind as service account for attribute read, if configured — otherwise the just-
// bound user reads their own entry (works when ACL permits self-read).
if (!string.IsNullOrWhiteSpace(options.ServiceAccountDn))
await Task.Run(() => conn.Bind(options.ServiceAccountDn, options.ServiceAccountPassword), ct);
var displayName = username;
var groups = new List<string>();
try
{
var filter = $"(cn={EscapeLdapFilter(username)})";
var results = await Task.Run(() =>
conn.Search(options.SearchBase, LdapConnection.ScopeSub, filter, attrs: null, typesOnly: false), ct);
while (results.HasMore())
{
try
{
var entry = results.Next();
var name = entry.GetAttribute(options.DisplayNameAttribute);
if (name is not null) displayName = name.StringValue;
var groupAttr = entry.GetAttribute(options.GroupAttribute);
if (groupAttr is not null)
{
foreach (var groupDn in groupAttr.StringValueArray)
groups.Add(ExtractFirstRdnValue(groupDn));
}
// GLAuth fallback: primary group is encoded as the ou= RDN above cn=.
if (groups.Count == 0 && !string.IsNullOrEmpty(entry.Dn))
{
var primary = ExtractOuSegment(entry.Dn);
if (primary is not null) groups.Add(primary);
}
}
catch (LdapException) { break; }
}
}
catch (LdapException ex)
{
logger.LogWarning(ex, "LDAP attribute lookup failed for {User}", username);
}
conn.Disconnect();
var roles = groups
.Where(g => options.GroupToRole.ContainsKey(g))
.Select(g => options.GroupToRole[g])
.Distinct(StringComparer.OrdinalIgnoreCase)
.ToList();
return new UserAuthResult(true, displayName, roles, groups, null);
}
catch (LdapException ex)
{
logger.LogInformation("LDAP bind rejected user {User}: {Reason}", username, ex.ResultCode);
return new UserAuthResult(false, null, [], [], "Invalid username or password");
}
catch (Exception ex) when (ex is not OperationCanceledException)
{
logger.LogError(ex, "Unexpected LDAP error for {User}", username);
return new UserAuthResult(false, null, [], [], "Authentication error");
}
}
private async Task<string> ResolveUserDnAsync(LdapConnection conn, string username, CancellationToken ct)
{
if (username.Contains('=')) return username; // caller passed a DN directly
if (!string.IsNullOrWhiteSpace(options.ServiceAccountDn))
{
await Task.Run(() => conn.Bind(options.ServiceAccountDn, options.ServiceAccountPassword), ct);
var filter = $"({options.UserNameAttribute}={EscapeLdapFilter(username)})";
var results = await Task.Run(() =>
conn.Search(options.SearchBase, LdapConnection.ScopeSub, filter, ["dn"], false), ct);
if (results.HasMore())
return results.Next().Dn;
throw new LdapException("User not found", LdapException.NoSuchObject,
$"No entry for uid={username}");
}
return string.IsNullOrWhiteSpace(options.SearchBase)
? $"cn={username}"
: $"cn={username},{options.SearchBase}";
}
internal static string EscapeLdapFilter(string input) =>
input.Replace("\\", "\\5c")
.Replace("*", "\\2a")
.Replace("(", "\\28")
.Replace(")", "\\29")
.Replace("\0", "\\00");
internal static string? ExtractOuSegment(string dn)
{
foreach (var segment in dn.Split(','))
{
var trimmed = segment.Trim();
if (trimmed.StartsWith("ou=", StringComparison.OrdinalIgnoreCase))
return trimmed[3..];
}
return null;
}
internal static string ExtractFirstRdnValue(string dn)
{
var eq = dn.IndexOf('=');
if (eq < 0) return dn;
var valueStart = eq + 1;
var comma = dn.IndexOf(',', valueStart);
return comma > valueStart ? dn[valueStart..comma] : dn[valueStart..];
}
}

View File

@@ -0,0 +1,88 @@
using System.Collections.Frozen;
using ZB.MOM.WW.OtOpcUa.Core.Authorization;
namespace ZB.MOM.WW.OtOpcUa.Server.Security;
/// <summary>
/// Maps a driver-side full reference (e.g. <c>"TestMachine_001/Oven/SetPoint"</c>) to the
/// <see cref="NodeScope"/> the Phase 6.2 evaluator walks. Supports two modes:
/// <list type="bullet">
/// <item>
/// <b>Cluster-only (pre-ADR-001)</b> — when no path index is supplied the resolver
/// returns a flat <c>ClusterId + TagId</c> scope. Sufficient while the
/// Config-DB-driven Equipment walker isn't live; Cluster-level grants cascade to every
/// tag below per decision #129, so finer per-Equipment grants are effectively
/// cluster-wide at dispatch.
/// </item>
/// <item>
/// <b>Full-path (post-ADR-001 Task B)</b> — when an index is supplied, the resolver
/// joins the full reference against the index to produce a complete
/// <c>Cluster → Namespace → UnsArea → UnsLine → Equipment → Tag</c> scope. Unblocks
/// per-Equipment / per-UnsLine ACL grants at the dispatch layer.
/// </item>
/// </list>
/// </summary>
/// <remarks>
/// <para>The index is pre-loaded by the Server bootstrap against the published generation;
/// the resolver itself does no live DB access. Resolve is O(1) dictionary lookup on the
/// hot path; the fallback for unknown fullReference strings produces the same cluster-only
/// scope the pre-ADR-001 resolver returned — new tags picked up via driver discovery but
/// not yet indexed (e.g. between a DiscoverAsync result and the next generation publish)
/// stay addressable without a scope-resolver crash.</para>
///
/// <para>Thread-safety: both constructor paths freeze inputs into immutable state. Callers
/// may cache a single instance per DriverNodeManager without locks. Swap atomically on
/// generation change via the server's publish pipeline.</para>
/// </remarks>
public sealed class NodeScopeResolver
{
private readonly string _clusterId;
private readonly FrozenDictionary<string, NodeScope>? _index;
/// <summary>Cluster-only resolver — pre-ADR-001 behavior. Kept for Server processes that
/// haven't wired the Config-DB snapshot flow yet.</summary>
public NodeScopeResolver(string clusterId)
{
ArgumentException.ThrowIfNullOrWhiteSpace(clusterId);
_clusterId = clusterId;
_index = null;
}
/// <summary>
/// Full-path resolver (ADR-001 Task B). <paramref name="pathIndex"/> maps each known
/// driver-side full reference to its pre-resolved <see cref="NodeScope"/> carrying
/// every UNS level populated. Entries are typically produced by joining
/// <c>Tag → Equipment → UnsLine → UnsArea</c> rows of the published generation against
/// the driver's discovered full references (or against <c>Tag.TagConfig</c> directly
/// when the walker is config-primary per ADR-001 Option A).
/// </summary>
public NodeScopeResolver(string clusterId, IReadOnlyDictionary<string, NodeScope> pathIndex)
{
ArgumentException.ThrowIfNullOrWhiteSpace(clusterId);
ArgumentNullException.ThrowIfNull(pathIndex);
_clusterId = clusterId;
_index = pathIndex.ToFrozenDictionary(StringComparer.Ordinal);
}
/// <summary>
/// Resolve a node scope for the given driver-side <paramref name="fullReference"/>.
/// Returns the indexed full-path scope when available; falls back to cluster-only
/// (TagId populated only) when the index is absent or the reference isn't indexed.
/// The fallback is the same shape the pre-ADR-001 resolver produced, so the authz
/// evaluator behaves identically for un-indexed references.
/// </summary>
public NodeScope Resolve(string fullReference)
{
ArgumentException.ThrowIfNullOrWhiteSpace(fullReference);
if (_index is not null && _index.TryGetValue(fullReference, out var indexed))
return indexed;
return new NodeScope
{
ClusterId = _clusterId,
TagId = fullReference,
Kind = NodeHierarchyKind.Equipment,
};
}
}

View File

@@ -0,0 +1,81 @@
using ZB.MOM.WW.OtOpcUa.Configuration.Entities;
using ZB.MOM.WW.OtOpcUa.Core.Authorization;
using ZB.MOM.WW.OtOpcUa.Core.OpcUa;
namespace ZB.MOM.WW.OtOpcUa.Server.Security;
/// <summary>
/// Builds the <see cref="NodeScope"/> path index consumed by <see cref="NodeScopeResolver"/>
/// from a Config-DB snapshot of a single published generation. Runs once per generation
/// (or on every generation change) at the Server bootstrap layer; the produced index is
/// immutable + hot-path readable per ADR-001 Task B.
/// </summary>
/// <remarks>
/// <para>The index key is the driver-side full reference (<c>Tag.TagConfig</c>) — the same
/// string the dispatch layer passes to <see cref="NodeScopeResolver.Resolve"/>. The value
/// is a <see cref="NodeScope"/> with every UNS level populated:
/// <c>ClusterId / NamespaceId / UnsAreaId / UnsLineId / EquipmentId / TagId</c>. Tag rows
/// with null <c>EquipmentId</c> (SystemPlatform-namespace Galaxy tags per decision #120)
/// are excluded from the index — the cluster-only fallback path in the resolver handles
/// them without needing an index entry.</para>
///
/// <para>Duplicate keys are not expected but would be indicative of corrupt data — the
/// builder throws <see cref="InvalidOperationException"/> on collision so a config drift
/// surfaces at bootstrap instead of producing silently-last-wins scopes at dispatch.</para>
/// </remarks>
public static class ScopePathIndexBuilder
{
/// <summary>
/// Build a fullReference → NodeScope index from the four Config-DB collections for a
/// single namespace. Callers must filter inputs to a single
/// <see cref="Namespace"/> + the same <see cref="ConfigGeneration"/> upstream.
/// </summary>
/// <param name="clusterId">Owning cluster — populates <see cref="NodeScope.ClusterId"/>.</param>
/// <param name="namespaceId">Owning namespace — populates <see cref="NodeScope.NamespaceId"/>.</param>
/// <param name="content">Pre-loaded rows for the namespace.</param>
public static IReadOnlyDictionary<string, NodeScope> Build(
string clusterId,
string namespaceId,
EquipmentNamespaceContent content)
{
ArgumentException.ThrowIfNullOrWhiteSpace(clusterId);
ArgumentException.ThrowIfNullOrWhiteSpace(namespaceId);
ArgumentNullException.ThrowIfNull(content);
var areaByLine = content.Lines.ToDictionary(l => l.UnsLineId, l => l.UnsAreaId, StringComparer.OrdinalIgnoreCase);
var lineByEquipment = content.Equipment.ToDictionary(e => e.EquipmentId, e => e.UnsLineId, StringComparer.OrdinalIgnoreCase);
var index = new Dictionary<string, NodeScope>(StringComparer.Ordinal);
foreach (var tag in content.Tags)
{
// Null EquipmentId = SystemPlatform-namespace tag per decision #110 — skip; the
// cluster-only resolver fallback handles those without needing an index entry.
if (string.IsNullOrEmpty(tag.EquipmentId)) continue;
// Broken FK — Tag references a missing Equipment row. Skip rather than crash;
// sp_ValidateDraft should have caught this at publish, so any drift here is
// unexpected but non-fatal.
if (!lineByEquipment.TryGetValue(tag.EquipmentId, out var lineId)) continue;
if (!areaByLine.TryGetValue(lineId, out var areaId)) continue;
var scope = new NodeScope
{
ClusterId = clusterId,
NamespaceId = namespaceId,
UnsAreaId = areaId,
UnsLineId = lineId,
EquipmentId = tag.EquipmentId,
TagId = tag.TagConfig,
Kind = NodeHierarchyKind.Equipment,
};
if (!index.TryAdd(tag.TagConfig, scope))
throw new InvalidOperationException(
$"Duplicate fullReference '{tag.TagConfig}' in Equipment namespace '{namespaceId}'. " +
"Config data is corrupt — two Tag rows produced the same wire-level address.");
}
return index;
}
}

View File

@@ -0,0 +1,88 @@
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
namespace ZB.MOM.WW.OtOpcUa.Server.Security;
/// <summary>
/// Server-layer write-authorization policy. ACL enforcement lives here — drivers report
/// <see cref="SecurityClassification"/> as discovery metadata only; the server decides
/// whether a given session is allowed to write a given attribute by checking the session's
/// roles (resolved at login via <see cref="LdapUserAuthenticator"/>) against the required
/// role for the attribute's classification.
/// </summary>
/// <remarks>
/// Matches the table in <c>docs/Configuration.md</c>:
/// <list type="bullet">
/// <item><c>FreeAccess</c>: no role required — anonymous sessions can write (matches v1 default).</item>
/// <item><c>Operate</c> / <c>SecuredWrite</c>: <c>WriteOperate</c> role required.</item>
/// <item><c>Tune</c>: <c>WriteTune</c> role required.</item>
/// <item><c>VerifiedWrite</c> / <c>Configure</c>: <c>WriteConfigure</c> role required.</item>
/// <item><c>ViewOnly</c>: no role grants write access.</item>
/// </list>
/// <c>AlarmAck</c> is checked at the alarm-acknowledge path, not here.
/// </remarks>
public static class WriteAuthzPolicy
{
public const string RoleWriteOperate = "WriteOperate";
public const string RoleWriteTune = "WriteTune";
public const string RoleWriteConfigure = "WriteConfigure";
/// <summary>
/// Decide whether a session with <paramref name="userRoles"/> is allowed to write to an
/// attribute with the given <paramref name="classification"/>. Returns true for
/// <c>FreeAccess</c> regardless of roles (including empty / anonymous sessions) and
/// false for <c>ViewOnly</c> regardless of roles. Every other classification requires
/// the session to carry the mapped role — case-insensitive match.
/// </summary>
public static bool IsAllowed(SecurityClassification classification, IReadOnlyCollection<string> userRoles)
{
if (classification == SecurityClassification.FreeAccess) return true;
if (classification == SecurityClassification.ViewOnly) return false;
var required = RequiredRole(classification);
if (required is null) return false;
foreach (var r in userRoles)
{
if (string.Equals(r, required, StringComparison.OrdinalIgnoreCase))
return true;
}
return false;
}
/// <summary>
/// Required role for a classification, or null when no role grants access
/// (<see cref="SecurityClassification.ViewOnly"/>) or no role is needed
/// (<see cref="SecurityClassification.FreeAccess"/> — also returns null; callers use
/// <see cref="IsAllowed"/> which handles the special-cases rather than branching on
/// null themselves).
/// </summary>
public static string? RequiredRole(SecurityClassification classification) => classification switch
{
SecurityClassification.FreeAccess => null, // IsAllowed short-circuits
SecurityClassification.Operate => RoleWriteOperate,
SecurityClassification.SecuredWrite => RoleWriteOperate,
SecurityClassification.Tune => RoleWriteTune,
SecurityClassification.VerifiedWrite => RoleWriteConfigure,
SecurityClassification.Configure => RoleWriteConfigure,
SecurityClassification.ViewOnly => null, // IsAllowed short-circuits
_ => null,
};
/// <summary>
/// Maps a driver-reported <see cref="SecurityClassification"/> to the
/// <see cref="Core.Abstractions.OpcUaOperation"/> the Phase 6.2 evaluator consults
/// for the matching <see cref="Configuration.Enums.NodePermissions"/> bit.
/// FreeAccess + ViewOnly fall back to WriteOperate — the evaluator never sees them
/// because <see cref="IsAllowed"/> short-circuits first.
/// </summary>
public static Core.Abstractions.OpcUaOperation ToOpcUaOperation(SecurityClassification classification) =>
classification switch
{
SecurityClassification.Operate => Core.Abstractions.OpcUaOperation.WriteOperate,
SecurityClassification.SecuredWrite => Core.Abstractions.OpcUaOperation.WriteOperate,
SecurityClassification.Tune => Core.Abstractions.OpcUaOperation.WriteTune,
SecurityClassification.VerifiedWrite => Core.Abstractions.OpcUaOperation.WriteConfigure,
SecurityClassification.Configure => Core.Abstractions.OpcUaOperation.WriteConfigure,
_ => Core.Abstractions.OpcUaOperation.WriteOperate,
};
}

View File

@@ -0,0 +1,66 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net10.0</TargetFramework>
<Nullable>enable</Nullable>
<ImplicitUsings>enable</ImplicitUsings>
<LangVersion>latest</LangVersion>
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<GenerateDocumentationFile>true</GenerateDocumentationFile>
<NoWarn>$(NoWarn);CS1591</NoWarn>
<RootNamespace>ZB.MOM.WW.OtOpcUa.Server</RootNamespace>
<AssemblyName>OtOpcUa.Server</AssemblyName>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.Hosting" Version="10.0.0"/>
<PackageReference Include="Microsoft.Extensions.Hosting.WindowsServices" Version="10.0.0"/>
<PackageReference Include="Microsoft.Extensions.Http" Version="10.0.0"/>
<PackageReference Include="Microsoft.Extensions.Configuration.Json" Version="10.0.0"/>
<PackageReference Include="Serilog.Extensions.Hosting" Version="9.0.0"/>
<PackageReference Include="Serilog.Settings.Configuration" Version="9.0.0"/>
<PackageReference Include="Serilog.Sinks.Console" Version="6.0.0"/>
<PackageReference Include="Serilog.Sinks.File" Version="7.0.0"/>
<PackageReference Include="Serilog.Formatting.Compact" Version="3.0.0"/>
<PackageReference Include="OPCFoundation.NetStandard.Opc.Ua.Server" Version="1.5.374.126"/>
<PackageReference Include="OPCFoundation.NetStandard.Opc.Ua.Configuration" Version="1.5.374.126"/>
<PackageReference Include="Novell.Directory.Ldap.NETStandard" Version="3.6.0"/>
<PackageReference Include="Microsoft.EntityFrameworkCore.SqlServer" Version="10.0.0"/>
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\..\Core\ZB.MOM.WW.OtOpcUa.Core\ZB.MOM.WW.OtOpcUa.Core.csproj"/>
<ProjectReference Include="..\..\Core\ZB.MOM.WW.OtOpcUa.Core.Scripting\ZB.MOM.WW.OtOpcUa.Core.Scripting.csproj"/>
<ProjectReference Include="..\..\Core\ZB.MOM.WW.OtOpcUa.Core.VirtualTags\ZB.MOM.WW.OtOpcUa.Core.VirtualTags.csproj"/>
<ProjectReference Include="..\..\Core\ZB.MOM.WW.OtOpcUa.Core.ScriptedAlarms\ZB.MOM.WW.OtOpcUa.Core.ScriptedAlarms.csproj"/>
<ProjectReference Include="..\..\Core\ZB.MOM.WW.OtOpcUa.Core.AlarmHistorian\ZB.MOM.WW.OtOpcUa.Core.AlarmHistorian.csproj"/>
<ProjectReference Include="..\..\Drivers\ZB.MOM.WW.OtOpcUa.Driver.Galaxy\ZB.MOM.WW.OtOpcUa.Driver.Galaxy.csproj"/>
<ProjectReference Include="..\..\Drivers\ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client\ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client.csproj"/>
<ProjectReference Include="..\..\Drivers\ZB.MOM.WW.OtOpcUa.Driver.FOCAS\ZB.MOM.WW.OtOpcUa.Driver.FOCAS.csproj"/>
<ProjectReference Include="..\..\Drivers\ZB.MOM.WW.OtOpcUa.Driver.Modbus\ZB.MOM.WW.OtOpcUa.Driver.Modbus.csproj"/>
<ProjectReference Include="..\..\Drivers\ZB.MOM.WW.OtOpcUa.Driver.AbCip\ZB.MOM.WW.OtOpcUa.Driver.AbCip.csproj"/>
<ProjectReference Include="..\..\Drivers\ZB.MOM.WW.OtOpcUa.Driver.S7\ZB.MOM.WW.OtOpcUa.Driver.S7.csproj"/>
<ProjectReference Include="..\..\Drivers\ZB.MOM.WW.OtOpcUa.Driver.AbLegacy\ZB.MOM.WW.OtOpcUa.Driver.AbLegacy.csproj"/>
<ProjectReference Include="..\..\Drivers\ZB.MOM.WW.OtOpcUa.Driver.TwinCAT\ZB.MOM.WW.OtOpcUa.Driver.TwinCAT.csproj"/>
<ProjectReference Include="..\..\Tooling\ZB.MOM.WW.OtOpcUa.Analyzers\ZB.MOM.WW.OtOpcUa.Analyzers.csproj"
OutputItemType="Analyzer" ReferenceOutputAssembly="false"/>
</ItemGroup>
<ItemGroup>
<InternalsVisibleTo Include="ZB.MOM.WW.OtOpcUa.Server.Tests"/>
</ItemGroup>
<ItemGroup>
<None Update="appsettings.json" CopyToOutputDirectory="PreserveNewest"/>
</ItemGroup>
<ItemGroup>
<NuGetAuditSuppress Include="https://github.com/advisories/GHSA-37gx-xxp4-5rgx"/>
<NuGetAuditSuppress Include="https://github.com/advisories/GHSA-w3x6-4m5h-cxqf"/>
<!-- OPCFoundation.NetStandard.Opc.Ua.Core advisory — v1 already uses this package at the
same version, risk already accepted in the v1 stack. -->
<NuGetAuditSuppress Include="https://github.com/advisories/GHSA-h958-fxgg-g7w3"/>
</ItemGroup>
</Project>

View File

@@ -0,0 +1,26 @@
{
"Serilog": {
"MinimumLevel": "Information"
},
"Node": {
"NodeId": "node-dev-a",
"ClusterId": "cluster-dev",
"ConfigDbConnectionString": "Server=10.100.0.35,14330;Database=OtOpcUaConfig;Integrated Security=True;TrustServerCertificate=True;Encrypt=False;",
"LocalCachePath": "config_cache.db"
},
"Historian": {
"Wonderware": {
"Enabled": false,
"PipeName": "OtOpcUaWonderwareHistorian",
"SharedSecret": "",
"PeerName": "OtOpcUa-node-dev-a",
"DriverInstancePrefix": "galaxy",
"ConnectTimeoutSeconds": 10,
"CallTimeoutSeconds": 30
}
},
"Galaxy": {
"$comment": "PR 7.1 — DefaultBackend selects which factory tooling and migration scripts pick when authoring NEW Galaxy DriverInstance rows. Both factories register at startup so existing rows keep working: 'GalaxyMxGateway' (default since PR 7.1, in-process .NET 10 driver over the mxaccessgw gRPC gateway) and 'Galaxy' (legacy out-of-process Galaxy.Host EXE + named pipe). Legacy registration retires in PR 7.2 once the parity matrix in docs/v2/Galaxy.ParityMatrix.md is fully green on the parity rig.",
"DefaultBackend": "GalaxyMxGateway"
}
}