chore: organize solution into module folders (Core/Server/Drivers/Client/Tooling)
Group all 69 projects into category subfolders under src/ and tests/ so the Rider Solution Explorer mirrors the module structure. Folders: Core, Server, Drivers (with a nested Driver CLIs subfolder), Client, Tooling. - Move every project folder on disk with git mv (history preserved as renames). - Recompute relative paths in 57 .csproj files: cross-category ProjectReferences, the lib/ HintPath+None refs in Driver.Historian.Wonderware, and the external mxaccessgw refs in Driver.Galaxy and its test project. - Rebuild ZB.MOM.WW.OtOpcUa.slnx with nested solution folders. - Re-prefix project paths in functional scripts (e2e, compliance, smoke SQL, integration, install). Build green (0 errors); unit tests pass. Docs left for a separate pass. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,289 @@
|
||||
using System.Collections.Concurrent;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Server.Alarms;
|
||||
|
||||
/// <summary>
|
||||
/// Server-level alarm-condition state machine. Tracks one entry per registered
|
||||
/// condition; consumes value changes from the four sub-attribute references in
|
||||
/// <see cref="AlarmConditionInfo"/> (InAlarm / Priority / Description / Acked) and
|
||||
/// raises <see cref="TransitionRaised"/> on Active / Acknowledged / Inactive
|
||||
/// transitions per OPC UA Part 9 (simplified). Operator acknowledgement routes
|
||||
/// through <see cref="IAlarmAcknowledger"/> against <c>AckMsgWriteRef</c>.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// This is the driver-agnostic replacement for <c>GalaxyAlarmTracker</c>. The
|
||||
/// service does not own subscription lifecycle — PR 2.3 will wire DriverNodeManager
|
||||
/// to subscribe through the driver's <c>ISubscribable</c> and forward value changes
|
||||
/// here via <see cref="OnValueChanged"/>. Keeping the service free of subscription
|
||||
/// plumbing makes it trivially testable and lets future drivers feed it from any
|
||||
/// value source (in-process, gRPC, named pipe).
|
||||
/// </remarks>
|
||||
public sealed class AlarmConditionService : IDisposable
|
||||
{
|
||||
// Time source used to stamp transitions; swapped out by tests through the internal constructor.
private readonly Func<DateTime> _clock;

// Tracked conditions, keyed case-insensitively by ConditionId.
private readonly ConcurrentDictionary<string, AlarmConditionState> _conditions =
    new ConcurrentDictionary<string, AlarmConditionState>(StringComparer.OrdinalIgnoreCase);

// Reverse index: sub-attribute full reference → every (conditionId, field) pair that observes it.
// The value is a list because more than one condition may observe the same sub-attribute, in
// which case a single value change fans out to all of them.
private readonly ConcurrentDictionary<string, List<(string ConditionId, AlarmField Field)>> _refToCondition =
    new ConcurrentDictionary<string, List<(string ConditionId, AlarmField Field)>>(StringComparer.OrdinalIgnoreCase);

// Serialises reads and writes of the per-reference lists held in _refToCondition.
private readonly object _refMapLock = new object();

// Set once by Dispose; checked at the top of the public entry points.
private bool _disposed;

/// <summary>
/// Raised when a registered condition goes Active, Acknowledged or Inactive.
/// The event is invoked synchronously on the thread that called
/// <see cref="OnValueChanged"/>, so handlers must be cheap — a slow handler
/// stalls the value-change pipeline.
/// </summary>
public event EventHandler<AlarmConditionTransition>? TransitionRaised;
|
||||
|
||||
/// <summary>Creates a service that timestamps transitions with <see cref="DateTime.UtcNow"/>.</summary>
public AlarmConditionService()
    : this(static () => DateTime.UtcNow)
{
}

/// <summary>Test seam: supply a fixed clock so transition timestamps are deterministic.</summary>
/// <exception cref="ArgumentNullException"><paramref name="clock"/> is null.</exception>
internal AlarmConditionService(Func<DateTime> clock)
{
    ArgumentNullException.ThrowIfNull(clock);
    _clock = clock;
}

/// <summary>How many conditions are tracked right now. Diagnostic only.</summary>
public int TrackedCount
{
    get { return _conditions.Count; }
}
|
||||
|
||||
/// <summary>
/// Registers a condition and indexes its four sub-attribute references
/// (InAlarm / Priority / DescAttrName / Acked). Idempotent: a repeat call for an
/// already-tracked <paramref name="conditionId"/> is a no-op. The
/// <paramref name="acker"/> is captured for the condition's lifetime; pass null
/// when the driver does not accept acks.
/// </summary>
/// <exception cref="ObjectDisposedException">The service has been disposed.</exception>
/// <exception cref="ArgumentException"><paramref name="conditionId"/> is null, empty or whitespace.</exception>
/// <exception cref="ArgumentNullException"><paramref name="info"/> is null.</exception>
public void Track(string conditionId, AlarmConditionInfo info, IAlarmAcknowledger? acker = null)
{
    ObjectDisposedException.ThrowIf(_disposed, this);
    ArgumentException.ThrowIfNullOrWhiteSpace(conditionId);
    ArgumentNullException.ThrowIfNull(info);

    var state = new AlarmConditionState(conditionId, info, acker);

    // Register the condition and add its reference mappings under the same lock.
    // Doing the TryAdd outside the lock let a concurrent Untrack remove the condition
    // and finish its (empty) mapping clean-up first, after which the mappings added
    // here were orphaned in the reference index.
    lock (_refMapLock)
    {
        if (!_conditions.TryAdd(conditionId, state)) return;

        AddRefMapping(info.InAlarmRef, conditionId, AlarmField.InAlarm);
        AddRefMapping(info.PriorityRef, conditionId, AlarmField.Priority);
        AddRefMapping(info.DescAttrNameRef, conditionId, AlarmField.DescAttrName);
        AddRefMapping(info.AckedRef, conditionId, AlarmField.Acked);
    }
}
|
||||
|
||||
/// <summary>
/// Deregisters a condition and drops its sub-attribute reference mappings.
/// No-op when the condition is not tracked or the service is disposed.
/// </summary>
public void Untrack(string conditionId)
{
    if (_disposed) return;

    // Remove the condition and its reference mappings under the same lock.
    // With the TryRemove outside the lock, a Track of the same id could slip in
    // between removal and clean-up, and the clean-up below would then strip the
    // freshly added mappings of the new registration.
    lock (_refMapLock)
    {
        if (!_conditions.TryRemove(conditionId, out var state)) return;

        RemoveRefMapping(state.Info.InAlarmRef, conditionId);
        RemoveRefMapping(state.Info.PriorityRef, conditionId);
        RemoveRefMapping(state.Info.DescAttrNameRef, conditionId);
        RemoveRefMapping(state.Info.AckedRef, conditionId);
    }
}
|
||||
|
||||
/// <summary>
/// Copies out the sub-attribute references that currently have at least one
/// observing condition. Callers are expected to hold one subscription per
/// returned reference via the driver's <see cref="ISubscribable"/>; that wiring
/// lives outside this service (PR 2.3).
/// </summary>
/// <returns>A point-in-time copy of the reference keys, taken under the ref-map lock.</returns>
public IReadOnlyCollection<string> GetSubscribedReferences()
{
    IReadOnlyCollection<string> subscribed;
    lock (_refMapLock)
    {
        subscribed = [.. _refToCondition.Keys];
    }

    return subscribed;
}
|
||||
|
||||
/// <summary>
/// Entry point for an operator acknowledgement. Forwards the comment to the
/// condition's <see cref="IAlarmAcknowledger"/> against its <c>AckMsgWriteRef</c>.
/// </summary>
/// <returns>
/// False when the service is disposed, the condition is not tracked, no acker
/// was registered, or the condition has no <c>AckMsgWriteRef</c>; otherwise
/// whatever the acker reports for the write.
/// </returns>
public Task<bool> AcknowledgeAsync(string conditionId, string comment, CancellationToken cancellationToken = default)
{
    if (_disposed)
    {
        return Task.FromResult(false);
    }

    if (!_conditions.TryGetValue(conditionId, out var state))
    {
        return Task.FromResult(false);
    }

    if (state.Acker is not { } acker)
    {
        return Task.FromResult(false);
    }

    var writeRef = state.Info.AckMsgWriteRef;
    if (string.IsNullOrEmpty(writeRef))
    {
        return Task.FromResult(false);
    }

    // A null comment is forwarded as the empty string.
    return acker.WriteAckMessageAsync(writeRef, comment ?? string.Empty, cancellationToken);
}
|
||||
|
||||
/// <summary>
/// Captures the current state of every tracked condition. Intended for
/// diagnostics and dashboards only.
/// </summary>
/// <returns>One snapshot per tracked condition; each is read under that condition's own lock.</returns>
public IReadOnlyList<AlarmConditionSnapshot> Snapshot()
{
    var captured = new List<AlarmConditionSnapshot>();
    foreach (var entry in _conditions.Values)
    {
        lock (entry.Lock)
        {
            captured.Add(new AlarmConditionSnapshot(
                entry.ConditionId, entry.InAlarm, entry.Acked, entry.Priority, entry.Description));
        }
    }

    return [.. captured];
}
|
||||
|
||||
/// <summary>
/// Delivers a value change for one registered sub-attribute reference. Each
/// condition observing the reference runs through the state machine, and
/// <see cref="TransitionRaised"/> fires for every resulting lifecycle transition.
/// References nobody observes are dropped silently: registration may race with
/// value delivery, so a late callback for a just-untracked condition must not throw.
/// </summary>
public void OnValueChanged(string fullReference, DataValueSnapshot value)
{
    if (_disposed || string.IsNullOrEmpty(fullReference)) return;

    (string ConditionId, AlarmField Field)[] fanOut;
    lock (_refMapLock)
    {
        if (!_refToCondition.TryGetValue(fullReference, out var observers)) return;
        if (observers.Count == 0) return;

        // Copy while holding the lock; the state machine itself runs outside it.
        fanOut = observers.ToArray();
    }

    var timestamp = _clock();
    foreach (var target in fanOut)
    {
        if (!_conditions.TryGetValue(target.ConditionId, out var state)) continue;

        AlarmConditionTransition? raised;
        lock (state.Lock)
        {
            raised = ApplyValue(state, target.Field, value, timestamp);
        }

        // Raised after the per-condition lock is released.
        if (raised is not null)
        {
            TransitionRaised?.Invoke(this, raised);
        }
    }
}
|
||||
|
||||
/// <summary>
/// Folds one value change into one condition's state. The caller must hold
/// <c>state.Lock</c>.
/// </summary>
/// <returns>
/// A transition when the change crosses a state boundary (InAlarm false→true,
/// InAlarm true→false, or Acked false→true while in alarm); otherwise null.
/// </returns>
private static AlarmConditionTransition? ApplyValue(
    AlarmConditionState state, AlarmField field, DataValueSnapshot value, DateTime now)
{
    state.LastUpdateUtc = now;

    if (field == AlarmField.Priority)
    {
        state.Priority = CoercePriority(value.Value, state.Priority);
        return null;
    }

    if (field == AlarmField.DescAttrName)
    {
        state.Description = value.Value as string;
        return null;
    }

    if (field == AlarmField.InAlarm)
    {
        var previouslyActive = state.InAlarm;
        var nowActive = value.Value is true; // anything other than boolean true counts as not in alarm
        state.InAlarm = nowActive;

        if (previouslyActive == nowActive) return null;
        if (!nowActive) return Emit(AlarmStateTransition.Inactive);

        // Every activation clears Acked so a re-alarm needs a fresh acknowledgement.
        state.Acked = false;
        return Emit(AlarmStateTransition.Active);
    }

    if (field == AlarmField.Acked)
    {
        var previouslyAcked = state.Acked;
        var nowAcked = value.Value is true;
        state.Acked = nowAcked;

        // Acknowledged is reported only for false → true while the alarm is still active.
        // State starts with Acked=true, so an initial Acked=true callback on a quiet
        // alarm is not a transition.
        return !previouslyAcked && nowAcked && state.InAlarm
            ? Emit(AlarmStateTransition.Acknowledged)
            : null;
    }

    return null;

    // Builds the transition from the condition's current priority and description.
    AlarmConditionTransition Emit(AlarmStateTransition kind) =>
        new AlarmConditionTransition(state.ConditionId, kind, state.Priority, state.Description, now);
}
|
||||
|
||||
/// <summary>
/// Converts a raw priority value of any built-in integer type to <see cref="int"/>.
/// </summary>
/// <param name="raw">The boxed value received from the driver; may be null.</param>
/// <param name="fallback">Returned when <paramref name="raw"/> is not an integer type or does not fit in an int.</param>
/// <returns>The value as an int, or <paramref name="fallback"/>.</returns>
private static int CoercePriority(object? raw, int fallback) => raw switch
{
    int i => i,
    short s => s,
    sbyte sb => sb,
    byte b => b,
    ushort us => us,
    // Check both bounds: an unchecked (int) cast of a long below int.MinValue wraps silently.
    long l when l is >= int.MinValue and <= int.MaxValue => (int)l,
    uint ui when ui <= int.MaxValue => (int)ui,
    ulong ul when ul <= int.MaxValue => (int)ul,
    _ => fallback,
};
|
||||
|
||||
/// <summary>
/// Records that <paramref name="conditionId"/> observes <paramref name="fullRef"/>
/// as <paramref name="field"/>. A null or empty reference is ignored. Every call
/// site in this class holds <c>_refMapLock</c>.
/// </summary>
private void AddRefMapping(string? fullRef, string conditionId, AlarmField field)
{
    if (string.IsNullOrEmpty(fullRef))
    {
        return;
    }

    if (_refToCondition.TryGetValue(fullRef, out var observers))
    {
        observers.Add((conditionId, field));
        return;
    }

    // First observer of this reference: start its list.
    _refToCondition[fullRef] = new List<(string ConditionId, AlarmField Field)> { (conditionId, field) };
}
|
||||
|
||||
/// <summary>
/// Removes every mapping of <paramref name="conditionId"/> (matched
/// case-insensitively) from <paramref name="fullRef"/>'s observer list, and
/// drops the reference key once its list is empty. A null, empty or unknown
/// reference is ignored. Every call site in this class holds <c>_refMapLock</c>.
/// </summary>
private void RemoveRefMapping(string? fullRef, string conditionId)
{
    if (string.IsNullOrEmpty(fullRef) || !_refToCondition.TryGetValue(fullRef, out var observers))
    {
        return;
    }

    // Walk backwards so removals don't shift the entries still to be visited.
    for (var i = observers.Count - 1; i >= 0; i--)
    {
        if (string.Equals(observers[i].ConditionId, conditionId, StringComparison.OrdinalIgnoreCase))
        {
            observers.RemoveAt(i);
        }
    }

    if (observers.Count == 0)
    {
        _refToCondition.TryRemove(fullRef, out _);
    }
}
|
||||
|
||||
/// <summary>
/// Marks the service disposed and forgets every tracked condition and reference
/// mapping. Repeat calls are no-ops.
/// </summary>
public void Dispose()
{
    if (!_disposed)
    {
        _disposed = true;
        _conditions.Clear();

        lock (_refMapLock)
        {
            _refToCondition.Clear();
        }
    }
}
|
||||
|
||||
// Which sub-attribute of a condition a reference feeds.
private enum AlarmField
{
    InAlarm,
    Priority,
    DescAttrName,
    Acked,
}
|
||||
|
||||
/// <summary>
/// Mutable state for a single tracked condition. Readers and writers of the
/// mutable fields take <see cref="Lock"/> first.
/// </summary>
private sealed class AlarmConditionState
{
    public readonly object Lock = new object();

    public bool InAlarm;

    // Starts acknowledged so the first callback after Track (Acked=true on a quiet
    // alarm) is not seen as a false → true change. Going active resets it to false.
    public bool Acked = true;

    public int Priority;
    public string? Description;
    public DateTime LastUpdateUtc;

    public AlarmConditionState(string conditionId, AlarmConditionInfo info, IAlarmAcknowledger? acker)
    {
        ConditionId = conditionId;
        Info = info;
        Acker = acker;
    }

    public string ConditionId { get; }

    public AlarmConditionInfo Info { get; }

    public IAlarmAcknowledger? Acker { get; }
}
|
||||
}
|
||||
@@ -0,0 +1,44 @@
|
||||
namespace ZB.MOM.WW.OtOpcUa.Server.Alarms;
|
||||
|
||||
/// <summary>
/// The lifecycle step an alarm condition has just taken. A simplified view of
/// the OPC UA Part 9 alarm states, reduced to the active / acknowledged /
/// inactive triplet that every driver in the repo exposes today.
/// </summary>
public enum AlarmStateTransition
{
    /// <summary>InAlarm went false → true. The alarm starts out unacknowledged.</summary>
    Active = 0,

    /// <summary>Acked went false → true while the alarm was still active.</summary>
    Acknowledged = 1,

    /// <summary>InAlarm went true → false.</summary>
    Inactive = 2,
}
|
||||
|
||||
/// <summary>
/// Payload of <see cref="AlarmConditionService.TransitionRaised"/>: a single
/// alarm-state transition.
/// </summary>
/// <param name="ConditionId">The stable id the caller registered the condition under (typically the driver's alarm full reference).</param>
/// <param name="Transition">The state the alarm moved to.</param>
/// <param name="Priority">Most recent known priority; 0 if no priority sub-attribute was registered or no value has arrived yet.</param>
/// <param name="Description">Most recent known description text; null if not registered or not yet observed.</param>
/// <param name="AtUtc">Server-clock UTC time of the value change that caused the transition.</param>
public sealed record AlarmConditionTransition(
    string ConditionId,
    AlarmStateTransition Transition,
    int Priority,
    string? Description,
    DateTime AtUtc);
|
||||
|
||||
/// <summary>
/// Point-in-time, read-only view of one alarm condition. Meant for diagnostics
/// and dashboards; it is not part of the live transition stream.
/// </summary>
/// <param name="ConditionId">Id the condition is tracked under.</param>
/// <param name="InAlarm">Whether the condition was in alarm when the snapshot was taken.</param>
/// <param name="Acked">Whether the condition was acknowledged when the snapshot was taken.</param>
/// <param name="Priority">Priority held at snapshot time.</param>
/// <param name="Description">Description held at snapshot time; may be null.</param>
public sealed record AlarmConditionSnapshot(
    string ConditionId,
    bool InAlarm,
    bool Acked,
    int Priority,
    string? Description);
|
||||
@@ -0,0 +1,23 @@
|
||||
namespace ZB.MOM.WW.OtOpcUa.Server.Alarms;
|
||||
|
||||
/// <summary>
/// Pluggable route for sending an operator acknowledgement back to the
/// underlying driver. Keeps <see cref="AlarmConditionService"/> independent of
/// any one driver's write API, so the service can be tested without a real
/// driver and shared by drivers whose write paths differ.
/// </summary>
/// <remarks>
/// PR 2.3 provides the default implementation, which writes through the driver's
/// <c>IWritable.WriteAsync</c> using the <c>AckMsgWriteRef</c> from
/// <c>AlarmConditionInfo</c>. A driver that acknowledges another way (for example
/// a dedicated RPC) can pass its own implementation when registering the condition.
/// </remarks>
public interface IAlarmAcknowledger
{
    /// <summary>
    /// Writes the operator's <paramref name="comment"/> to <paramref name="ackMsgWriteRef"/>.
    /// Implementations should propagate cancellation, but must not throw when the
    /// driver cleanly rejects the write.
    /// </summary>
    /// <returns>True when the driver reports success; false otherwise.</returns>
    Task<bool> WriteAckMessageAsync(string ackMsgWriteRef, string comment, CancellationToken cancellationToken);
}
|
||||
@@ -0,0 +1,138 @@
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Hosting;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Resilience;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Stability;
|
||||
using ZB.MOM.WW.OtOpcUa.Server.Hosting;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Server;
|
||||
|
||||
/// <summary>
/// Task #248 — bridges the gap surfaced by the Phase 7 live smoke (#240) where
/// <c>DriverInstance</c> rows in the central config DB had no path to materialise
/// as live <see cref="Core.Abstractions.IDriver"/> instances in <see cref="DriverHost"/>.
/// Called from <c>OpcUaServerService.ExecuteAsync</c> after the bootstrap loads
/// the published generation, before address-space build.
/// </summary>
/// <remarks>
/// <para>
/// Per row: looks up the <c>DriverType</c> string in
/// <see cref="DriverFactoryRegistry"/>, calls the factory with the row's
/// <c>DriverInstanceId</c> + <c>DriverConfig</c> JSON to construct an
/// <see cref="Core.Abstractions.IDriver"/>, then registers via
/// <see cref="DriverHost.RegisterAsync"/> which invokes <c>InitializeAsync</c>
/// under the host's lifecycle semantics.
/// </para>
/// <para>
/// Unknown <c>DriverType</c> = factory not registered = log a warning and skip.
/// Per plan decision #12 (driver isolation), failure to construct or initialize
/// one driver doesn't prevent the rest from coming up — the Server keeps serving
/// the others' subtrees + the operator can fix the misconfigured row + republish
/// to retry. Cancellation of the caller's token is not a driver failure and is
/// propagated instead of being logged per row.
/// </para>
/// </remarks>
public sealed class DriverInstanceBootstrapper(
    DriverFactoryRegistry factories,
    DriverHost driverHost,
    ScheduledRecycleHostedService recycleHost,
    ILoggerFactory loggerFactory,
    IServiceScopeFactory scopeFactory,
    ILogger<DriverInstanceBootstrapper> logger)
{
    // IDriverSupervisor instances, looked up by DriverInstanceId. Built once from the
    // optional DI registrations; drivers without a supervisor are simply absent and are
    // not eligible for scheduled recycle.
    // NOTE(review): the scope created here is never disposed. Harmless if supervisors are
    // singletons, but confirm — disposing it would be wrong if they are scoped.
    // NOTE(review): GetServices<T>() does not return keyed registrations in
    // Microsoft.Extensions.DependencyInjection, so supervisors added with
    // AddKeyedSingleton<IDriverSupervisor>(instanceId, ...) would not appear here — verify
    // against the actual registration code.
    private readonly IReadOnlyDictionary<string, IDriverSupervisor> _supervisors =
        scopeFactory.CreateScope().ServiceProvider
            .GetServices<IDriverSupervisor>()
            .ToDictionary(s => s.DriverInstanceId, StringComparer.OrdinalIgnoreCase);

    /// <summary>
    /// Constructs and registers a driver for every enabled <c>DriverInstance</c> row of
    /// <paramref name="generationId"/>, then arms scheduled recycle where the row opts in.
    /// </summary>
    /// <param name="generationId">Published generation whose rows are loaded.</param>
    /// <param name="ct">Cancels the DB query and driver registration.</param>
    /// <returns>The number of drivers that were registered and initialized.</returns>
    /// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled.</exception>
    public async Task<int> RegisterDriversFromGenerationAsync(long generationId, CancellationToken ct)
    {
        using var scope = scopeFactory.CreateScope();
        var db = scope.ServiceProvider.GetRequiredService<OtOpcUaConfigDbContext>();

        var rows = await db.DriverInstances.AsNoTracking()
            .Where(d => d.GenerationId == generationId && d.Enabled)
            .ToListAsync(ct).ConfigureAwait(false);

        var registered = 0;
        var skippedUnknownType = 0;
        var failedInit = 0;

        foreach (var row in rows)
        {
            var factory = factories.TryGet(row.DriverType);
            if (factory is null)
            {
                logger.LogWarning(
                    "DriverInstance {Id} skipped — DriverType '{Type}' has no registered factory (known: {Known})",
                    row.DriverInstanceId, row.DriverType, string.Join(",", factories.RegisteredTypes));
                skippedUnknownType++;
                continue;
            }

            try
            {
                var driver = factory(row.DriverInstanceId, row.DriverConfig);
                await driverHost.RegisterAsync(driver, row.DriverConfig, ct).ConfigureAwait(false);
            }
            catch (OperationCanceledException) when (ct.IsCancellationRequested)
            {
                // Shutdown/cancellation is not a driver fault: don't log it as a failed init
                // for this row and every row after it.
                throw;
            }
            catch (Exception ex)
            {
                // Plan decision #12 — driver isolation. Log + continue so one bad row
                // doesn't deny the OPC UA endpoint to the rest of the fleet.
                logger.LogError(ex,
                    "DriverInstance {Id} ({Type}) failed to initialize — driver state will reflect Faulted; operator can republish to retry",
                    row.DriverInstanceId, row.DriverType);
                failedInit++;
                continue;
            }

            registered++;
            logger.LogInformation(
                "DriverInstance {Id} ({Type}) registered + initialized", row.DriverInstanceId, row.DriverType);

            // Scheduled-recycle opt-in — only meaningful for Tier C out-of-process hosts,
            // and only when the row's ResilienceConfig carries a positive
            // RecycleIntervalSeconds AND the deployment wired an IDriverSupervisor for
            // this DriverInstanceId. Kept outside the registration try-block so a failure
            // here is not counted as both "registered" and "failedInit".
            try
            {
                TryRegisterScheduledRecycle(row.DriverInstanceId, row.DriverType, row.ResilienceConfig);
            }
            catch (Exception ex)
            {
                logger.LogWarning(ex,
                    "DriverInstance {Id} ({Type}) is registered, but arming scheduled recycle failed; scheduled recycle will not fire",
                    row.DriverInstanceId, row.DriverType);
            }
        }

        logger.LogInformation(
            "DriverInstanceBootstrapper: gen={Gen} registered={Registered} skippedUnknownType={Skipped} failedInit={Failed}",
            generationId, registered, skippedUnknownType, failedInit);
        return registered;
    }

    /// <summary>
    /// Arms a <see cref="ScheduledRecycleScheduler"/> for a Tier C driver whose
    /// ResilienceConfig carries a positive <c>RecycleIntervalSeconds</c> and which has an
    /// <see cref="IDriverSupervisor"/>. Returns without doing anything for other tiers or
    /// when no interval is configured.
    /// </summary>
    private void TryRegisterScheduledRecycle(string driverInstanceId, string driverType, string? resilienceJson)
    {
        var tier = factories.GetTier(driverType);
        if (tier != DriverTier.C) return;

        var options = DriverResilienceOptionsParser.ParseOrDefaults(tier, resilienceJson, out _);
        if (options.RecycleIntervalSeconds is not int secs) return;

        if (secs <= 0)
        {
            // A zero or negative interval cannot describe a schedule; refuse it here rather
            // than handing it to the scheduler.
            logger.LogWarning(
                "DriverInstance {Id} ({Type}) has non-positive RecycleIntervalSeconds={Secs} in ResilienceConfig; scheduled recycle will not fire",
                driverInstanceId, driverType, secs);
            return;
        }

        if (!_supervisors.TryGetValue(driverInstanceId, out var supervisor))
        {
            logger.LogWarning(
                "DriverInstance {Id} ({Type}) has RecycleIntervalSeconds={Secs} in ResilienceConfig but no IDriverSupervisor registered; scheduled recycle will not fire",
                driverInstanceId, driverType, secs);
            return;
        }

        var interval = TimeSpan.FromSeconds(secs);
        var scheduler = new ScheduledRecycleScheduler(
            tier,
            interval,
            DateTime.UtcNow,
            supervisor,
            loggerFactory.CreateLogger<ScheduledRecycleScheduler>());
        recycleHost.AddScheduler(scheduler);
        logger.LogInformation(
            "Scheduled recycle armed for Tier C driver {Id} ({Type}) — interval {Interval}, first fire at {Next:o}",
            driverInstanceId, driverType, interval, scheduler.NextRecycleUtc);
    }
}
|
||||
71
src/Server/ZB.MOM.WW.OtOpcUa.Server/History/HistoryRouter.cs
Normal file
71
src/Server/ZB.MOM.WW.OtOpcUa.Server/History/HistoryRouter.cs
Normal file
@@ -0,0 +1,71 @@
|
||||
using System.Collections.Concurrent;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Server.History;
|
||||
|
||||
/// <summary>
/// Default <see cref="IHistoryRouter"/> implementation: a case-insensitive
/// prefix → source registry resolved by longest-prefix match.
/// </summary>
public sealed class HistoryRouter : IHistoryRouter
{
    private readonly ConcurrentDictionary<string, IHistorianDataSource> _registry =
        new(StringComparer.OrdinalIgnoreCase);

    private bool _disposed;

    /// <inheritdoc />
    /// <exception cref="ObjectDisposedException">The router has been disposed.</exception>
    /// <exception cref="ArgumentNullException">An argument is null.</exception>
    /// <exception cref="InvalidOperationException">The prefix is already registered.</exception>
    public void Register(string fullReferencePrefix, IHistorianDataSource source)
    {
        ObjectDisposedException.ThrowIf(_disposed, this);
        ArgumentNullException.ThrowIfNull(fullReferencePrefix);
        ArgumentNullException.ThrowIfNull(source);

        if (!_registry.TryAdd(fullReferencePrefix, source))
        {
            throw new InvalidOperationException(
                $"A historian data source is already registered for prefix '{fullReferencePrefix}'.");
        }
    }

    /// <inheritdoc />
    /// <exception cref="ObjectDisposedException">The router has been disposed.</exception>
    /// <exception cref="ArgumentNullException"><paramref name="fullReference"/> is null.</exception>
    public IHistorianDataSource? Resolve(string fullReference)
    {
        ObjectDisposedException.ThrowIf(_disposed, this);
        ArgumentNullException.ThrowIfNull(fullReference);

        // Longest-prefix match. Sources are typically a handful per server, so a linear
        // scan is fine and avoids building a trie for a low-cardinality registry.
        // Starting at -1 lets an empty-string prefix (length 0) act as the fallback.
        IHistorianDataSource? best = null;
        var bestPrefixLength = -1;

        foreach (var (prefix, source) in _registry)
        {
            if (fullReference.StartsWith(prefix, StringComparison.OrdinalIgnoreCase)
                && prefix.Length > bestPrefixLength)
            {
                best = source;
                bestPrefixLength = prefix.Length;
            }
        }

        return best;
    }

    /// <summary>
    /// Disposes each distinct registered source once and prevents further
    /// registrations or resolutions. Disposal is best-effort: an exception from
    /// one source is swallowed so the remaining sources are still disposed.
    /// </summary>
    public void Dispose()
    {
        if (_disposed) return;
        _disposed = true;

        // The same source instance may be registered under several prefixes; track
        // instances by reference so each one is disposed exactly once.
        var disposedSources = new HashSet<IHistorianDataSource>(ReferenceEqualityComparer.Instance);

        foreach (var source in _registry.Values)
        {
            if (!disposedSources.Add(source)) continue;

            try { source.Dispose(); }
            catch { /* best-effort — server shutdown should not throw on a misbehaving source */ }
        }

        _registry.Clear();
    }
}
|
||||
@@ -0,0 +1,37 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Server.History;
|
||||
|
||||
/// <summary>
/// Routes OPC UA HistoryRead service calls, at server level, to a registered
/// <see cref="IHistorianDataSource"/>. There is one router per server instance;
/// sources are registered at startup under a driver-side full-reference prefix
/// (typically the driver instance id).
/// </summary>
/// <remarks>
/// <para>
/// Routing through this interface separates history availability from the driver
/// lifecycle: a driver may restart or be temporarily disconnected without taking
/// history offline, and one historian can serve nodes from several drivers.
/// </para>
/// <para>
/// Lookup uses longest-prefix match, so a per-driver source registered under
/// <c>"galaxy"</c> takes precedence over a fallback registered under the empty string.
/// </para>
/// </remarks>
public interface IHistoryRouter : IDisposable
{
    /// <summary>
    /// Finds the data source registered for <paramref name="fullReference"/>.
    /// </summary>
    /// <returns>The matching source, or null when no registered source covers the reference.</returns>
    IHistorianDataSource? Resolve(string fullReference);

    /// <summary>
    /// Registers <paramref name="source"/> for every full reference that starts with
    /// <paramref name="fullReferencePrefix"/>. Throws when the prefix is already
    /// registered: a duplicate registration points at a startup-config bug, not a
    /// runtime condition.
    /// </summary>
    void Register(string fullReferencePrefix, IHistorianDataSource source);
}
|
||||
@@ -0,0 +1,59 @@
|
||||
using Microsoft.Extensions.Hosting;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
using ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Server.History;
|
||||
|
||||
/// <summary>
/// Hosted service that registers the configured <see cref="WonderwareHistorianClient"/>
/// as a source on the server-level <see cref="IHistoryRouter"/> at startup. Per-namespace
/// prefix is the driver instance id the operator binds the historian to (typically
/// "galaxy"); future per-area or per-equipment overrides can register under longer prefixes.
/// </summary>
/// <remarks>
/// PR 3.W only wires this when <c>Historian:Wonderware:Enabled=true</c> in config. The
/// hosted service does its work in <see cref="StartAsync"/> and stays passive afterward;
/// <see cref="StopAsync"/> is a no-op since router disposal happens through the singleton's
/// own DI lifecycle.
/// </remarks>
public sealed class WonderwareHistorianBootstrap : IHostedService
{
    private readonly IHistoryRouter _router;
    private readonly WonderwareHistorianClient _client;
    private readonly string _prefix;
    private readonly ILogger<WonderwareHistorianBootstrap> _logger;

    /// <summary>Captures the router, client, prefix and logger used by <see cref="StartAsync"/>.</summary>
    /// <exception cref="ArgumentNullException">Any argument is null.</exception>
    public WonderwareHistorianBootstrap(
        IHistoryRouter router,
        WonderwareHistorianClient client,
        string fullReferencePrefix,
        ILogger<WonderwareHistorianBootstrap> logger)
    {
        _router = router ?? throw new ArgumentNullException(nameof(router));
        _client = client ?? throw new ArgumentNullException(nameof(client));
        _prefix = fullReferencePrefix ?? throw new ArgumentNullException(nameof(fullReferencePrefix));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Registers the client under the configured prefix. A duplicate registration is
    /// tolerated and logged as a warning; registering on a disposed router is not.
    /// </summary>
    /// <exception cref="ObjectDisposedException">The router has already been disposed.</exception>
    public Task StartAsync(CancellationToken cancellationToken)
    {
        try
        {
            _router.Register(_prefix, (IHistorianDataSource)_client);
            _logger.LogInformation(
                "Wonderware historian sidecar registered as IHistoryRouter source under prefix '{Prefix}'",
                _prefix);
        }
        // ObjectDisposedException derives from InvalidOperationException. Exclude it so a
        // disposed router is not misreported as "already registered" — in that case no
        // registration exists at all.
        catch (InvalidOperationException ex) when (ex is not ObjectDisposedException)
        {
            // Prefix already registered (e.g. server restart without DI rebuild). Tolerate
            // — the existing registration is the same singleton instance and stays valid.
            _logger.LogWarning(ex,
                "Wonderware historian source already registered for prefix '{Prefix}' — leaving existing entry", _prefix);
        }

        return Task.CompletedTask;
    }

    /// <summary>No-op; this service holds nothing that needs stopping.</summary>
    public Task StopAsync(CancellationToken cancellationToken) => Task.CompletedTask;
}
|
||||
143
src/Server/ZB.MOM.WW.OtOpcUa.Server/HostStatusPublisher.cs
Normal file
143
src/Server/ZB.MOM.WW.OtOpcUa.Server/HostStatusPublisher.cs
Normal file
@@ -0,0 +1,143 @@
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.Hosting;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration.Entities;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration.Enums;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Hosting;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Server;
|
||||
|
||||
/// <summary>
|
||||
/// Walks every registered driver once per heartbeat interval, asks each
|
||||
/// <see cref="IHostConnectivityProbe"/>-capable driver for its current
|
||||
/// <see cref="HostConnectivityStatus"/> list, and upserts one
|
||||
/// <see cref="DriverHostStatus"/> row per (NodeId, DriverInstanceId, HostName) into the
|
||||
/// central config DB. Powers the Admin UI's per-host drill-down page (LMX follow-up #7).
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// <para>
|
||||
/// Polling rather than event-driven: simpler, and matches the cadence the Admin UI
|
||||
/// consumes. An event-subscription optimization (push on <c>OnHostStatusChanged</c> for
|
||||
/// immediate reflection) is a straightforward follow-up but adds lifecycle complexity
|
||||
/// — drivers can be registered after the publisher starts, and subscribing to each
|
||||
/// one's event on register + unsubscribing on unregister requires DriverHost to expose
|
||||
/// lifecycle events it doesn't today.
|
||||
/// </para>
|
||||
/// <para>
|
||||
/// <see cref="DriverHostStatus.LastSeenUtc"/> advances every heartbeat so the Admin UI
|
||||
/// can flag stale rows from a crashed Server process independent of
|
||||
/// <see cref="DriverHostStatus.State"/> — a Faulted publisher that stops heartbeating
|
||||
/// stays Faulted in the DB but its LastSeenUtc ages out, which is the signal
|
||||
/// operators actually want.
|
||||
/// </para>
|
||||
/// <para>
|
||||
/// If the DB is unreachable on a given tick, the publisher logs and moves on — it
|
||||
/// does not retry or buffer. The next heartbeat picks up the current-state snapshot,
|
||||
/// which is more useful than replaying stale transitions after a long outage.
|
||||
/// </para>
|
||||
/// </remarks>
|
||||
public sealed class HostStatusPublisher(
    DriverHost driverHost,
    NodeOptions nodeOptions,
    IServiceScopeFactory scopeFactory,
    ILogger<HostStatusPublisher> logger) : BackgroundService
{
    /// <summary>Delay between two consecutive publish ticks.</summary>
    internal static readonly TimeSpan HeartbeatInterval = TimeSpan.FromSeconds(10);

    /// <summary>
    /// Background loop: waits a short grace period, then calls
    /// <see cref="PublishOnceAsync"/> once per <see cref="HeartbeatInterval"/> until
    /// <paramref name="stoppingToken"/> is cancelled. A failing tick is logged and the
    /// loop continues; only a host-requested stop ends it.
    /// </summary>
    /// <param name="stoppingToken">Signalled by the host when the service must stop.</param>
    protected override async Task ExecuteAsync(CancellationToken stoppingToken)
    {
        // Wait a short moment at startup so NodeBootstrap's RegisterAsync calls have had a
        // chance to land. First tick runs immediately after so a freshly-started Server
        // surfaces its host topology in the Admin UI without waiting a full interval.
        try { await Task.Delay(TimeSpan.FromSeconds(2), stoppingToken); }
        catch (OperationCanceledException) { return; }

        while (!stoppingToken.IsCancellationRequested)
        {
            try { await PublishOnceAsync(stoppingToken); }
            // Only a cancellation caused by host shutdown ends the loop. A cancellation
            // raised for any other reason (e.g. an internal timeout surfacing as
            // OperationCanceledException) is treated like every other tick failure below,
            // otherwise the publisher would stop heartbeating silently and for good.
            catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested) { return; }
            catch (Exception ex)
            {
                // Never take down the Server on a publisher failure. Log and continue —
                // stale-row detection on the Admin side will surface the outage.
                logger.LogWarning(ex, "Host-status publisher tick failed — will retry next heartbeat");
            }

            try { await Task.Delay(HeartbeatInterval, stoppingToken); }
            catch (OperationCanceledException) { return; }
        }
    }

    /// <summary>
    /// Runs a single publish pass: for every registered driver that implements
    /// <see cref="IHostConnectivityProbe"/>, reads its host statuses and upserts one
    /// <see cref="DriverHostStatus"/> row per host, then saves all changes in one call.
    /// </summary>
    /// <param name="ct">Cancels the database calls.</param>
    /// <remarks>
    /// Returns without touching the database when no driver is registered. A driver whose
    /// <c>GetHostStatuses</c> call throws is logged and skipped for this pass; the other
    /// drivers are still processed.
    /// </remarks>
    internal async Task PublishOnceAsync(CancellationToken ct)
    {
        var driverIds = driverHost.RegisteredDriverIds;
        if (driverIds.Count == 0) return;

        // One timestamp for the whole pass so every row touched by this tick shares the
        // same LastSeenUtc.
        var now = DateTime.UtcNow;

        // Async scope: services resolved from it (the DbContext) are disposed asynchronously.
        await using var scope = scopeFactory.CreateAsyncScope();
        var db = scope.ServiceProvider.GetRequiredService<OtOpcUaConfigDbContext>();

        foreach (var driverId in driverIds)
        {
            var driver = driverHost.GetDriver(driverId);
            if (driver is not IHostConnectivityProbe probe) continue;

            IReadOnlyList<HostConnectivityStatus> statuses;
            try { statuses = probe.GetHostStatuses(); }
            catch (Exception ex)
            {
                logger.LogWarning(ex, "Driver {DriverId} GetHostStatuses threw — skipping this tick", driverId);
                continue;
            }

            foreach (var status in statuses)
            {
                await UpsertAsync(db, driverId, status, now, ct);
            }
        }

        await db.SaveChangesAsync(ct);
    }

    /// <summary>
    /// Inserts or updates the row keyed by (NodeId, DriverInstanceId, HostName). The change
    /// is only tracked on <paramref name="db"/>; the caller is responsible for saving.
    /// </summary>
    /// <param name="db">Context the row is looked up in and tracked on.</param>
    /// <param name="driverId">Driver instance the status belongs to.</param>
    /// <param name="status">Status reported by the driver for one host.</param>
    /// <param name="now">Timestamp written to <c>LastSeenUtc</c>.</param>
    /// <param name="ct">Cancels the lookup query.</param>
    private async Task UpsertAsync(OtOpcUaConfigDbContext db, string driverId,
        HostConnectivityStatus status, DateTime now, CancellationToken ct)
    {
        var mapped = MapState(status.State);
        var nodeId = nodeOptions.NodeId;

        var existing = await db.DriverHostStatuses.SingleOrDefaultAsync(r =>
            r.NodeId == nodeId
            && r.DriverInstanceId == driverId
            && r.HostName == status.HostName, ct);

        if (existing is null)
        {
            db.DriverHostStatuses.Add(new DriverHostStatus
            {
                NodeId = nodeId,
                DriverInstanceId = driverId,
                HostName = status.HostName,
                State = mapped,
                StateChangedUtc = status.LastChangedUtc,
                LastSeenUtc = now,
            });
            return;
        }

        // LastSeenUtc advances on every tick; State / StateChangedUtc only move when the
        // mapped state actually differs from what is stored.
        existing.LastSeenUtc = now;
        if (existing.State != mapped)
        {
            existing.State = mapped;
            existing.StateChangedUtc = status.LastChangedUtc;
        }
    }

    /// <summary>
    /// Maps the driver-side <see cref="HostState"/> onto the persisted
    /// <see cref="DriverHostState"/>. Any value other than Running, Stopped or Faulted maps
    /// to <see cref="DriverHostState.Unknown"/>.
    /// </summary>
    internal static DriverHostState MapState(HostState state) => state switch
    {
        HostState.Running => DriverHostState.Running,
        HostState.Stopped => DriverHostState.Stopped,
        HostState.Faulted => DriverHostState.Faulted,
        _ => DriverHostState.Unknown,
    };
}
|
||||
@@ -0,0 +1,160 @@
|
||||
using Microsoft.Data.SqlClient;
|
||||
using Microsoft.Extensions.Hosting;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using ZB.MOM.WW.OtOpcUa.Server.Redundancy;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Server.Hosting;
|
||||
|
||||
/// <summary>
|
||||
/// Phase 6.3 A.2 + Phase 6.1 Stream D follow-up — polls
|
||||
/// <c>sp_GetCurrentGenerationForCluster</c> on a cadence and, when a newer generation
|
||||
/// is detected, wraps the apply in an <see cref="ApplyLeaseRegistry"/> lease
|
||||
/// (flipping ServiceLevel to <see cref="ServiceLevelBand.PrimaryMidApply"/>) and
|
||||
/// refreshes the <see cref="RedundancyCoordinator"/> so operator role-swaps take
|
||||
/// effect without a process restart.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// <para>
|
||||
/// Before this service shipped, the Server only ever saw the generation in force
|
||||
/// at process start (<see cref="SealedBootstrap"/>). Peer-published generations
|
||||
/// silently accumulated in the shared config DB; the running node kept serving
|
||||
/// the generation it had sealed on boot until the operator restarted it.
|
||||
/// </para>
|
||||
/// <para>
|
||||
/// Closes the Phase 6.3 D.1 design hole around <c>PrimaryMidApply</c>: the
|
||||
/// <c>coordinator.BeginApplyLease(...)</c> wrap now encloses an actual apply
|
||||
/// (the coordinator refresh + future subscriber fan-out). Lease dispose fires
|
||||
/// on every exit path — success, exception, cancellation — so
|
||||
/// <c>ApplyLeaseRegistry</c> can never pin a crashed refresh at
|
||||
/// PrimaryMidApply.
|
||||
/// </para>
|
||||
/// <para>
|
||||
/// Deliberately narrow scope: refreshes <see cref="RedundancyCoordinator"/>
|
||||
/// only. Driver re-init, virtual-tag re-bind, script-engine reload, etc. remain
|
||||
/// as follow-up wiring — add subscribers to this service's apply path as those
|
||||
/// components grow hot-reload support. The lease wrap is the right seam for
|
||||
/// those subscribers to hook.
|
||||
/// </para>
|
||||
/// </remarks>
|
||||
public sealed class GenerationRefreshHostedService(
    NodeOptions options,
    ApplyLeaseRegistry leases,
    RedundancyCoordinator coordinator,
    ILogger<GenerationRefreshHostedService> logger,
    TimeSpan? tickInterval = null,
    Func<CancellationToken, Task<long?>>? currentGenerationQuery = null) : BackgroundService
{
    // Query used to read the current generation id; falls back to the stored-procedure
    // implementation when the caller does not inject one.
    private readonly Func<CancellationToken, Task<long?>> _generationQuery = currentGenerationQuery
        ?? new Func<CancellationToken, Task<long?>>(ct => DefaultQueryCurrentGenerationAsync(options, logger, ct));

    /// <summary>
    /// How often the service polls <c>sp_GetCurrentGenerationForCluster</c>. Default 5 s —
    /// low enough that operator publishes take effect promptly, high enough that the
    /// overhead on the central DB is negligible even across a 100-node fleet.
    /// </summary>
    public TimeSpan TickInterval { get; } = tickInterval ?? TimeSpan.FromSeconds(5);

    /// <summary>
    /// Newest generation the service has applied. Exposed for diagnostics +
    /// <see cref="TickCount"/> style health surfaces. <c>null</c> until the first
    /// refresh has completed.
    /// </summary>
    public long? LastAppliedGenerationId { get; private set; }

    /// <summary>Ticks whose generation query returned — whether or not a generation change was detected.</summary>
    public int TickCount { get; private set; }

    /// <summary>Ticks that observed a generation change and ran a refresh.</summary>
    public int RefreshCount { get; private set; }

    /// <summary>
    /// Background loop: runs <see cref="TickAsync"/> every <see cref="TickInterval"/>
    /// until <paramref name="stoppingToken"/> is cancelled. A failing tick is logged and
    /// the loop continues.
    /// </summary>
    /// <param name="stoppingToken">Signalled by the host when the service must stop.</param>
    protected override async Task ExecuteAsync(CancellationToken stoppingToken)
    {
        logger.LogInformation(
            "GenerationRefreshHostedService running — polling every {Tick}s",
            TickInterval.TotalSeconds);

        while (!stoppingToken.IsCancellationRequested)
        {
            try
            {
                await TickAsync(stoppingToken).ConfigureAwait(false);
            }
            // A cancellation is only allowed to escape when it is the host stopping us.
            // Any other OperationCanceledException (e.g. a timeout inside the refresh) is
            // logged like every other tick failure so the polling loop survives it.
            catch (Exception ex) when (ex is not OperationCanceledException || !stoppingToken.IsCancellationRequested)
            {
                logger.LogWarning(ex, "GenerationRefreshHostedService tick failed");
            }

            try { await Task.Delay(TickInterval, stoppingToken).ConfigureAwait(false); }
            catch (OperationCanceledException) { break; }
        }
    }

    /// <summary>
    /// Single poll: reads the current generation and, when it differs from
    /// <see cref="LastAppliedGenerationId"/>, refreshes the coordinator inside an apply
    /// lease. Internal so tests can drive one tick without the loop.
    /// </summary>
    /// <param name="cancellationToken">Cancels the query and the refresh.</param>
    /// <remarks>
    /// <see cref="LastAppliedGenerationId"/> and <see cref="RefreshCount"/> are only
    /// updated after the refresh returned without throwing, so a failed refresh is
    /// attempted again on the next tick.
    /// </remarks>
    internal async Task TickAsync(CancellationToken cancellationToken)
    {
        var current = await _generationQuery(cancellationToken).ConfigureAwait(false);
        TickCount++;
        if (current is null) return;

        if (LastAppliedGenerationId is long last && current == last)
        {
            return; // no change
        }

        logger.LogInformation(
            "Generation change detected — {Previous} → {Current}; applying",
            LastAppliedGenerationId?.ToString() ?? "(none)", current);

        // Lease wraps the apply window: ServiceLevelCalculator reads
        // ApplyLeaseRegistry.IsApplyInProgress and returns PrimaryMidApply (200) while any
        // lease is open. Publisher ticks in parallel (1s cadence) will observe the band
        // transition and push it onto the OPC UA Server.ServiceLevel node.
        var publishRequestId = Guid.NewGuid();
        await using (leases.BeginApplyLease(current.Value, publishRequestId))
        {
            await coordinator.RefreshAsync(cancellationToken).ConfigureAwait(false);
            // Future: fire a domain event that driver hosts / virtual-tag engine /
            // scripted-alarm engine subscribe to. For now the topology refresh is the
            // only thing we rewire — everything else still requires a process restart.
        }

        LastAppliedGenerationId = current;
        RefreshCount++;
    }

    /// <summary>
    /// Default generation-query implementation — reads via
    /// <c>sp_GetCurrentGenerationForCluster</c>. Returns <c>null</c> when the procedure
    /// yields no row or a NULL first column (no generation published yet), or when the
    /// DB call fails (logged at Warning; next tick retries). Tests inject a stub through
    /// the <c>currentGenerationQuery</c> constructor parameter instead.
    /// </summary>
    /// <param name="options">Supplies the connection string, node id and cluster id.</param>
    /// <param name="logger">Receives the warning when the call fails.</param>
    /// <param name="cancellationToken">Cancels the connection open and the query.</param>
    /// <returns>The generation id from the first column of the first row, or <c>null</c>.</returns>
    private static async Task<long?> DefaultQueryCurrentGenerationAsync(
        NodeOptions options,
        ILogger logger,
        CancellationToken cancellationToken)
    {
        try
        {
            await using var conn = new SqlConnection(options.ConfigDbConnectionString);
            await conn.OpenAsync(cancellationToken).ConfigureAwait(false);

            await using var cmd = conn.CreateCommand();
            cmd.CommandText = "EXEC dbo.sp_GetCurrentGenerationForCluster @NodeId=@n, @ClusterId=@c";
            cmd.Parameters.AddWithValue("@n", options.NodeId);
            cmd.Parameters.AddWithValue("@c", options.ClusterId);

            await using var reader = await cmd.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
            if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
            {
                return null;
            }

            // GetInt64 throws on a NULL column; treat NULL as "nothing published yet"
            // rather than letting it surface as a failed DB call on every tick.
            if (await reader.IsDBNullAsync(0, cancellationToken).ConfigureAwait(false))
            {
                return null;
            }

            return reader.GetInt64(0);
        }
        catch (Exception ex) when (ex is not OperationCanceledException)
        {
            logger.LogWarning(ex, "sp_GetCurrentGenerationForCluster failed — will retry");
            return null;
        }
    }
}
|
||||
103
src/Server/ZB.MOM.WW.OtOpcUa.Server/Hosting/PeerHttpProbeLoop.cs
Normal file
103
src/Server/ZB.MOM.WW.OtOpcUa.Server/Hosting/PeerHttpProbeLoop.cs
Normal file
@@ -0,0 +1,103 @@
|
||||
using Microsoft.Extensions.Hosting;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using ZB.MOM.WW.OtOpcUa.Server.Redundancy;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Server.Hosting;
|
||||
|
||||
/// <summary>
|
||||
/// Phase 6.3 Stream B.1 — HTTP peer-probe loop. Polls every configured peer's
|
||||
/// <c>/healthz</c> endpoint on a fast cadence (default 2 s) with a short timeout
|
||||
/// (default 1 s) and writes the result to <see cref="PeerReachabilityTracker"/>.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// <para>
|
||||
/// Fast-fail layer — the UA probe short-circuits when HTTP says dead, so a failing
|
||||
/// peer is detected within ~2 s without paying the cost of a full OPC UA session
|
||||
/// setup on every tick.
|
||||
/// </para>
|
||||
/// <para>
|
||||
/// Writes preserve the last UA-health bit so a transient HTTP blip doesn't stomp the
|
||||
/// authoritative UA reading until the next UA tick. <see cref="PeerReachability"/>
|
||||
/// is a record; we compose a new one per update.
|
||||
/// </para>
|
||||
/// <para>
|
||||
/// Probe target is derived as <c>http://{peer.Host}:{peer.DashboardPort}/healthz</c>.
|
||||
/// The server's own health-endpoints host serves <c>/healthz</c> on the dashboard
|
||||
/// port, so this is symmetric with what peers expect to be probed.
|
||||
/// </para>
|
||||
/// </remarks>
|
||||
public sealed class PeerHttpProbeLoop(
    RedundancyCoordinator coordinator,
    PeerReachabilityTracker tracker,
    IHttpClientFactory httpClientFactory,
    ILogger<PeerHttpProbeLoop> logger,
    PeerProbeOptions? options = null) : BackgroundService
{
    private readonly PeerProbeOptions _options = options ?? new PeerProbeOptions();

    /// <summary>Name passed to <see cref="IHttpClientFactory.CreateClient(string)"/> for probe clients.</summary>
    internal const string HttpClientName = "PeerHttpProbe";

    /// <summary>
    /// Background loop: one <see cref="TickAsync"/> per configured HTTP probe interval
    /// until the host requests a stop. Tick failures are logged, never rethrown.
    /// </summary>
    protected override async Task ExecuteAsync(CancellationToken stoppingToken)
    {
        logger.LogInformation(
            "PeerHttpProbeLoop running — probe every {Interval}ms, timeout {Timeout}ms",
            _options.HttpProbeInterval.TotalMilliseconds, _options.HttpProbeTimeout.TotalMilliseconds);

        while (true)
        {
            if (stoppingToken.IsCancellationRequested)
            {
                return;
            }

            try
            {
                await TickAsync(stoppingToken).ConfigureAwait(false);
            }
            catch (Exception ex) when (ex is not OperationCanceledException)
            {
                logger.LogWarning(ex, "PeerHttpProbeLoop tick failed");
            }

            try
            {
                await Task.Delay(_options.HttpProbeInterval, stoppingToken).ConfigureAwait(false);
            }
            catch (OperationCanceledException)
            {
                return;
            }
        }
    }

    /// <summary>
    /// Probes every peer of the current topology once. Internal so a unit test can drive
    /// a single tick without the loop.
    /// </summary>
    internal async Task TickAsync(CancellationToken cancellationToken)
    {
        var topology = coordinator.Current;
        if (topology is null)
        {
            return;
        }

        var peers = topology.Peers;
        if (peers.Count == 0)
        {
            return;
        }

        // All probes are started before any is awaited — one slow peer shouldn't block
        // the cadence for the others.
        var pending = new List<Task>(peers.Count);
        foreach (var peer in peers)
        {
            pending.Add(ProbeAsync(peer, cancellationToken));
        }

        await Task.WhenAll(pending).ConfigureAwait(false);
    }

    /// <summary>
    /// Issues one GET against the peer's <c>/healthz</c> URL and stores the outcome in
    /// the tracker's <c>HttpHealthy</c> flag, leaving the rest of the record as read.
    /// </summary>
    private async Task ProbeAsync(RedundancyPeer peer, CancellationToken cancellationToken)
    {
        var url = $"http://{peer.Host}:{peer.DashboardPort}/healthz";

        bool healthy;
        try
        {
            using var client = httpClientFactory.CreateClient(HttpClientName);
            client.Timeout = _options.HttpProbeTimeout;
            using var response = await client.GetAsync(url, cancellationToken).ConfigureAwait(false);
            healthy = response.IsSuccessStatusCode;
        }
        catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
        {
            // Shutdown — drop the result rather than writing a false-unhealthy.
            return;
        }
        catch (HttpRequestException)
        {
            // Transport-level failure (connection refused, DNS fail, TLS fail) counts as
            // unhealthy. Not logged per tick; only state transitions are logged below.
            healthy = false;
        }
        catch (OperationCanceledException)
        {
            // Cancellation that did not come from the caller's token, i.e. the client
            // timeout elapsed (TaskCanceledException derives from this type).
            healthy = false;
        }

        var current = tracker.Get(peer.NodeId);
        if (current.HttpHealthy != healthy)
        {
            logger.LogInformation("Peer {NodeId} HTTP probe {Transition} ({Url})",
                peer.NodeId, healthy ? "Healthy" : "Unhealthy", url);
        }

        tracker.Update(peer.NodeId, current with { HttpHealthy = healthy });
    }
}
|
||||
@@ -0,0 +1,27 @@
|
||||
namespace ZB.MOM.WW.OtOpcUa.Server.Hosting;
|
||||
|
||||
/// <summary>
|
||||
/// Configuration for the Phase 6.3 Stream B peer-probe HostedServices
|
||||
/// (<see cref="PeerHttpProbeLoop"/> + <see cref="PeerUaProbeLoop"/>). Drives cadence +
|
||||
/// timeout for the two-layer probe model. Defaults match the spec in
|
||||
/// <c>docs/v2/implementation/phase-6-3-redundancy-runtime.md</c> §Stream B.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// HTTP layer is the fast-fail at 2 s / 1 s timeout; UA layer is authoritative at 10 s /
|
||||
/// 5 s timeout. The UA probe short-circuits when the HTTP probe last reported the peer
|
||||
/// unhealthy, to avoid burning TCP sessions on a known-dead endpoint.
|
||||
/// </remarks>
|
||||
public sealed class PeerProbeOptions
{
    // Default values, in seconds, for the four settings below.
    private const double DefaultHttpIntervalSeconds = 2;
    private const double DefaultHttpTimeoutSeconds = 1;
    private const double DefaultUaIntervalSeconds = 10;
    private const double DefaultUaTimeoutSeconds = 5;

    /// <summary>Tick cadence of <see cref="PeerHttpProbeLoop"/>. Defaults to 2 s.</summary>
    public TimeSpan HttpProbeInterval { get; init; } = TimeSpan.FromSeconds(DefaultHttpIntervalSeconds);

    /// <summary>Timeout applied to each HTTP <c>/healthz</c> request. Defaults to 1 s.</summary>
    public TimeSpan HttpProbeTimeout { get; init; } = TimeSpan.FromSeconds(DefaultHttpTimeoutSeconds);

    /// <summary>Tick cadence of <see cref="PeerUaProbeLoop"/>. Defaults to 10 s.</summary>
    public TimeSpan UaProbeInterval { get; init; } = TimeSpan.FromSeconds(DefaultUaIntervalSeconds);

    /// <summary>Timeout applied to each OPC UA endpoint discovery probe. Defaults to 5 s.</summary>
    public TimeSpan UaProbeTimeout { get; init; } = TimeSpan.FromSeconds(DefaultUaTimeoutSeconds);
}
|
||||
133
src/Server/ZB.MOM.WW.OtOpcUa.Server/Hosting/PeerUaProbeLoop.cs
Normal file
133
src/Server/ZB.MOM.WW.OtOpcUa.Server/Hosting/PeerUaProbeLoop.cs
Normal file
@@ -0,0 +1,133 @@
|
||||
using Microsoft.Extensions.Hosting;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Opc.Ua;
|
||||
using ZB.MOM.WW.OtOpcUa.Server.Redundancy;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Server.Hosting;
|
||||
|
||||
/// <summary>
|
||||
/// Phase 6.3 Stream B.2 — OPC UA peer-probe loop. Opens a minimal discovery session to
|
||||
/// each peer's OPC UA endpoint on a slow cadence (default 10 s) and records
|
||||
/// <see cref="PeerReachability.UaHealthy"/> in the tracker.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// <para>
|
||||
/// Authoritative layer — the OPC UA discovery call verifies the endpoint actually
|
||||
/// serves UA traffic (not just that the host OS answers a TCP connect on 4840).
|
||||
/// If the peer passes UA discovery, it can serve real client sessions.
|
||||
/// </para>
|
||||
/// <para>
|
||||
/// Short-circuits when the HTTP probe (<see cref="PeerHttpProbeLoop"/>) last marked
|
||||
/// the peer unhealthy — no point burning a full TCP+OPC UA handshake on a peer the
|
||||
/// fast-fail probe already says is dead. In that case <see cref="PeerReachability.UaHealthy"/>
|
||||
/// is cleared (stale-UA-state protection) so a sustained HTTP outage doesn't leave
|
||||
/// an ancient UaHealthy=true reading feeding the ServiceLevel calculator.
|
||||
/// </para>
|
||||
/// <para>
|
||||
/// Implementation uses <c>DiscoveryClient.GetEndpoints</c> rather than opening a
|
||||
/// full authenticated <c>Session</c> — the discovery endpoint is server-side cheap
|
||||
/// (no session state), needs no certificate trust, and is specifically designed for
|
||||
/// availability pinging. Timeout bounded by <see cref="PeerProbeOptions.UaProbeTimeout"/>.
|
||||
/// </para>
|
||||
/// </remarks>
|
||||
public sealed class PeerUaProbeLoop(
    RedundancyCoordinator coordinator,
    PeerReachabilityTracker tracker,
    ILogger<PeerUaProbeLoop> logger,
    PeerProbeOptions? options = null,
    Func<string, TimeSpan, CancellationToken, Task<bool>>? endpointProbe = null) : BackgroundService
{
    private readonly PeerProbeOptions _options = options ?? new PeerProbeOptions();

    // Probe delegate (endpoint URL, timeout, token) -> healthy; injectable for tests.
    private readonly Func<string, TimeSpan, CancellationToken, Task<bool>> _endpointProbe
        = endpointProbe ?? DefaultEndpointProbeAsync;

    /// <summary>
    /// Background loop: one <see cref="TickAsync"/> per configured UA probe interval
    /// until the host requests a stop. Tick failures are logged and the loop continues.
    /// </summary>
    /// <param name="stoppingToken">Signalled by the host when the service must stop.</param>
    protected override async Task ExecuteAsync(CancellationToken stoppingToken)
    {
        logger.LogInformation(
            "PeerUaProbeLoop running — probe every {Interval}ms, timeout {Timeout}ms",
            _options.UaProbeInterval.TotalMilliseconds, _options.UaProbeTimeout.TotalMilliseconds);

        while (!stoppingToken.IsCancellationRequested)
        {
            try
            {
                await TickAsync(stoppingToken).ConfigureAwait(false);
            }
            catch (Exception ex) when (ex is not OperationCanceledException)
            {
                logger.LogWarning(ex, "PeerUaProbeLoop tick failed");
            }

            try { await Task.Delay(_options.UaProbeInterval, stoppingToken).ConfigureAwait(false); }
            catch (OperationCanceledException) { break; }
        }
    }

    /// <summary>
    /// Probes every peer of the current topology once, all peers in parallel. Internal
    /// so tests can drive a single tick without the loop.
    /// </summary>
    /// <param name="cancellationToken">Forwarded to every peer probe.</param>
    internal async Task TickAsync(CancellationToken cancellationToken)
    {
        var topology = coordinator.Current;
        if (topology is null || topology.Peers.Count == 0) return;

        var probes = topology.Peers.Select(p => ProbeAsync(p, cancellationToken)).ToArray();
        await Task.WhenAll(probes).ConfigureAwait(false);
    }

    /// <summary>
    /// Probes one peer and records the result in the tracker's <c>UaHealthy</c> flag.
    /// </summary>
    /// <param name="peer">Peer whose OPC UA endpoint is probed.</param>
    /// <param name="cancellationToken">Forwarded to the probe; a cancelled token discards the result.</param>
    private async Task ProbeAsync(RedundancyPeer peer, CancellationToken cancellationToken)
    {
        var beforeProbe = tracker.Get(peer.NodeId);

        // Short-circuit: don't waste a UA handshake when HTTP says the peer is down. Clear
        // the UA bit so the publisher doesn't see a stale "UA still healthy" reading.
        if (!beforeProbe.HttpHealthy)
        {
            if (beforeProbe.UaHealthy)
            {
                tracker.Update(peer.NodeId, beforeProbe with { UaHealthy = false });
                logger.LogInformation("Peer {NodeId} UA probe cleared (HTTP unhealthy)", peer.NodeId);
            }
            return;
        }

        var endpoint = $"opc.tcp://{peer.Host}:{peer.OpcUaPort}";
        var healthy = await _endpointProbe(endpoint, _options.UaProbeTimeout, cancellationToken).ConfigureAwait(false);

        // Shutdown — the default probe reports a cancelled call as "false"; drop that
        // result rather than recording a false-unhealthy transition on the way out.
        if (cancellationToken.IsCancellationRequested) return;

        // Re-read the record after the await: the probe can take up to UaProbeTimeout,
        // during which the HTTP loop may have updated HttpHealthy. Composing the update
        // from the pre-probe snapshot would write that newer HTTP reading back to its
        // old value.
        var latest = tracker.Get(peer.NodeId);
        if (latest.UaHealthy != healthy)
        {
            logger.LogInformation("Peer {NodeId} UA probe {Transition} ({Endpoint})",
                peer.NodeId, healthy ? "Healthy" : "Unhealthy", endpoint);
        }
        tracker.Update(peer.NodeId, latest with { UaHealthy = healthy });
    }

    /// <summary>
    /// Default probe — <c>DiscoveryClient.GetEndpoints</c> against the peer's OPC UA
    /// endpoint. Returns <c>true</c> iff the call returns at least one advertised
    /// endpoint; any exception (transport, protocol, timeout, cancellation) counts as
    /// unhealthy and yields <c>false</c>.
    /// </summary>
    /// <param name="endpointUrl">Peer endpoint URL, e.g. <c>opc.tcp://host:port</c>.</param>
    /// <param name="timeout">Written to the endpoint configuration's <c>OperationTimeout</c> and used as the linked token's deadline.</param>
    /// <param name="cancellationToken">Caller's token, linked with the timeout.</param>
    internal static async Task<bool> DefaultEndpointProbeAsync(
        string endpointUrl, TimeSpan timeout, CancellationToken cancellationToken)
    {
        try
        {
            using var linked = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
            linked.CancelAfter(timeout);

            var config = EndpointConfiguration.Create();
            config.OperationTimeout = (int)timeout.TotalMilliseconds;

            using var discoveryClient = DiscoveryClient.Create(new Uri(endpointUrl), config);

            // The token handed to Task.Run only prevents the work item from starting once
            // it is cancelled; it does not interrupt a GetEndpoints call already running.
            // NOTE(review): the running call is presumably bounded by OperationTimeout —
            // confirm against the OPC UA SDK.
            var endpoints = await Task.Run(() => discoveryClient.GetEndpoints(null), linked.Token).ConfigureAwait(false);
            return endpoints is { Count: > 0 };
        }
        catch
        {
            // Deliberate catch-all: every failure mode, including cancellation, is
            // reported to the caller as "not healthy".
            return false;
        }
    }
}
|
||||
@@ -0,0 +1,130 @@
|
||||
using Microsoft.Extensions.Hosting;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using ZB.MOM.WW.OtOpcUa.Server.OpcUa;
|
||||
using ZB.MOM.WW.OtOpcUa.Server.Redundancy;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Server.Hosting;
|
||||
|
||||
/// <summary>
|
||||
/// Phase 6.3 Stream C (task #147) glue — drives <see cref="RedundancyStatePublisher"/> on
|
||||
/// a periodic tick and pushes the resulting ServiceLevel / ServerUriArray /
|
||||
/// RedundancySupport values onto the OPC UA Server node via
|
||||
/// <see cref="ServerRedundancyNodeWriter"/>.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// <para>
|
||||
/// The OPC UA <c>ServerObject</c> exists only after <c>StandardServer.OnServerStarted</c>
|
||||
/// has run, which is inside <see cref="OpcUaApplicationHost.StartAsync"/>. This hosted
|
||||
/// service polls for <c>host.Server?.CurrentInstance</c> to become non-null before
|
||||
/// binding the writer — the server boot sequence doesn't expose a "ready" event.
|
||||
/// </para>
|
||||
/// <para>
|
||||
/// Tick cadence is 1 s by default. The publisher is edge-triggered internally so a
|
||||
/// no-change tick is cheap; the writer is also idempotent so we can safely apply the
|
||||
/// same values every tick without generating spurious OPC UA notifications.
|
||||
/// </para>
|
||||
/// </remarks>
|
||||
public sealed class RedundancyPublisherHostedService(
    OpcUaApplicationHost host,
    RedundancyStatePublisher publisher,
    RedundancyCoordinator coordinator,
    ILogger<RedundancyPublisherHostedService> logger,
    ILoggerFactory loggerFactory) : BackgroundService
{
    /// <summary>Delay between two publisher ticks. Defaults to 1 s.</summary>
    public TimeSpan TickInterval { get; init; } = TimeSpan.FromSeconds(1);

    /// <summary>Delay between two checks for the OPC UA ServerObject during startup. Defaults to 250 ms.</summary>
    public TimeSpan ServerReadyPollInterval { get; init; } = TimeSpan.FromMilliseconds(250);

    /// <summary>
    /// Refreshes the topology, waits for the OPC UA server node to exist, wires the
    /// publisher events to the node writer and then ticks the publisher until stopped.
    /// </summary>
    protected override async Task ExecuteAsync(CancellationToken stoppingToken)
    {
        // Step 0 — load topology from the shared config DB. RefreshAsync (not
        // InitializeAsync) so an invariant violation degrades to
        // ServiceLevelBand.InvalidTopology rather than crashing the hosted service.
        await coordinator.RefreshAsync(stoppingToken).ConfigureAwait(false);

        // Step 1 — block until the OPC UA server's ServerObject has materialized.
        var nodeWriter = await WaitForServerReadyAsync(stoppingToken).ConfigureAwait(false);
        if (nodeWriter is null)
        {
            // Cancelled, or the server never became ready.
            return;
        }

        void PushServiceLevel(ServiceLevelSnapshot snap) => nodeWriter.ApplyServiceLevel(snap.Value);
        void PushServerUriArray(IReadOnlyList<string> uris) => nodeWriter.ApplyServerUriArray(uris);

        // Step 2 — edge-triggered ServiceLevel + ServerUriArray updates from the
        // publisher fan out onto the Server node through the writer.
        publisher.OnStateChanged += PushServiceLevel;
        publisher.OnServerUriArrayChanged += PushServerUriArray;

        // Step 3 — RedundancySupport from the coordinator's current topology. When no
        // topology is loaded yet, the per-tick call below applies it later.
        ApplyRedundancySupportIfKnown(nodeWriter);

        logger.LogInformation(
            "RedundancyPublisherHostedService running — tick every {Tick}ms",
            TickInterval.TotalMilliseconds);

        try
        {
            while (!stoppingToken.IsCancellationRequested)
            {
                try
                {
                    publisher.ComputeAndPublish();
                    ApplyRedundancySupportIfKnown(nodeWriter); // cheap + idempotent
                }
                catch (Exception ex) when (ex is not OperationCanceledException)
                {
                    logger.LogWarning(ex, "RedundancyStatePublisher tick failed");
                }

                try
                {
                    await Task.Delay(TickInterval, stoppingToken).ConfigureAwait(false);
                }
                catch (OperationCanceledException)
                {
                    break;
                }
            }
        }
        finally
        {
            // Always detach the handlers so the publisher stops calling into the writer.
            publisher.OnStateChanged -= PushServiceLevel;
            publisher.OnServerUriArrayChanged -= PushServerUriArray;
        }
    }

    /// <summary>
    /// Polls until the OPC UA server exposes a non-null <c>ServerObject</c>, then returns
    /// a writer bound to it. Returns <c>null</c> when cancelled or when 60 s elapse first
    /// (the latter is logged as an error).
    /// </summary>
    private async Task<ServerRedundancyNodeWriter?> WaitForServerReadyAsync(CancellationToken ct)
    {
        // Bounded retry so a genuine failure to start doesn't pin the hosted service
        // forever. 60s is generous — production boot is ~2s on this box; cert PKI +
        // certificate-creation cases have been observed to take up to 15s cold.
        var giveUpAt = DateTime.UtcNow.AddSeconds(60);

        while (!ct.IsCancellationRequested && DateTime.UtcNow < giveUpAt)
        {
            var server = TryGetServerInternal();
            if (server?.ServerObject is not null)
            {
                return new ServerRedundancyNodeWriter(
                    server,
                    loggerFactory.CreateLogger<ServerRedundancyNodeWriter>());
            }

            try
            {
                await Task.Delay(ServerReadyPollInterval, ct).ConfigureAwait(false);
            }
            catch (OperationCanceledException)
            {
                return null;
            }
        }

        if (!ct.IsCancellationRequested)
        {
            logger.LogError("OPC UA ServerObject did not materialize within 60s — Phase 6.3 Stream C wiring is inactive");
        }

        return null;
    }

    /// <summary>
    /// Reads <c>host.Server?.CurrentInstance</c>. A <c>BadServerHalted</c>
    /// <c>ServiceResultException</c> is treated as "not ready yet" and yields
    /// <c>null</c>; any other status code propagates so a true boot failure doesn't
    /// look like a server that is still starting.
    /// </summary>
    private Opc.Ua.Server.IServerInternal? TryGetServerInternal()
    {
        try
        {
            return host.Server?.CurrentInstance;
        }
        catch (Opc.Ua.ServiceResultException ex) when (ex.StatusCode == Opc.Ua.StatusCodes.BadServerHalted)
        {
            // Server is mid-startup — caller keeps polling.
            return null;
        }
    }

    /// <summary>Applies the current topology's mode to the writer; does nothing while no topology is loaded.</summary>
    private void ApplyRedundancySupportIfKnown(ServerRedundancyNodeWriter writer)
    {
        if (coordinator.Current is { } topology)
        {
            writer.ApplyRedundancySupport(topology.Mode);
        }
    }
}
|
||||
@@ -0,0 +1,139 @@
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Microsoft.Extensions.Hosting;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration.Entities;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Resilience;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Server.Hosting;
|
||||
|
||||
/// <summary>
/// Samples <see cref="DriverResilienceStatusTracker"/> at a fixed tick + upserts each
/// <c>(DriverInstanceId, HostName)</c> snapshot into <see cref="DriverInstanceResilienceStatus"/>
/// so Admin <c>/hosts</c> can render live resilience counters across restarts.
/// </summary>
/// <remarks>
/// <para>Closes the HostedService piece of Phase 6.1 Stream E.2 flagged as a follow-up
/// when the tracker shipped in PR #82. The Admin UI column-refresh piece (red badge when
/// ConsecutiveFailures > breakerThreshold / 2 + SignalR push) is still deferred to
/// the visual-compliance pass — this service owns the persistence half alone.</para>
///
/// <para>Tick interval defaults to 5 s. Persistence is best-effort: a DB outage during
/// a tick logs + continues; the next tick tries again with the latest snapshots. The
/// hosted service never crashes the app on sample failure. The only exception that
/// leaves <see cref="PersistOnceAsync"/> is an <see cref="OperationCanceledException"/>
/// raised while the supplied token is actually cancelled.</para>
///
/// <para><see cref="PersistOnceAsync"/> factored as a public method so tests can drive
/// it directly, matching the <see cref="ScheduledRecycleHostedService.TickOnceAsync"/>
/// pattern for deterministic unit-test timing.</para>
/// </remarks>
public sealed class ResilienceStatusPublisherHostedService : BackgroundService
{
    private readonly DriverResilienceStatusTracker _tracker;
    private readonly IDbContextFactory<OtOpcUaConfigDbContext> _dbContextFactory;
    private readonly ILogger<ResilienceStatusPublisherHostedService> _logger;
    private readonly TimeProvider _timeProvider;

    /// <summary>Tick interval — how often the tracker snapshot is persisted.</summary>
    public TimeSpan TickInterval { get; }

    /// <summary>Snapshot of the tick count for diagnostics + test assertions.</summary>
    public int TickCount { get; private set; }

    /// <summary>Creates the publisher.</summary>
    /// <param name="tracker">Source of the in-memory resilience counters. Must not be null.</param>
    /// <param name="dbContextFactory">Factory for the per-tick config DB context. Must not be null.</param>
    /// <param name="logger">Logger for start/stop and persistence-failure messages.</param>
    /// <param name="timeProvider">Clock used for the delay and the sample timestamp; defaults to <see cref="TimeProvider.System"/>.</param>
    /// <param name="tickInterval">Delay between persistence passes; defaults to 5 seconds.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="tracker"/> or <paramref name="dbContextFactory"/> is null.
    /// </exception>
    public ResilienceStatusPublisherHostedService(
        DriverResilienceStatusTracker tracker,
        IDbContextFactory<OtOpcUaConfigDbContext> dbContextFactory,
        ILogger<ResilienceStatusPublisherHostedService> logger,
        TimeProvider? timeProvider = null,
        TimeSpan? tickInterval = null)
    {
        ArgumentNullException.ThrowIfNull(tracker);
        ArgumentNullException.ThrowIfNull(dbContextFactory);

        _tracker = tracker;
        _dbContextFactory = dbContextFactory;
        _logger = logger;
        _timeProvider = timeProvider ?? TimeProvider.System;
        TickInterval = tickInterval ?? TimeSpan.FromSeconds(5);
    }

    /// <summary>
    /// Background loop: wait one <see cref="TickInterval"/>, persist once, repeat until
    /// <paramref name="stoppingToken"/> is cancelled.
    /// </summary>
    /// <param name="stoppingToken">Signalled by the host on shutdown.</param>
    protected override async Task ExecuteAsync(CancellationToken stoppingToken)
    {
        _logger.LogInformation(
            "ResilienceStatusPublisherHostedService starting — tick interval = {Interval}",
            TickInterval);

        while (!stoppingToken.IsCancellationRequested)
        {
            try
            {
                await Task.Delay(TickInterval, _timeProvider, stoppingToken).ConfigureAwait(false);

                // Inside the guard so a shutdown that lands mid-persist exits the loop
                // cleanly and still emits the "stopping" log below.
                await PersistOnceAsync(stoppingToken).ConfigureAwait(false);
            }
            catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested)
            {
                break;
            }
        }

        _logger.LogInformation("ResilienceStatusPublisherHostedService stopping after {TickCount} tick(s).", TickCount);
    }

    /// <summary>
    /// Take one snapshot of the tracker + upsert each pair into the persistence table.
    /// Swallows transient exceptions + logs them; never throws from a sample failure.
    /// </summary>
    /// <param name="cancellationToken">Cancels the DB round-trips of this pass.</param>
    /// <exception cref="OperationCanceledException">
    /// Only when <paramref name="cancellationToken"/> has been cancelled. A cancellation-shaped
    /// exception raised for any other reason is treated as a sample failure and logged.
    /// </exception>
    public async Task PersistOnceAsync(CancellationToken cancellationToken)
    {
        // Counted before the empty-snapshot early-out, so empty ticks are still ticks.
        TickCount++;
        var snapshot = _tracker.Snapshot();
        if (snapshot.Count == 0) return;

        try
        {
            await using var db = await _dbContextFactory.CreateDbContextAsync(cancellationToken).ConfigureAwait(false);

            // One timestamp per pass so every row written by this tick shares LastSampledUtc.
            var now = _timeProvider.GetUtcNow().UtcDateTime;

            foreach (var (driverInstanceId, hostName, counters) in snapshot)
            {
                var existing = await db.DriverInstanceResilienceStatuses
                    .FirstOrDefaultAsync(x => x.DriverInstanceId == driverInstanceId && x.HostName == hostName, cancellationToken)
                    .ConfigureAwait(false);

                if (existing is null)
                {
                    db.DriverInstanceResilienceStatuses.Add(new DriverInstanceResilienceStatus
                    {
                        DriverInstanceId = driverInstanceId,
                        HostName = hostName,
                        LastCircuitBreakerOpenUtc = counters.LastBreakerOpenUtc,
                        ConsecutiveFailures = counters.ConsecutiveFailures,
                        CurrentBulkheadDepth = counters.CurrentInFlight,
                        LastRecycleUtc = counters.LastRecycleUtc,
                        BaselineFootprintBytes = counters.BaselineFootprintBytes,
                        CurrentFootprintBytes = counters.CurrentFootprintBytes,
                        LastSampledUtc = now,
                    });
                }
                else
                {
                    existing.LastCircuitBreakerOpenUtc = counters.LastBreakerOpenUtc;
                    existing.ConsecutiveFailures = counters.ConsecutiveFailures;
                    existing.CurrentBulkheadDepth = counters.CurrentInFlight;
                    existing.LastRecycleUtc = counters.LastRecycleUtc;
                    existing.BaselineFootprintBytes = counters.BaselineFootprintBytes;
                    existing.CurrentFootprintBytes = counters.CurrentFootprintBytes;
                    existing.LastSampledUtc = now;
                }
            }

            // Single save for the whole pass: either every row of this tick lands or none does.
            await db.SaveChangesAsync(cancellationToken).ConfigureAwait(false);
        }
        catch (Exception ex) when (ex is not OperationCanceledException || !cancellationToken.IsCancellationRequested)
        {
            // Genuine cancellation (token signalled) bypasses this handler and propagates.
            // Everything else — including an OperationCanceledException that did not come
            // from our token — is a failed sample: log it and let the next tick retry.
            _logger.LogWarning(ex,
                "ResilienceStatusPublisher persistence tick failed; next tick will retry with latest snapshots.");
        }
    }
}
|
||||
@@ -0,0 +1,117 @@
|
||||
using Microsoft.Extensions.Hosting;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Stability;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Server.Hosting;
|
||||
|
||||
/// <summary>
/// Drives one or more <see cref="ScheduledRecycleScheduler"/> instances on a fixed tick
/// cadence. Closes Phase 6.1 Stream B.4 by turning the shipped-as-pure-logic scheduler
/// into a running background feature.
/// </summary>
/// <remarks>
/// <para>Registered as a singleton in Program.cs. Each Tier C driver instance that wants a
/// scheduled recycle registers its scheduler via
/// <see cref="AddScheduler(ScheduledRecycleScheduler)"/> at startup. The hosted service
/// wakes every <see cref="TickInterval"/> (default 1 min) and calls
/// <see cref="ScheduledRecycleScheduler.TickAsync"/> on each registered scheduler.</para>
///
/// <para>Scheduler registration is closed once <see cref="StartAsync"/> has run — callers
/// must register before the host starts, typically during DI setup. Adding a scheduler
/// mid-flight throws to avoid confusing "some ticks saw my scheduler, some didn't" races.</para>
/// </remarks>
public sealed class ScheduledRecycleHostedService : BackgroundService
{
    private readonly List<ScheduledRecycleScheduler> _schedulers = [];
    private readonly ILogger<ScheduledRecycleHostedService> _logger;
    private readonly TimeProvider _timeProvider;
    private bool _started;

    /// <summary>How often <see cref="ScheduledRecycleScheduler.TickAsync"/> fires on each registered scheduler.</summary>
    public TimeSpan TickInterval { get; }

    /// <summary>Creates the hosted service with no schedulers registered.</summary>
    /// <param name="logger">Logger for start/stop, fired-recycle and tick-failure messages.</param>
    /// <param name="timeProvider">Clock used for the delay and the tick timestamp; defaults to <see cref="TimeProvider.System"/>.</param>
    /// <param name="tickInterval">Delay between ticks; defaults to 1 minute.</param>
    public ScheduledRecycleHostedService(
        ILogger<ScheduledRecycleHostedService> logger,
        TimeProvider? timeProvider = null,
        TimeSpan? tickInterval = null)
    {
        _logger = logger;
        _timeProvider = timeProvider ?? TimeProvider.System;
        TickInterval = tickInterval ?? TimeSpan.FromMinutes(1);
    }

    /// <summary>Register a scheduler to drive. Must be called before the host starts.</summary>
    /// <param name="scheduler">Scheduler to tick on every pass. Must not be null.</param>
    /// <exception cref="ArgumentNullException"><paramref name="scheduler"/> is null.</exception>
    /// <exception cref="InvalidOperationException">The hosted service has already been started.</exception>
    public void AddScheduler(ScheduledRecycleScheduler scheduler)
    {
        ArgumentNullException.ThrowIfNull(scheduler);
        if (_started)
        {
            throw new InvalidOperationException(
                "Cannot register a ScheduledRecycleScheduler after the hosted service has started. " +
                "Register all schedulers during DI configuration / startup.");
        }

        _schedulers.Add(scheduler);
    }

    /// <summary>Snapshot of the current tick count — diagnostics only.</summary>
    public int TickCount { get; private set; }

    /// <summary>Snapshot of the number of registered schedulers — diagnostics only.</summary>
    public int SchedulerCount => _schedulers.Count;

    /// <summary>Closes scheduler registration, then starts the background loop.</summary>
    /// <param name="cancellationToken">Forwarded to the base implementation.</param>
    public override Task StartAsync(CancellationToken cancellationToken)
    {
        // Set before the base call launches ExecuteAsync so no tick can observe a late Add.
        _started = true;
        return base.StartAsync(cancellationToken);
    }

    /// <summary>
    /// Background loop: wait one <see cref="TickInterval"/>, tick every scheduler, repeat
    /// until <paramref name="stoppingToken"/> is cancelled.
    /// </summary>
    /// <param name="stoppingToken">Signalled by the host on shutdown.</param>
    protected override async Task ExecuteAsync(CancellationToken stoppingToken)
    {
        _logger.LogInformation(
            "ScheduledRecycleHostedService starting — {Count} scheduler(s), tick interval = {Interval}",
            _schedulers.Count, TickInterval);

        while (!stoppingToken.IsCancellationRequested)
        {
            try
            {
                await Task.Delay(TickInterval, _timeProvider, stoppingToken).ConfigureAwait(false);

                // Inside the guard so a shutdown that lands mid-tick exits the loop
                // cleanly and still emits the "stopping" log below.
                await TickOnceAsync(stoppingToken).ConfigureAwait(false);
            }
            catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested)
            {
                break;
            }
        }

        _logger.LogInformation("ScheduledRecycleHostedService stopping after {TickCount} tick(s).", TickCount);
    }

    /// <summary>
    /// Execute one scheduler tick against every registered scheduler. Factored out of the
    /// <see cref="ExecuteAsync"/> loop so tests can drive it directly without needing to
    /// synchronize with <see cref="Task.Delay(TimeSpan, TimeProvider, CancellationToken)"/>.
    /// </summary>
    /// <param name="cancellationToken">Forwarded to each scheduler's tick.</param>
    /// <exception cref="OperationCanceledException">
    /// Only when <paramref name="cancellationToken"/> has been cancelled. Any other failure
    /// from a scheduler — including a cancellation-shaped exception that did not come from
    /// this token — is logged and the remaining schedulers still run.
    /// </exception>
    public async Task TickOnceAsync(CancellationToken cancellationToken)
    {
        // One timestamp per pass so every scheduler evaluates against the same instant.
        var now = _timeProvider.GetUtcNow().UtcDateTime;
        TickCount++;

        foreach (var scheduler in _schedulers)
        {
            try
            {
                var fired = await scheduler.TickAsync(now, cancellationToken).ConfigureAwait(false);
                if (fired)
                {
                    _logger.LogInformation("Scheduled recycle fired at {Now:o}; next = {Next:o}",
                        now, scheduler.NextRecycleUtc);
                }
            }
            catch (Exception ex) when (ex is not OperationCanceledException || !cancellationToken.IsCancellationRequested)
            {
                // A single scheduler fault must not take down the rest — log + continue.
                // Genuine cancellation (token signalled) bypasses this handler and propagates.
                _logger.LogError(ex,
                    "ScheduledRecycleScheduler tick failed at {Now:o}; continuing to other schedulers.", now);
            }
        }
    }
}
|
||||
64
src/Server/ZB.MOM.WW.OtOpcUa.Server/NodeBootstrap.cs
Normal file
64
src/Server/ZB.MOM.WW.OtOpcUa.Server/NodeBootstrap.cs
Normal file
@@ -0,0 +1,64 @@
|
||||
using Microsoft.Data.SqlClient;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration.LocalCache;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Server;
|
||||
|
||||
/// <summary>
/// Bootstraps a node: fetches the current generation from the central DB via
/// <c>sp_GetCurrentGenerationForCluster</c>. If the DB is unreachable and a LiteDB cache entry
/// exists, falls back to cached config per decision #79 (degraded-but-running).
/// </summary>
public sealed class NodeBootstrap(
    NodeOptions options,
    ILocalConfigCache localCache,
    ILogger<NodeBootstrap> logger)
{
    /// <summary>
    /// Resolves the generation this node should run: central DB first, local cache when the
    /// DB attempt fails with a <see cref="SqlException"/>, <see cref="InvalidOperationException"/>
    /// or <see cref="TimeoutException"/>.
    /// </summary>
    /// <param name="ct">Cancels the DB and cache calls.</param>
    /// <returns>
    /// The resolved generation and where it came from. The generation id is <c>null</c> when
    /// the DB answered but returned no row.
    /// </returns>
    /// <exception cref="BootstrapException">The DB attempt failed and the cache has no entry for the cluster.</exception>
    public async Task<BootstrapResult> LoadCurrentGenerationAsync(CancellationToken ct)
    {
        try
        {
            return await QueryCentralDbAsync(ct);
        }
        catch (Exception ex) when (ex is SqlException or InvalidOperationException or TimeoutException)
        {
            return await FallBackToCacheAsync(ex, ct);
        }
    }

    /// <summary>Runs the stored procedure and maps its first row (if any) to a result.</summary>
    private async Task<BootstrapResult> QueryCentralDbAsync(CancellationToken ct)
    {
        await using var connection = new SqlConnection(options.ConfigDbConnectionString);
        await connection.OpenAsync(ct);

        await using var command = connection.CreateCommand();
        command.CommandText = "EXEC dbo.sp_GetCurrentGenerationForCluster @NodeId=@n, @ClusterId=@c";
        command.Parameters.AddWithValue("@n", options.NodeId);
        command.Parameters.AddWithValue("@c", options.ClusterId);

        await using var rows = await command.ExecuteReaderAsync(ct);
        if (await rows.ReadAsync(ct))
        {
            // Column 0 of the first row carries the generation id.
            var currentGeneration = rows.GetInt64(0);
            logger.LogInformation("Bootstrapped from central DB: generation {GenerationId}", currentGeneration);
            return BootstrapResult.FromDb(currentGeneration);
        }

        logger.LogWarning("Cluster {Cluster} has no Published generation yet", options.ClusterId);
        return BootstrapResult.EmptyFromDb();
    }

    /// <summary>Serves the most recent cached generation, or fails when the cache is empty.</summary>
    private async Task<BootstrapResult> FallBackToCacheAsync(Exception dbFailure, CancellationToken ct)
    {
        logger.LogWarning(dbFailure, "Central DB unreachable; trying LiteDB cache fallback (decision #79)");

        var snapshot = await localCache.GetMostRecentAsync(options.ClusterId, ct);
        if (snapshot is null)
        {
            throw new BootstrapException(
                "Central DB unreachable and no local cache available — cannot bootstrap.", dbFailure);
        }

        logger.LogWarning("Bootstrapping from cache: generation {GenerationId} cached at {At}",
            snapshot.GenerationId, snapshot.CachedAt);
        return BootstrapResult.FromCache(snapshot.GenerationId);
    }
}
|
||||
|
||||
/// <summary>Outcome of a node bootstrap: which generation to run and where it was found.</summary>
/// <param name="GenerationId">Resolved generation id; <c>null</c> for the empty-from-DB result.</param>
/// <param name="Source">Origin of the result.</param>
public sealed record BootstrapResult(long? GenerationId, BootstrapSource Source)
{
    /// <summary>Result for generation <paramref name="g"/> read from the central DB.</summary>
    public static BootstrapResult FromDb(long g)
    {
        return new BootstrapResult(g, BootstrapSource.CentralDb);
    }

    /// <summary>Result for generation <paramref name="g"/> read from the local cache.</summary>
    public static BootstrapResult FromCache(long g)
    {
        return new BootstrapResult(g, BootstrapSource.LocalCache);
    }

    /// <summary>Result with no generation id, sourced from the central DB.</summary>
    public static BootstrapResult EmptyFromDb()
    {
        return new BootstrapResult(null, BootstrapSource.CentralDb);
    }
}
|
||||
|
||||
/// <summary>Where a bootstrap result was obtained from.</summary>
public enum BootstrapSource
{
    /// <summary>The central config database.</summary>
    CentralDb,

    /// <summary>The local cache.</summary>
    LocalCache,
}
|
||||
|
||||
/// <summary>Raised when a node cannot bootstrap; wraps the failure that triggered it.</summary>
public sealed class BootstrapException : Exception
{
    /// <summary>Creates the exception.</summary>
    /// <param name="message">Human-readable reason.</param>
    /// <param name="inner">The underlying failure.</param>
    public BootstrapException(string message, Exception inner)
        : base(message, inner)
    {
    }
}
|
||||
28
src/Server/ZB.MOM.WW.OtOpcUa.Server/NodeOptions.cs
Normal file
28
src/Server/ZB.MOM.WW.OtOpcUa.Server/NodeOptions.cs
Normal file
@@ -0,0 +1,28 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Server.Security;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Server;
|
||||
|
||||
/// <summary>
/// Bootstrap configuration read from <c>appsettings.json</c> (decision #18) — the minimum a
/// node needs to reach the central config DB and identify itself. Everything else comes from
/// the DB after bootstrap succeeds.
/// </summary>
public sealed class NodeOptions
{
    /// <summary>Name of the configuration section these options are read from.</summary>
    public const string SectionName = "Node";

    /// <summary>Stable node ID matching <c>ClusterNode.NodeId</c> in the central config DB.</summary>
    public required string NodeId { get; init; }

    /// <summary>Cluster this node belongs to.</summary>
    public required string ClusterId { get; init; }

    /// <summary>SQL Server connection string for the central config DB.</summary>
    public required string ConfigDbConnectionString { get; init; }

    /// <summary>Path to the LiteDB local cache file. Defaults to <c>config_cache.db</c>.</summary>
    public string LocalCachePath { get; init; } = "config_cache.db";

    /// <summary>Phase 6.2 authorization pipeline config. Disabled by default.</summary>
    public AuthorizationOptions Authorization { get; init; } = new AuthorizationOptions();
}
|
||||
@@ -0,0 +1,247 @@
|
||||
using System.Net;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Hosting;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Observability;
|
||||
using ZB.MOM.WW.OtOpcUa.Driver.Modbus;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Server.Observability;
|
||||
|
||||
/// <summary>
/// Standalone <see cref="HttpListener"/> host for <c>/healthz</c> and <c>/readyz</c>
/// separate from the OPC UA binding. Per <c>docs/v2/implementation/phase-6-1-resilience-
/// and-observability.md</c> §Stream C.1.
/// </summary>
/// <remarks>
/// Binds to <c>http://localhost:4841</c> by default — loopback avoids the Windows URL-ACL
/// elevation requirement that binding to <c>http://+:4841</c> (wildcard) would impose.
/// When a deployment needs remote probing, a reverse proxy or explicit netsh urlacl grant
/// is the expected path; documented in <c>docs/v2/Server-Deployment.md</c> in a follow-up.
/// </remarks>
public sealed class HealthEndpointsHost : IAsyncDisposable
{
    private readonly string _prefix;
    private readonly DriverHost _driverHost;
    private readonly Func<bool> _configDbHealthy;
    private readonly Func<bool> _usingStaleConfig;
    private readonly ILogger<HealthEndpointsHost> _logger;
    private readonly HttpListener _listener = new();
    private readonly DateTime _startedUtc = DateTime.UtcNow;
    private CancellationTokenSource? _cts;
    private Task? _acceptLoop;
    private bool _disposed;

    /// <summary>Creates the host and registers the listener prefix; does not start listening.</summary>
    /// <param name="driverHost">Source of the registered drivers reported by <c>/readyz</c> and the diagnostics routes.</param>
    /// <param name="logger">Logger for startup and handler-failure messages.</param>
    /// <param name="configDbHealthy">Probe for config-DB reachability; defaults to always <c>true</c>.</param>
    /// <param name="usingStaleConfig">Probe for the stale-config flag; defaults to always <c>false</c>.</param>
    /// <param name="prefix">Listener prefix; a trailing <c>/</c> is appended when missing.</param>
    public HealthEndpointsHost(
        DriverHost driverHost,
        ILogger<HealthEndpointsHost> logger,
        Func<bool>? configDbHealthy = null,
        Func<bool>? usingStaleConfig = null,
        string prefix = "http://localhost:4841/")
    {
        _driverHost = driverHost;
        _logger = logger;
        _configDbHealthy = configDbHealthy ?? (() => true);
        _usingStaleConfig = usingStaleConfig ?? (() => false);
        _prefix = prefix.EndsWith('/') ? prefix : prefix + "/";
        _listener.Prefixes.Add(_prefix);
    }

    /// <summary>Starts the listener and launches the background accept loop.</summary>
    public void Start()
    {
        _listener.Start();
        _cts = new CancellationTokenSource();

        // Capture the token now so the loop never has to re-read the mutable _cts field.
        var token = _cts.Token;
        _acceptLoop = Task.Run(() => AcceptLoopAsync(token));
        _logger.LogInformation("Health endpoints listening on {Prefix}", _prefix);
    }

    /// <summary>Accepts requests until cancelled or the listener is disposed; each request is handled on its own task.</summary>
    private async Task AcceptLoopAsync(CancellationToken ct)
    {
        while (!ct.IsCancellationRequested)
        {
            HttpListenerContext ctx;
            try
            {
                ctx = await _listener.GetContextAsync().ConfigureAwait(false);
            }
            catch (HttpListenerException) when (ct.IsCancellationRequested) { break; }
            catch (ObjectDisposedException) { break; }

            // Deliberately no cancellation token here: once a context has been accepted it
            // must always reach HandleAsync so its response gets closed. Passing the token
            // would let a shutdown race cancel the task before it runs and drop the context.
            _ = Task.Run(() => HandleAsync(ctx));
        }
    }

    /// <summary>Routes one request by path; always closes the response, and never throws.</summary>
    private async Task HandleAsync(HttpListenerContext ctx)
    {
        try
        {
            var path = ctx.Request.Url?.AbsolutePath ?? "/";
            switch (path)
            {
                case "/healthz":
                    await WriteHealthzAsync(ctx).ConfigureAwait(false);
                    break;
                case "/readyz":
                    await WriteReadyzAsync(ctx).ConfigureAwait(false);
                    break;
                default:
                    // #154 — driver-diagnostics path family. URL shape:
                    //   /diagnostics/drivers/{driverInstanceId}/modbus/auto-prohibited
                    // Driver-agnostic at the URL level so future driver types (S7, AbCip,
                    // FOCAS) can add their own per-type subpaths.
                    if (path.StartsWith("/diagnostics/drivers/", StringComparison.Ordinal))
                    {
                        await WriteDriverDiagnosticsAsync(ctx, path).ConfigureAwait(false);
                    }
                    else
                    {
                        ctx.Response.StatusCode = 404;
                    }

                    break;
            }
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Health endpoint handler failure");
            // Setting the status can itself throw (e.g. headers already sent); best-effort only.
            try { ctx.Response.StatusCode = 500; } catch { /* ignore */ }
        }
        finally
        {
            try { ctx.Response.Close(); } catch { /* ignore */ }
        }
    }

    /// <summary>Writes the liveness document; 200 when healthy, 503 otherwise.</summary>
    private async Task WriteHealthzAsync(HttpListenerContext ctx)
    {
        var configHealthy = _configDbHealthy();
        var staleConfig = _usingStaleConfig();
        // /healthz is 200 when process alive + (config DB reachable OR cache-warm).
        // Stale-config still serves 200 so the process isn't flagged dead when the DB
        // blips; the body surfaces the stale flag for operators.
        var healthy = configHealthy || staleConfig;
        ctx.Response.StatusCode = healthy ? 200 : 503;

        var body = JsonSerializer.Serialize(new
        {
            status = healthy ? "healthy" : "unhealthy",
            uptimeSeconds = (int)(DateTime.UtcNow - _startedUtc).TotalSeconds,
            configDbReachable = configHealthy,
            usingStaleConfig = staleConfig,
        });
        await WriteBodyAsync(ctx, body).ConfigureAwait(false);
    }

    /// <summary>Writes the readiness document; the status code is derived from the aggregated driver verdict.</summary>
    private async Task WriteReadyzAsync(HttpListenerContext ctx)
    {
        var snapshots = BuildSnapshots();
        var verdict = DriverHealthReport.Aggregate(snapshots);
        ctx.Response.StatusCode = DriverHealthReport.HttpStatus(verdict);

        var body = JsonSerializer.Serialize(new
        {
            verdict = verdict.ToString(),
            uptimeSeconds = (int)(DateTime.UtcNow - _startedUtc).TotalSeconds,
            drivers = snapshots.Select(d => new
            {
                id = d.DriverInstanceId,
                state = d.State.ToString(),
                detail = d.DetailMessage,
            }).ToArray(),
            degradedDrivers = snapshots
                .Where(d => d.State == DriverState.Degraded || d.State == DriverState.Reconnecting)
                .Select(d => d.DriverInstanceId)
                .ToArray(),
        });
        await WriteBodyAsync(ctx, body).ConfigureAwait(false);
    }

    /// <summary>Collects one health snapshot per registered driver, skipping ids that no longer resolve.</summary>
    private IReadOnlyList<DriverHealthSnapshot> BuildSnapshots()
    {
        var list = new List<DriverHealthSnapshot>();
        foreach (var id in _driverHost.RegisteredDriverIds)
        {
            var driver = _driverHost.GetDriver(id);
            if (driver is null) continue;
            var health = driver.GetHealth();
            list.Add(new DriverHealthSnapshot(driver.DriverInstanceId, health.State, health.LastError));
        }
        return list;
    }

    /// <summary>
    /// #154 — driver-diagnostics endpoint family. Routes
    /// <c>/diagnostics/drivers/{driverId}/modbus/auto-prohibited</c> to the live
    /// <see cref="ModbusDriver"/> instance's <see cref="ModbusDriver.GetAutoProhibitedRanges"/>.
    /// 404 when the driver instance doesn't exist; 400 when it exists but isn't a Modbus
    /// driver (the per-type endpoint is wrong for this row).
    /// </summary>
    /// <param name="ctx">Request/response pair to answer.</param>
    /// <param name="path">The request's absolute path, still percent-escaped.</param>
    private async Task WriteDriverDiagnosticsAsync(HttpListenerContext ctx, string path)
    {
        // Path shape: /diagnostics/drivers/{id}/modbus/auto-prohibited
        var segments = path.Split('/', StringSplitOptions.RemoveEmptyEntries);
        if (segments.Length < 4 || segments[0] != "diagnostics" || segments[1] != "drivers")
        {
            ctx.Response.StatusCode = 404;
            return;
        }

        // Uri.AbsolutePath is percent-escaped, so an id containing a space or a reserved
        // character arrives as e.g. "Line%201". Unescape after splitting so an escaped
        // '/' inside the id cannot shift the segment positions.
        var driverId = Uri.UnescapeDataString(segments[2]);
        var driver = _driverHost.GetDriver(driverId);
        if (driver is null)
        {
            ctx.Response.StatusCode = 404;
            await WriteBodyAsync(ctx, JsonSerializer.Serialize(new { error = $"Driver '{driverId}' not found" })).ConfigureAwait(false);
            return;
        }

        // Per-driver-type subpath dispatch. Today only Modbus is wired; future drivers add
        // their own segments[3] cases.
        if (segments.Length >= 5 && segments[3] == "modbus" && segments[4] == "auto-prohibited")
        {
            if (driver is not ModbusDriver modbus)
            {
                ctx.Response.StatusCode = 400;
                await WriteBodyAsync(ctx, JsonSerializer.Serialize(new { error = $"Driver '{driverId}' is not a Modbus driver (type: {driver.DriverType})" })).ConfigureAwait(false);
                return;
            }

            var ranges = modbus.GetAutoProhibitedRanges();
            ctx.Response.StatusCode = 200;
            await WriteBodyAsync(ctx, JsonSerializer.Serialize(new
            {
                driverInstanceId = driverId,
                count = ranges.Count,
                ranges = ranges.Select(r => new
                {
                    unitId = r.UnitId,
                    region = r.Region.ToString(),
                    startAddress = r.StartAddress,
                    endAddress = r.EndAddress,
                    lastProbedUtc = r.LastProbedUtc,
                    bisectionPending = r.BisectionPending,
                }).ToArray(),
            })).ConfigureAwait(false);
            return;
        }

        // Known driver, unknown subpath.
        ctx.Response.StatusCode = 404;
    }

    /// <summary>Writes <paramref name="body"/> as a UTF-8 JSON response body with an explicit content length.</summary>
    private static async Task WriteBodyAsync(HttpListenerContext ctx, string body)
    {
        var bytes = Encoding.UTF8.GetBytes(body);
        ctx.Response.ContentType = "application/json; charset=utf-8";
        ctx.Response.ContentLength64 = bytes.LongLength;
        await ctx.Response.OutputStream.WriteAsync(bytes).ConfigureAwait(false);
    }

    /// <summary>
    /// Cancels the accept loop, stops the listener, waits for the loop to finish, then
    /// releases the listener and the cancellation source. Subsequent calls are no-ops.
    /// </summary>
    public async ValueTask DisposeAsync()
    {
        if (_disposed) return;
        _disposed = true;
        _cts?.Cancel();
        // Stop() makes the pending GetContextAsync fault, which is what unblocks the loop.
        try { _listener.Stop(); } catch { /* ignore */ }
        if (_acceptLoop is not null)
        {
            try { await _acceptLoop.ConfigureAwait(false); } catch { /* ignore */ }
        }
        _listener.Close();
        _cts?.Dispose();
    }
}
|
||||
@@ -0,0 +1,60 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Core.OpcUa;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Server.OpcUa;
|
||||
|
||||
/// <summary>
/// Holds pre-loaded <see cref="EquipmentNamespaceContent"/> snapshots keyed by
/// <c>DriverInstanceId</c>. Populated once during <see cref="OpcUaServerService"/> startup
/// (after <see cref="NodeBootstrap"/> resolves the generation) so the synchronous lookup
/// delegate on <see cref="OpcUaApplicationHost"/> can serve the walker from memory without
/// blocking on async DB I/O mid-dispatch.
/// </summary>
/// <remarks>
/// <para>The registry is intentionally a shared mutable singleton with set-once-per-bootstrap
/// semantics rather than an immutable map passed by value — the composition in Program.cs
/// builds <see cref="OpcUaApplicationHost"/> before <see cref="NodeBootstrap"/> runs, so the
/// registry must exist at DI-compose time but be empty until the generation is known. A
/// driver registered after the initial populate pass simply returns null from
/// <see cref="Get"/> + the wire-in falls back to the "no UNS content, let DiscoverAsync own
/// it" path that PR #155 established.</para>
///
/// <para>Keys compare with <see cref="StringComparer.OrdinalIgnoreCase"/>; every member
/// takes the same private lock around its access to the map.</para>
/// </remarks>
public sealed class DriverEquipmentContentRegistry
{
    private readonly Dictionary<string, EquipmentNamespaceContent> _content =
        new(StringComparer.OrdinalIgnoreCase);
    private readonly Lock _gate = new();

    /// <summary>Looks up the content stored for <paramref name="driverInstanceId"/>.</summary>
    /// <returns>The stored content, or <c>null</c> when nothing is stored under that id.</returns>
    public EquipmentNamespaceContent? Get(string driverInstanceId)
    {
        using (_gate.EnterScope())
        {
            // On a miss TryGetValue leaves the out value at its default (null).
            _content.TryGetValue(driverInstanceId, out var found);
            return found;
        }
    }

    /// <summary>Stores <paramref name="content"/> under <paramref name="driverInstanceId"/>, replacing any earlier entry.</summary>
    public void Set(string driverInstanceId, EquipmentNamespaceContent content)
    {
        using (_gate.EnterScope())
        {
            _content[driverInstanceId] = content;
        }
    }

    /// <summary>Number of driver instances that currently have content stored.</summary>
    public int Count
    {
        get
        {
            using (_gate.EnterScope())
            {
                return _content.Count;
            }
        }
    }

    /// <summary>
    /// Snapshot the current driver → content map. Returns a copy so callers can iterate
    /// without holding the lock. Used at authorization bootstrap to merge all namespaces
    /// into a single <see cref="Security.NodeScopeResolver"/> path index.
    /// </summary>
    public IReadOnlyDictionary<string, EquipmentNamespaceContent> Snapshot()
    {
        using (_gate.EnterScope())
        {
            var copy = new Dictionary<string, EquipmentNamespaceContent>(_content, StringComparer.OrdinalIgnoreCase);
            return copy;
        }
    }
}
|
||||
1530
src/Server/ZB.MOM.WW.OtOpcUa.Server/OpcUa/DriverNodeManager.cs
Normal file
1530
src/Server/ZB.MOM.WW.OtOpcUa.Server/OpcUa/DriverNodeManager.cs
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,86 @@
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration.Entities;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.OpcUa;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Server.OpcUa;
|
||||
|
||||
/// <summary>
/// Loads the <see cref="EquipmentNamespaceContent"/> snapshot the
/// <see cref="EquipmentNodeWalker"/> consumes, scoped to a single
/// (driverInstanceId, generationId) pair. Joins the four row sets the walker expects:
/// UnsAreas for the driver's cluster, UnsLines under those areas, Equipment bound to
/// this driver + its lines, and Tags bound to this driver + its equipment — all at the
/// supplied generation.
/// </summary>
/// <remarks>
/// <para>The walker is driver-instance-scoped (decisions #116–#121 put the UNS in the
/// Equipment-kind namespace owned by one driver instance at a time), so this loader is
/// too — a single call returns one driver's worth of rows, never the whole fleet.</para>
///
/// <para>Returns <c>null</c> when the driver instance has no Equipment rows at the
/// supplied generation. The wire-in in <see cref="OpcUaApplicationHost"/> treats null as
/// "this driver has no UNS content, skip the walker and let DiscoverAsync own the whole
/// address space" — the backward-compat path for drivers whose namespace kind is not
/// Equipment (Modbus / AB CIP / TwinCAT / FOCAS).</para>
/// </remarks>
public sealed class EquipmentNamespaceContentLoader(OtOpcUaConfigDbContext db)
{
    /// <summary>
    /// Load the walker-shaped snapshot for <paramref name="driverInstanceId"/> at
    /// <paramref name="generationId"/>. Returns <c>null</c> when the driver has no
    /// Equipment rows at that generation.
    /// </summary>
    /// <param name="driverInstanceId">Driver instance whose rows are loaded.</param>
    /// <param name="generationId">Config generation every row set is filtered to.</param>
    /// <param name="ct">Cancels the four queries.</param>
    public async Task<EquipmentNamespaceContent?> LoadAsync(
        string driverInstanceId, long generationId, CancellationToken ct)
    {
        // Only enabled Equipment rows count; with none there is nothing for the walker.
        var equipmentRows = await db.Equipment
            .AsNoTracking()
            .Where(row => row.DriverInstanceId == driverInstanceId && row.GenerationId == generationId && row.Enabled)
            .ToListAsync(ct).ConfigureAwait(false);

        if (equipmentRows is { Count: 0 })
        {
            return null;
        }

        // The UNS tree is narrowed to the lines + areas that host at least one Equipment row
        // bound to this driver, so unrelated branches of the cluster are never loaded and a
        // line without equipment does not contribute an empty folder to the walker output.
        var referencedLineIds = equipmentRows
            .Select(row => row.UnsLineId)
            .Distinct(StringComparer.OrdinalIgnoreCase)
            .ToArray();

        var lineRows = await db.UnsLines
            .AsNoTracking()
            .Where(row => row.GenerationId == generationId && referencedLineIds.Contains(row.UnsLineId))
            .ToListAsync(ct).ConfigureAwait(false);

        var referencedAreaIds = lineRows
            .Select(row => row.UnsAreaId)
            .Distinct(StringComparer.OrdinalIgnoreCase)
            .ToArray();

        var areaRows = await db.UnsAreas
            .AsNoTracking()
            .Where(row => row.GenerationId == generationId && referencedAreaIds.Contains(row.UnsAreaId))
            .ToListAsync(ct).ConfigureAwait(false);

        // Every Tag of this driver at this generation is loaded, including those with a null
        // EquipmentId (SystemPlatform-kind Galaxy tags per decision #120) that the walker
        // skips — the same rowset can then drive future non-Equipment-kind walks without
        // another DB hit. Filtering is a possible later optimization; the cost is bounded
        // by driver scope.
        var tagRows = await db.Tags
            .AsNoTracking()
            .Where(row => row.DriverInstanceId == driverInstanceId && row.GenerationId == generationId)
            .ToListAsync(ct).ConfigureAwait(false);

        return new EquipmentNamespaceContent(
            Areas: areaRows,
            Lines: lineRows,
            Equipment: equipmentRows,
            Tags: tagRows);
    }
}
|
||||
@@ -0,0 +1,351 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Opc.Ua;
|
||||
using Opc.Ua.Configuration;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration.LocalCache;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Hosting;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.OpcUa;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Resilience;
|
||||
using ZB.MOM.WW.OtOpcUa.Server.Alarms;
|
||||
using ZB.MOM.WW.OtOpcUa.Server.History;
|
||||
using ZB.MOM.WW.OtOpcUa.Server.Observability;
|
||||
using ZB.MOM.WW.OtOpcUa.Server.Security;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Server.OpcUa;
|
||||
|
||||
/// <summary>
|
||||
/// Wraps <see cref="ApplicationInstance"/> to bring the OPC UA server online — builds an
|
||||
/// <see cref="ApplicationConfiguration"/> programmatically (no external XML file), ensures
|
||||
/// the application certificate exists in the PKI store (auto-generates self-signed on first
|
||||
/// run), starts the server, then walks each <see cref="DriverNodeManager"/> and invokes
|
||||
/// <see cref="GenericDriverNodeManager.BuildAddressSpaceAsync"/> against it so the driver's
|
||||
/// discovery streams into the already-running server's address space.
|
||||
/// </summary>
|
||||
public sealed class OpcUaApplicationHost : IAsyncDisposable
|
||||
{
|
||||
private readonly OpcUaServerOptions _options;
|
||||
private readonly DriverHost _driverHost;
|
||||
private readonly IUserAuthenticator _authenticator;
|
||||
private readonly DriverResiliencePipelineBuilder _pipelineBuilder;
|
||||
private AuthorizationGate? _authzGate;
|
||||
private NodeScopeResolver? _scopeResolver;
|
||||
private readonly StaleConfigFlag? _staleConfigFlag;
|
||||
private readonly Func<string, ZB.MOM.WW.OtOpcUa.Core.Abstractions.DriverTier>? _tierLookup;
|
||||
private readonly Func<string, string?>? _resilienceConfigLookup;
|
||||
private readonly Func<string, ZB.MOM.WW.OtOpcUa.Core.OpcUa.EquipmentNamespaceContent?>? _equipmentContentLookup;
|
||||
|
||||
// Phase 7 Stream G follow-up (task #239). When composed with the VirtualTagEngine +
|
||||
// ScriptedAlarmEngine sources these route node reads to the engines instead of the
|
||||
// driver. Null = Phase 7 engines not enabled for this deployment (identical to pre-
|
||||
// Phase-7 behaviour). Late-bindable via SetPhase7Sources because the engines need
|
||||
// the bootstrapped generation id before they can compose, which is only known after
|
||||
// the host has been DI-constructed (task #246).
|
||||
private ZB.MOM.WW.OtOpcUa.Core.Abstractions.IReadable? _virtualReadable;
|
||||
private ZB.MOM.WW.OtOpcUa.Core.Abstractions.IReadable? _scriptedAlarmReadable;
|
||||
|
||||
// PR 1+2.W — server-level singletons. Threaded through to OtOpcUaServer + every
|
||||
// DriverNodeManager. Default null preserves existing test construction sites that
|
||||
// don't opt into the new server-side history routing or alarm-condition state machine.
|
||||
private readonly IHistoryRouter? _historyRouter;
|
||||
private readonly AlarmConditionService? _alarmConditionService;
|
||||
|
||||
private readonly ILoggerFactory _loggerFactory;
|
||||
private readonly ILogger<OpcUaApplicationHost> _logger;
|
||||
private ApplicationInstance? _application;
|
||||
private OtOpcUaServer? _server;
|
||||
private HealthEndpointsHost? _healthHost;
|
||||
private bool _disposed;
|
||||
|
||||
public OpcUaApplicationHost(OpcUaServerOptions options, DriverHost driverHost,
|
||||
IUserAuthenticator authenticator, ILoggerFactory loggerFactory, ILogger<OpcUaApplicationHost> logger,
|
||||
DriverResiliencePipelineBuilder? pipelineBuilder = null,
|
||||
AuthorizationGate? authzGate = null,
|
||||
NodeScopeResolver? scopeResolver = null,
|
||||
StaleConfigFlag? staleConfigFlag = null,
|
||||
Func<string, ZB.MOM.WW.OtOpcUa.Core.Abstractions.DriverTier>? tierLookup = null,
|
||||
Func<string, string?>? resilienceConfigLookup = null,
|
||||
Func<string, ZB.MOM.WW.OtOpcUa.Core.OpcUa.EquipmentNamespaceContent?>? equipmentContentLookup = null,
|
||||
ZB.MOM.WW.OtOpcUa.Core.Abstractions.IReadable? virtualReadable = null,
|
||||
ZB.MOM.WW.OtOpcUa.Core.Abstractions.IReadable? scriptedAlarmReadable = null,
|
||||
IHistoryRouter? historyRouter = null,
|
||||
AlarmConditionService? alarmConditionService = null)
|
||||
{
|
||||
_options = options;
|
||||
_driverHost = driverHost;
|
||||
_authenticator = authenticator;
|
||||
_pipelineBuilder = pipelineBuilder ?? new DriverResiliencePipelineBuilder();
|
||||
_authzGate = authzGate;
|
||||
_scopeResolver = scopeResolver;
|
||||
_staleConfigFlag = staleConfigFlag;
|
||||
_tierLookup = tierLookup;
|
||||
_resilienceConfigLookup = resilienceConfigLookup;
|
||||
_equipmentContentLookup = equipmentContentLookup;
|
||||
_virtualReadable = virtualReadable;
|
||||
_scriptedAlarmReadable = scriptedAlarmReadable;
|
||||
_historyRouter = historyRouter;
|
||||
_alarmConditionService = alarmConditionService;
|
||||
_loggerFactory = loggerFactory;
|
||||
_logger = logger;
|
||||
}
|
||||
|
||||
public OtOpcUaServer? Server => _server;
|
||||
|
||||
/// <summary>
/// Stores the Phase 7 engine-backed <c>IReadable</c> sources on this host so that
/// <see cref="StartAsync"/> can hand them to the <see cref="OtOpcUaServer"/> constructor.
/// Only permitted while no server instance exists yet.
/// </summary>
/// <param name="virtualReadable">Virtual-tag read source, or <c>null</c> for none.</param>
/// <param name="scriptedAlarmReadable">Scripted-alarm read source, or <c>null</c> for none.</param>
/// <exception cref="InvalidOperationException">
/// The <see cref="OtOpcUaServer"/> has already been created, so the previous values are captured.
/// </exception>
public void SetPhase7Sources(
    ZB.MOM.WW.OtOpcUa.Core.Abstractions.IReadable? virtualReadable,
    ZB.MOM.WW.OtOpcUa.Core.Abstractions.IReadable? scriptedAlarmReadable)
{
    var serverAlreadyCreated = _server is not null;
    if (serverAlreadyCreated)
    {
        throw new InvalidOperationException(
            "Phase 7 sources must be set before OpcUaApplicationHost.StartAsync; the OtOpcUaServer + DriverNodeManagers have already captured the previous values.");
    }

    (_virtualReadable, _scriptedAlarmReadable) = (virtualReadable, scriptedAlarmReadable);
}
|
||||
|
||||
/// <summary>
/// Stores the Phase 6.2 authorization gate and node-scope resolver on this host so that
/// <see cref="StartAsync"/> can hand them to the <see cref="OtOpcUaServer"/> constructor.
/// Only permitted while no server instance exists yet. Either argument may be <c>null</c>.
/// </summary>
/// <param name="gate">Authorization gate, or <c>null</c> for none.</param>
/// <param name="resolver">Node-scope resolver, or <c>null</c> for none.</param>
/// <exception cref="InvalidOperationException">
/// The <see cref="OtOpcUaServer"/> has already been created, so the previous values are captured.
/// </exception>
public void SetAuthorization(AuthorizationGate? gate, NodeScopeResolver? resolver)
{
    var serverAlreadyCreated = _server is not null;
    if (serverAlreadyCreated)
    {
        throw new InvalidOperationException(
            "Authorization must be set before OpcUaApplicationHost.StartAsync; the OtOpcUaServer + DriverNodeManagers have already captured the previous values.");
    }

    (_authzGate, _scopeResolver) = (gate, resolver);
}
|
||||
|
||||
/// <summary>
/// Brings the OPC UA server online. Builds and validates the
/// <see cref="ApplicationConfiguration"/>, checks the application certificate, constructs and
/// starts the <see cref="OtOpcUaServer"/>, optionally starts the health endpoints, then runs
/// <see cref="GenericDriverNodeManager.BuildAddressSpaceAsync"/> for every driver node manager
/// of the already-started server.
/// </summary>
/// <param name="ct">Cancels driver discovery; cancellation propagates to the caller.</param>
/// <exception cref="InvalidOperationException">
/// The application certificate check reported failure.
/// </exception>
/// <exception cref="OperationCanceledException">
/// <paramref name="ct"/> was cancelled while a driver's address space was being built.
/// </exception>
public async Task StartAsync(CancellationToken ct)
{
    var configuration = await BuildConfigurationAsync().ConfigureAwait(false);

    _application = new ApplicationInstance
    {
        ApplicationName = _options.ApplicationName,
        ApplicationType = ApplicationType.Server,
        ApplicationConfiguration = configuration,
    };

    var hasCert = await _application.CheckApplicationInstanceCertificate(silent: true, minimumKeySize: CertificateFactory.DefaultKeySize).ConfigureAwait(false);
    if (!hasCert)
    {
        throw new InvalidOperationException(
            $"OPC UA application certificate could not be validated or created in {_options.PkiStoreRoot}");
    }

    _server = new OtOpcUaServer(_driverHost, _authenticator, _pipelineBuilder, _loggerFactory,
        authzGate: _authzGate, scopeResolver: _scopeResolver,
        tierLookup: _tierLookup, resilienceConfigLookup: _resilienceConfigLookup,
        virtualReadable: _virtualReadable, scriptedAlarmReadable: _scriptedAlarmReadable,
        anonymousRoles: _options.AnonymousRoles,
        historyRouter: _historyRouter, alarmConditionService: _alarmConditionService);
    await _application.Start(_server).ConfigureAwait(false);

    _logger.LogInformation("OPC UA server started — endpoint={Endpoint} driverCount={Count}",
        _options.EndpointUrl, _server.DriverNodeManagers.Count);

    // Phase 6.1 Stream C: health endpoints on :4841 (loopback by default — see
    // HealthEndpointsHost remarks for the Windows URL-ACL tradeoff).
    if (_options.HealthEndpointsEnabled)
    {
        _healthHost = new HealthEndpointsHost(
            _driverHost,
            _loggerFactory.CreateLogger<HealthEndpointsHost>(),
            usingStaleConfig: _staleConfigFlag is null ? null : () => _staleConfigFlag.IsStale,
            prefix: _options.HealthEndpointsPrefix);
        _healthHost.Start();
    }

    // Drive each driver's discovery through its node manager. The node manager IS the
    // IAddressSpaceBuilder; GenericDriverNodeManager captures alarm-condition sinks into
    // its internal map and wires OnAlarmEvent → sink routing.
    //
    // ADR-001 Option A — when an EquipmentNamespaceContent is supplied for an
    // Equipment-kind driver, run the EquipmentNodeWalker BEFORE the driver's DiscoverAsync
    // so the UNS folder skeleton (Area/Line/Equipment) + Identification sub-folders +
    // the five identifier properties (decision #121) are in place. DiscoverAsync then
    // streams the driver's native shape on top; Tag rows bound to Equipment already
    // materialized via the walker don't get duplicated because the driver's DiscoverAsync
    // output is authoritative for its own native references only.
    foreach (var nodeManager in _server.DriverNodeManagers)
    {
        var driverId = nodeManager.Driver.DriverInstanceId;
        try
        {
            if (_equipmentContentLookup is not null)
            {
                var content = _equipmentContentLookup(driverId);
                if (content is not null)
                {
                    ZB.MOM.WW.OtOpcUa.Core.OpcUa.EquipmentNodeWalker.Walk(nodeManager, content);
                    _logger.LogInformation(
                        "UNS walker populated {Areas} area(s), {Lines} line(s), {Equipment} equipment, {Tags} tag(s) for driver {Driver}",
                        content.Areas.Count, content.Lines.Count, content.Equipment.Count, content.Tags.Count, driverId);
                }
            }

            var generic = new GenericDriverNodeManager(nodeManager.Driver);
            await generic.BuildAddressSpaceAsync(nodeManager, ct).ConfigureAwait(false);
            _logger.LogInformation("Address space populated for driver {Driver}", driverId);
        }
        // Caller-requested cancellation is not a driver fault: let it propagate instead of
        // logging it once per remaining driver. An OperationCanceledException raised while
        // ct is NOT cancelled (e.g. a driver-internal timeout) is still isolated below.
        catch (Exception ex) when (!(ex is OperationCanceledException && ct.IsCancellationRequested))
        {
            // Per decision #12: driver exceptions isolate — log and keep the server serving
            // the other drivers' subtrees. Re-building this one takes a Reinitialize call.
            _logger.LogError(ex, "Discovery failed for driver {Driver}; subtree faulted", driverId);
        }
    }
}

/// <summary>
/// Creates the PKI root directory, assembles the server's
/// <see cref="ApplicationConfiguration"/> from <c>_options</c> (directory-based certificate
/// stores under <c>own</c>/<c>issuers</c>/<c>trusted</c>/<c>rejected</c>, transport quotas,
/// endpoint + security + user-token policies) and validates it. When auto-accept is enabled
/// a validation hook accepts certificates whose only error is <c>BadCertificateUntrusted</c>.
/// </summary>
/// <returns>The validated configuration.</returns>
private async Task<ApplicationConfiguration> BuildConfigurationAsync()
{
    Directory.CreateDirectory(_options.PkiStoreRoot);

    var cfg = new ApplicationConfiguration
    {
        ApplicationName = _options.ApplicationName,
        ApplicationUri = _options.ApplicationUri,
        ApplicationType = ApplicationType.Server,
        ProductUri = "urn:OtOpcUa:Server",

        SecurityConfiguration = new SecurityConfiguration
        {
            ApplicationCertificate = new CertificateIdentifier
            {
                StoreType = CertificateStoreType.Directory,
                StorePath = Path.Combine(_options.PkiStoreRoot, "own"),
                SubjectName = "CN=" + _options.ApplicationName,
            },
            TrustedIssuerCertificates = new CertificateTrustList
            {
                StoreType = CertificateStoreType.Directory,
                StorePath = Path.Combine(_options.PkiStoreRoot, "issuers"),
            },
            TrustedPeerCertificates = new CertificateTrustList
            {
                StoreType = CertificateStoreType.Directory,
                StorePath = Path.Combine(_options.PkiStoreRoot, "trusted"),
            },
            RejectedCertificateStore = new CertificateTrustList
            {
                StoreType = CertificateStoreType.Directory,
                StorePath = Path.Combine(_options.PkiStoreRoot, "rejected"),
            },
            AutoAcceptUntrustedCertificates = _options.AutoAcceptUntrustedClientCertificates,
            AddAppCertToTrustedStore = true,
        },

        TransportConfigurations = new TransportConfigurationCollection(),
        TransportQuotas = new TransportQuotas { OperationTimeout = 15000 },

        ServerConfiguration = new ServerConfiguration
        {
            BaseAddresses = new StringCollection { _options.EndpointUrl },
            SecurityPolicies = BuildSecurityPolicies(),
            UserTokenPolicies = BuildUserTokenPolicies(),
            MinRequestThreadCount = 5,
            MaxRequestThreadCount = 100,
            MaxQueuedRequestCount = 200,
        },

        TraceConfiguration = new TraceConfiguration(),
    };

    // Awaited rather than blocked on: the only caller (StartAsync) is already async.
    await cfg.Validate(ApplicationType.Server).ConfigureAwait(false);

    if (cfg.SecurityConfiguration.AutoAcceptUntrustedCertificates)
    {
        cfg.CertificateValidator.CertificateValidation += (_, e) =>
        {
            if (e.Error.StatusCode == StatusCodes.BadCertificateUntrusted)
                e.Accept = true;
        };
    }

    return cfg;
}
|
||||
|
||||
/// <summary>
/// Produces the endpoint security policies. A <c>None</c>/<c>None</c> entry is always
/// present; a <c>SignAndEncrypt</c>/<c>Basic256Sha256</c> entry is appended when the
/// configured profile is <see cref="OpcUaSecurityProfile.Basic256Sha256SignAndEncrypt"/>.
/// </summary>
private ServerSecurityPolicyCollection BuildSecurityPolicies()
{
    var advertised = new ServerSecurityPolicyCollection();

    // The unsecured policy is unconditional, whichever profile is selected.
    advertised.Add(new ServerSecurityPolicy
    {
        SecurityMode = MessageSecurityMode.None,
        SecurityPolicyUri = SecurityPolicies.None,
    });

    if (_options.SecurityProfile != OpcUaSecurityProfile.Basic256Sha256SignAndEncrypt)
    {
        return advertised;
    }

    advertised.Add(new ServerSecurityPolicy
    {
        SecurityMode = MessageSecurityMode.SignAndEncrypt,
        SecurityPolicyUri = SecurityPolicies.Basic256Sha256,
    });
    return advertised;
}
|
||||
|
||||
/// <summary>
/// Produces the user-token policies. An <c>Anonymous</c> policy (scoped to the <c>None</c>
/// security policy) is always present; a <c>UserName</c> policy scoped to
/// <c>Basic256Sha256</c> is appended only when the profile is
/// <see cref="OpcUaSecurityProfile.Basic256Sha256SignAndEncrypt"/> and LDAP is enabled.
/// </summary>
private UserTokenPolicyCollection BuildUserTokenPolicies()
{
    var advertised = new UserTokenPolicyCollection();

    advertised.Add(new UserTokenPolicy(UserTokenType.Anonymous)
    {
        PolicyId = "Anonymous",
        SecurityPolicyUri = SecurityPolicies.None,
    });

    var secureProfile = _options.SecurityProfile == OpcUaSecurityProfile.Basic256Sha256SignAndEncrypt;
    if (!secureProfile || !_options.Ldap.Enabled)
    {
        return advertised;
    }

    // The UserName token is tied to the Basic256Sha256 policy URI rather than None,
    // so the password policy is only offered together with the encrypted policy.
    advertised.Add(new UserTokenPolicy(UserTokenType.UserName)
    {
        PolicyId = "UserName",
        SecurityPolicyUri = SecurityPolicies.Basic256Sha256,
    });
    return advertised;
}
|
||||
|
||||
/// <summary>
/// Shuts the host down: stops and disposes the OPC UA server (when one was created) and
/// disposes the health endpoints host (when one was started). Safe to call more than once;
/// only the first call does work. Failures are logged as warnings and never thrown.
/// </summary>
public async ValueTask DisposeAsync()
{
    if (_disposed) return;
    _disposed = true;

    if (_server is not null)
    {
        try
        {
            _server.Stop();
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "OPC UA server stop threw during dispose");
        }

        // The server is IDisposable; stopping alone does not release what it holds.
        // Disposed in its own try so a failed Stop still gets a Dispose attempt.
        try
        {
            _server.Dispose();
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "OPC UA server dispose threw");
        }
    }

    if (_healthHost is not null)
    {
        try
        {
            await _healthHost.DisposeAsync().ConfigureAwait(false);
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Health endpoints host dispose threw");
        }
    }
}
|
||||
}
|
||||
@@ -0,0 +1,99 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Server.Security;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Server.OpcUa;
|
||||
|
||||
/// <summary>
/// Selects the OPC UA transport security profile. The value decides which
/// <c>ServerSecurityPolicy</c> entries the endpoint advertises and takes part in deciding
/// whether a <c>UserName</c> token policy is offered.
/// </summary>
public enum OpcUaSecurityProfile
{
    /// <summary>
    /// Only the <c>SecurityPolicies.None</c> policy with the <c>Anonymous</c> token —
    /// no signing or encryption.
    /// </summary>
    None = 0,

    /// <summary>
    /// Adds a <c>Basic256Sha256</c> <c>SignAndEncrypt</c> policy next to the <c>None</c>
    /// policy, which stays advertised. A <c>UserName</c> token policy is offered in addition
    /// to <c>Anonymous</c> only when LDAP is enabled in the server options.
    /// </summary>
    Basic256Sha256SignAndEncrypt = 1,
}
|
||||
|
||||
/// <summary>
/// Endpoint, application-identity, PKI, health-endpoint, security-profile, LDAP and
/// anonymous-role settings for the OPC UA server. Bound from the <c>OpcUaServer</c>
/// section of <c>appsettings.json</c> (see <see cref="SectionName"/>).
/// </summary>
public sealed class OpcUaServerOptions
{
    /// <summary>Name of the configuration section these options bind from.</summary>
    public const string SectionName = "OpcUaServer";

    /// <summary>
    /// Endpoint URI used as the server's base address. Defaults to
    /// <c>opc.tcp://0.0.0.0:4840/OtOpcUa</c>.
    /// </summary>
    public string EndpointUrl { get; init; } = "opc.tcp://0.0.0.0:4840/OtOpcUa";

    /// <summary>
    /// Human-readable application name; also used as the <c>CN=</c> of the application
    /// certificate subject.
    /// </summary>
    public string ApplicationName { get; init; } = "OtOpcUa Server";

    /// <summary>Stable application URI — must match the subjectAltName of the app cert.</summary>
    public string ApplicationUri { get; init; } = "urn:OtOpcUa:Server";

    /// <summary>
    /// Root directory of the certificate stores (<c>own</c>, <c>issuers</c>, <c>trusted</c>,
    /// <c>rejected</c> sub-folders). Defaults to <c>OtOpcUa\pki</c> under the common
    /// application-data folder (<c>%ProgramData%</c> on Windows).
    /// </summary>
    public string PkiStoreRoot { get; init; } =
        System.IO.Path.Combine(
            Environment.GetFolderPath(Environment.SpecialFolder.CommonApplicationData),
            "OtOpcUa",
            "pki");

    /// <summary>
    /// When true, client certificates rejected only for being untrusted are accepted
    /// automatically. Defaults to <c>true</c> (development convenience); production
    /// deployments should set this to false and trust clients explicitly.
    /// </summary>
    public bool AutoAcceptUntrustedClientCertificates { get; init; } = true;

    /// <summary>
    /// Whether the health-endpoints HTTP listener is started with the server. Defaults to
    /// <c>true</c>; set false where HTTP is not wanted (e.g. tests that only exercise the
    /// OPC UA surface).
    /// </summary>
    public bool HealthEndpointsEnabled { get; init; } = true;

    /// <summary>
    /// URL prefix handed to the health-endpoints listener. Defaults to the loopback
    /// prefix <c>http://localhost:4841/</c>.
    /// </summary>
    public string HealthEndpointsPrefix { get; init; } = "http://localhost:4841/";

    /// <summary>
    /// Security profile advertised on the endpoint. Defaults to
    /// <see cref="OpcUaSecurityProfile.None"/>; choose
    /// <see cref="OpcUaSecurityProfile.Basic256Sha256SignAndEncrypt"/> to add the encrypted
    /// policy and, together with <see cref="Ldap"/>, UserName authentication.
    /// </summary>
    public OpcUaSecurityProfile SecurityProfile { get; init; } = OpcUaSecurityProfile.None;

    /// <summary>
    /// LDAP settings for UserName token validation. A UserName token policy is advertised
    /// only when <see cref="SecurityProfile"/> is
    /// <see cref="OpcUaSecurityProfile.Basic256Sha256SignAndEncrypt"/> and
    /// <c>LdapOptions.Enabled</c> is true.
    /// </summary>
    public LdapOptions Ldap { get; init; } = new LdapOptions();

    /// <summary>
    /// Roles attached to anonymous OPC UA sessions. Defaults to empty, in which case
    /// anonymous sessions receive a plain identity without roles. Dev + smoke-test
    /// deployments that need anonymous writes list the wanted role names here,
    /// e.g. <c>["WriteOperate"]</c>.
    /// </summary>
    public IReadOnlyList<string> AnonymousRoles { get; init; } = Array.Empty<string>();
}
|
||||
204
src/Server/ZB.MOM.WW.OtOpcUa.Server/OpcUa/OtOpcUaServer.cs
Normal file
204
src/Server/ZB.MOM.WW.OtOpcUa.Server/OpcUa/OtOpcUaServer.cs
Normal file
@@ -0,0 +1,204 @@
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Opc.Ua;
|
||||
using Opc.Ua.Server;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Hosting;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.OpcUa;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Resilience;
|
||||
using ZB.MOM.WW.OtOpcUa.Server.Alarms;
|
||||
using ZB.MOM.WW.OtOpcUa.Server.History;
|
||||
using ZB.MOM.WW.OtOpcUa.Server.Security;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Server.OpcUa;
|
||||
|
||||
/// <summary>
|
||||
/// <see cref="StandardServer"/> subclass that wires one <see cref="DriverNodeManager"/> per
|
||||
/// registered driver from <see cref="DriverHost"/>. Session identities are resolved in
|
||||
/// <c>OnImpersonateUser</c>: anonymous tokens pass through (optionally carrying the configured
|
||||
/// anonymous roles) and UserName tokens are validated through the injected <see cref="IUserAuthenticator"/>.
|
||||
/// </summary>
|
||||
public sealed class OtOpcUaServer : StandardServer
|
||||
{
|
||||
private readonly DriverHost _driverHost;
|
||||
private readonly IUserAuthenticator _authenticator;
|
||||
private readonly DriverResiliencePipelineBuilder _pipelineBuilder;
|
||||
private readonly AuthorizationGate? _authzGate;
|
||||
private readonly NodeScopeResolver? _scopeResolver;
|
||||
private readonly Func<string, DriverTier>? _tierLookup;
|
||||
private readonly Func<string, string?>? _resilienceConfigLookup;
|
||||
|
||||
// Phase 7 Stream G follow-up wiring (task #239). Shared across every DriverNodeManager
|
||||
// instantiated by this server so virtual-tag reads and scripted-alarm reads from any
|
||||
// driver's address-space subtree route to the same engine. When null (no Phase 7
|
||||
// engines composed for this deployment) DriverNodeManager falls back to driver-only
|
||||
// dispatch — identical to pre-Phase-7 behaviour.
|
||||
private readonly IReadable? _virtualReadable;
|
||||
private readonly IReadable? _scriptedAlarmReadable;
|
||||
|
||||
// PR 1+2.W — server-level singletons shared across every DriverNodeManager.
|
||||
// Null when the deployment hasn't opted into the new server-side history routing /
|
||||
// server-side alarm-condition state machine; DriverNodeManager falls back to the
|
||||
// legacy per-driver IHistoryProvider + IAlarmSource paths in that case.
|
||||
private readonly IHistoryRouter? _historyRouter;
|
||||
private readonly AlarmConditionService? _alarmConditionService;
|
||||
|
||||
/// <summary>
|
||||
/// Roles granted to anonymous sessions. When non-empty, <see cref="OnImpersonateUser"/>
|
||||
/// wraps <c>AnonymousIdentityToken</c> in a <see cref="RoleBasedIdentity"/> carrying
|
||||
/// these roles so <see cref="DriverNodeManager"/>'s write-authz check passes for
|
||||
/// matching classifications. Empty (the default) preserves the pre-existing behaviour
|
||||
/// of rejecting anonymous writes at <c>Operate</c> or higher.
|
||||
/// </summary>
|
||||
private readonly IReadOnlyList<string> _anonymousRoles;
|
||||
|
||||
private readonly ILoggerFactory _loggerFactory;
|
||||
private readonly List<DriverNodeManager> _driverNodeManagers = new();
|
||||
|
||||
public OtOpcUaServer(
|
||||
DriverHost driverHost,
|
||||
IUserAuthenticator authenticator,
|
||||
DriverResiliencePipelineBuilder pipelineBuilder,
|
||||
ILoggerFactory loggerFactory,
|
||||
AuthorizationGate? authzGate = null,
|
||||
NodeScopeResolver? scopeResolver = null,
|
||||
Func<string, DriverTier>? tierLookup = null,
|
||||
Func<string, string?>? resilienceConfigLookup = null,
|
||||
IReadable? virtualReadable = null,
|
||||
IReadable? scriptedAlarmReadable = null,
|
||||
IReadOnlyList<string>? anonymousRoles = null,
|
||||
IHistoryRouter? historyRouter = null,
|
||||
AlarmConditionService? alarmConditionService = null)
|
||||
{
|
||||
_driverHost = driverHost;
|
||||
_authenticator = authenticator;
|
||||
_pipelineBuilder = pipelineBuilder;
|
||||
_authzGate = authzGate;
|
||||
_scopeResolver = scopeResolver;
|
||||
_tierLookup = tierLookup;
|
||||
_resilienceConfigLookup = resilienceConfigLookup;
|
||||
_virtualReadable = virtualReadable;
|
||||
_scriptedAlarmReadable = scriptedAlarmReadable;
|
||||
_anonymousRoles = anonymousRoles ?? [];
|
||||
_historyRouter = historyRouter;
|
||||
_alarmConditionService = alarmConditionService;
|
||||
_loggerFactory = loggerFactory;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Read-only snapshot of the driver node managers materialized at server start. Used by
|
||||
/// the generic-driver-node-manager-driven discovery flow after the server starts — the
|
||||
/// host walks each entry and invokes
|
||||
/// <c>GenericDriverNodeManager.BuildAddressSpaceAsync(manager)</c> passing the manager
|
||||
/// as its own <see cref="IAddressSpaceBuilder"/>.
|
||||
/// </summary>
|
||||
public IReadOnlyList<DriverNodeManager> DriverNodeManagers => _driverNodeManagers;
|
||||
|
||||
/// <summary>
/// Creates one <see cref="DriverNodeManager"/> per driver id registered in the
/// <see cref="DriverHost"/> (ids whose driver resolves to <c>null</c> are skipped), records
/// them in <see cref="DriverNodeManagers"/>, and returns a <see cref="MasterNodeManager"/>
/// wrapping them.
/// </summary>
/// <param name="server">Server internals passed to every node manager.</param>
/// <param name="configuration">Application configuration passed to every node manager.</param>
/// <returns>The master node manager holding the driver node managers.</returns>
protected override MasterNodeManager CreateMasterNodeManager(IServerInternal server, ApplicationConfiguration configuration)
{
    // Start from an empty list so that a repeated invocation cannot leave managers from a
    // previous run (or duplicates) in DriverNodeManagers.
    _driverNodeManagers.Clear();

    // Same logger category for every driver — create it once, outside the loop.
    var logger = _loggerFactory.CreateLogger<DriverNodeManager>();

    foreach (var driverId in _driverHost.RegisteredDriverIds)
    {
        var driver = _driverHost.GetDriver(driverId);
        if (driver is null) continue;

        // Per-driver resilience options: tier comes from lookup (Phase 6.1 Stream B.1
        // DriverTypeRegistry in the prod wire-up) or falls back to Tier A. ResilienceConfig
        // JSON comes from the DriverInstance row via the optional lookup Func; parser
        // layers JSON overrides on top of tier defaults (Phase 6.1 Stream A.2).
        var tier = _tierLookup?.Invoke(driver.DriverType) ?? DriverTier.A;
        var resilienceJson = _resilienceConfigLookup?.Invoke(driver.DriverInstanceId);
        var options = DriverResilienceOptionsParser.ParseOrDefaults(tier, resilienceJson, out var diag);
        if (diag is not null)
        {
            logger.LogWarning("ResilienceConfig parse diagnostic for driver {DriverId}: {Diag}", driver.DriverInstanceId, diag);
        }

        var invoker = new CapabilityInvoker(_pipelineBuilder, driver.DriverInstanceId, () => options, driver.DriverType);
        var manager = new DriverNodeManager(server, configuration, driver, invoker, logger,
            authzGate: _authzGate, scopeResolver: _scopeResolver,
            virtualReadable: _virtualReadable, scriptedAlarmReadable: _scriptedAlarmReadable,
            historyRouter: _historyRouter, alarmService: _alarmConditionService);

        // The router stays empty after PR 1+2.W — DriverNodeManager's internal
        // LegacyDriverHistoryAdapter handles every driver that still implements
        // IHistoryProvider. PR 3.W will register the Wonderware sidecar as a router
        // source; PR 7.2 retires the legacy fallback entirely.

        _driverNodeManagers.Add(manager);
    }

    return new MasterNodeManager(server, configuration, null, _driverNodeManagers.ToArray());
}
|
||||
|
||||
/// <summary>
/// Runs the base start-up logic, then subscribes <c>OnImpersonateUser</c> to the session
/// manager's <c>ImpersonateUser</c> event so identity tokens are resolved by this server.
/// </summary>
/// <param name="server">Server internals exposing the session manager.</param>
protected override void OnServerStarted(IServerInternal server)
{
    base.OnServerStarted(server);

    // Anonymous tokens pass through; UserName tokens go to the IUserAuthenticator and a
    // failed check raises BadUserAccessDenied; any other token type raises
    // BadIdentityTokenInvalid.
    var sessions = server.SessionManager;
    sessions.ImpersonateUser += OnImpersonateUser;
}
|
||||
|
||||
/// <summary>
/// Resolves the identity for a session from the token it presented.
/// Anonymous tokens get a plain <see cref="UserIdentity"/>, or a role-carrying identity
/// when anonymous roles are configured. UserName tokens are checked through the
/// <see cref="IUserAuthenticator"/>. Every other token type is rejected.
/// </summary>
/// <param name="session">Session being activated (not inspected here).</param>
/// <param name="args">Carries the presented token and receives the resolved identity.</param>
/// <exception cref="ServiceResultException">
/// <c>BadUserAccessDenied</c> when authentication fails; <c>BadIdentityTokenInvalid</c> for
/// unsupported token types.
/// </exception>
private void OnImpersonateUser(Session session, ImpersonateEventArgs args)
{
    var presented = args.NewIdentity;

    if (presented is AnonymousIdentityToken)
    {
        if (_anonymousRoles.Count == 0)
        {
            // anonymous, no roles — production default
            args.Identity = new UserIdentity();
        }
        else
        {
            args.Identity = new RoleBasedIdentity("(anonymous)", "Anonymous", _anonymousRoles, ldapGroups: []);
        }

        return;
    }

    if (presented is UserNameIdentityToken user)
    {
        // The event handler is synchronous, so the async authenticator is waited on here.
        var authTask = _authenticator.AuthenticateAsync(
            user.UserName, user.DecryptedPassword, CancellationToken.None);
        var result = authTask.GetAwaiter().GetResult();

        if (!result.Success)
        {
            throw ServiceResultException.Create(
                StatusCodes.BadUserAccessDenied,
                "Invalid username or password ({0})", result.Error ?? "no detail");
        }

        args.Identity = new RoleBasedIdentity(user.UserName, result.DisplayName, result.Roles, result.Groups);
        return;
    }

    throw ServiceResultException.Create(
        StatusCodes.BadIdentityTokenInvalid,
        "Unsupported user identity token type: {0}", presented?.GetType().Name ?? "null");
}
|
||||
|
||||
/// <summary>
/// <see cref="UserIdentity"/> subclass that additionally carries the resolved roles
/// (<see cref="IRoleBearer"/>), the LDAP groups (<see cref="ILdapGroupsBearer"/>) and an
/// optional display name. Used for authenticated UserName sessions and for anonymous
/// sessions when anonymous roles are configured.
/// </summary>
private sealed class RoleBasedIdentity : UserIdentity, IRoleBearer, ILdapGroupsBearer
{
    /// <summary>Creates the identity; the base identity receives an empty password.</summary>
    /// <param name="userName">User name forwarded to the base identity.</param>
    /// <param name="displayName">Optional display name.</param>
    /// <param name="roles">Roles exposed through <see cref="Roles"/>.</param>
    /// <param name="ldapGroups">Groups exposed through <see cref="LdapGroups"/>.</param>
    public RoleBasedIdentity(string userName, string? displayName, IReadOnlyList<string> roles, IReadOnlyList<string> ldapGroups)
        : base(userName, "")
    {
        (Display, Roles, LdapGroups) = (displayName, roles, ldapGroups);
    }

    /// <summary>Optional display name supplied at construction.</summary>
    public string? Display { get; }

    /// <summary>Roles supplied at construction.</summary>
    public IReadOnlyList<string> Roles { get; }

    /// <summary>LDAP groups supplied at construction.</summary>
    public IReadOnlyList<string> LdapGroups { get; }
}
|
||||
|
||||
/// <summary>
/// Supplies the static product metadata for this server. Note that <c>BuildDate</c> is
/// filled with the current UTC time at the moment of the call, not a compile-time value.
/// </summary>
/// <returns>A freshly populated <see cref="ServerProperties"/>.</returns>
protected override ServerProperties LoadServerProperties()
{
    var properties = new ServerProperties();
    properties.ManufacturerName = "OtOpcUa";
    properties.ProductName = "OtOpcUa.Server";
    properties.ProductUri = "urn:OtOpcUa:Server";
    properties.SoftwareVersion = "2.0.0";
    properties.BuildNumber = "0";
    properties.BuildDate = DateTime.UtcNow;
    return properties;
}
|
||||
}
|
||||
122
src/Server/ZB.MOM.WW.OtOpcUa.Server/OpcUaServerService.cs
Normal file
122
src/Server/ZB.MOM.WW.OtOpcUa.Server/OpcUaServerService.cs
Normal file
@@ -0,0 +1,122 @@
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.Hosting;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Hosting;
|
||||
using ZB.MOM.WW.OtOpcUa.Server.OpcUa;
|
||||
using ZB.MOM.WW.OtOpcUa.Server.Phase7;
|
||||
using ZB.MOM.WW.OtOpcUa.Server.Security;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Server;
|
||||
|
||||
/// <summary>
/// BackgroundService that owns the OPC UA server lifecycle (decision #30, replacing TopShelf).
/// <see cref="ExecuteAsync"/> loads the current configuration generation and — when one is
/// published — registers drivers, pre-loads equipment content, composes Phase 7 and
/// authorization, then starts <see cref="OpcUaApplicationHost"/> and idles until the host
/// signals shutdown. <see cref="StopAsync"/> tears the pieces down again.
/// </summary>
public sealed class OpcUaServerService(
    NodeBootstrap bootstrap,
    DriverHost driverHost,
    OpcUaApplicationHost applicationHost,
    DriverEquipmentContentRegistry equipmentContentRegistry,
    DriverInstanceBootstrapper driverBootstrapper,
    Phase7Composer phase7Composer,
    AuthorizationBootstrap authorizationBootstrap,
    IServiceScopeFactory scopeFactory,
    ILogger<OpcUaServerService> logger) : BackgroundService
{
    /// <summary>
    /// Runs the startup pipeline and then waits for <paramref name="stoppingToken"/>.
    /// </summary>
    /// <param name="stoppingToken">Signalled by the host when the service should stop.</param>
    protected override async Task ExecuteAsync(CancellationToken stoppingToken)
    {
        logger.LogInformation("OtOpcUa.Server starting");

        var result = await bootstrap.LoadCurrentGenerationAsync(stoppingToken);
        logger.LogInformation("Bootstrap complete: source={Source} generation={Gen}", result.Source, result.GenerationId);

        // ADR-001 Option A — populate per-driver Equipment namespace snapshots into the
        // registry before StartAsync walks the address space. The walker on the OPC UA side
        // reads synchronously from the registry; pre-loading here means the hot path stays
        // non-blocking + each driver pays at most one Config-DB query at bootstrap time.
        // Skipped when no generation is Published yet — the fleet boots into a UNS-less
        // address space until the first publish, then the registry fills on next restart.
        if (result.GenerationId is { } gen)
        {
            // Task #248 — register IDriver instances from the published DriverInstance
            // rows BEFORE the equipment-content load + Phase 7 compose, so the rest of
            // the pipeline sees a populated DriverHost. Without this step Phase 7's
            // CachedTagUpstreamSource has no upstream feed + virtual-tag scripts read
            // BadNodeIdUnknown for every tag path (gap surfaced by task #240 smoke).
            await driverBootstrapper.RegisterDriversFromGenerationAsync(gen, stoppingToken);

            await PopulateEquipmentContentAsync(gen, stoppingToken);

            // Phase 7 follow-up #246 — load Script + VirtualTag + ScriptedAlarm rows,
            // compose VirtualTagEngine + ScriptedAlarmEngine, start the driver-bridge
            // feed. SetPhase7Sources MUST run before applicationHost.StartAsync because
            // OtOpcUaServer + DriverNodeManager construction captures the field values
            // — late binding after server start is rejected with InvalidOperationException.
            // No-op when the generation has no virtual tags or scripted alarms.
            var phase7 = await phase7Composer.PrepareAsync(gen, stoppingToken);
            applicationHost.SetPhase7Sources(phase7.VirtualReadable, phase7.ScriptedAlarmReadable);

            // Phase 6.2 Stream C wiring — build the AuthorizationGate + NodeScopeResolver
            // from the published generation's NodeAcl rows and the populated equipment
            // registry. No-op when Node:Authorization:Enabled=false. Must run before
            // StartAsync: OtOpcUaServer + DriverNodeManager construction captures the
            // field values on the application host.
            var (authzGate, scopeResolver) = await authorizationBootstrap
                .BuildAsync(gen, stoppingToken).ConfigureAwait(false);
            applicationHost.SetAuthorization(authzGate, scopeResolver);
        }

        await applicationHost.StartAsync(stoppingToken);

        logger.LogInformation("OtOpcUa.Server running. Hosted drivers: {Count}", driverHost.RegisteredDriverIds.Count);

        try
        {
            // Park until the host cancels; cancellation is the normal exit path.
            await Task.Delay(Timeout.InfiniteTimeSpan, stoppingToken);
        }
        catch (OperationCanceledException)
        {
            logger.LogInformation("OtOpcUa.Server stopping");
        }
    }

    /// <summary>
    /// Stops the background loop, then disposes Phase 7, the OPC UA application host and the
    /// driver host in that order. Every stage is attempted even when an earlier one throws;
    /// the failure still propagates to the caller once the remaining stages have run.
    /// </summary>
    /// <param name="cancellationToken">Forwarded to <see cref="BackgroundService.StopAsync"/>.</param>
    public override async Task StopAsync(CancellationToken cancellationToken)
    {
        await base.StopAsync(cancellationToken);

        // Dispose Phase 7 first so the bridge stops feeding the cache + the engines
        // stop firing alarm/historian events before the OPC UA server tears down its
        // node managers. Otherwise an in-flight cascade could try to push through a
        // disposed source and surface as a noisy shutdown warning.
        //
        // The nested finally blocks guarantee the later stages still run when an earlier
        // dispose throws, so a Phase 7 failure cannot leak the server or the drivers.
        try
        {
            await phase7Composer.DisposeAsync();
        }
        finally
        {
            try
            {
                await applicationHost.DisposeAsync();
            }
            finally
            {
                await driverHost.DisposeAsync();
            }
        }
    }

    /// <summary>
    /// Pre-load an <c>EquipmentNamespaceContent</c> snapshot for each registered driver at
    /// the bootstrapped generation. Null results (driver has no Equipment rows —
    /// Modbus/AB CIP/TwinCAT/FOCAS today per decisions #116–#121) are skipped: the walker
    /// wire-in sees Get(driverId) return null + falls back to DiscoverAsync-owns-it.
    /// Opens one scope so the scoped <c>OtOpcUaConfigDbContext</c> is shared across all
    /// per-driver queries rather than paying scope-setup overhead per driver.
    /// </summary>
    /// <param name="generationId">Generation whose equipment rows are loaded.</param>
    /// <param name="ct">Cancellation token forwarded to each load.</param>
    private async Task PopulateEquipmentContentAsync(long generationId, CancellationToken ct)
    {
        // Async scope: scoped services are released through DisposeAsync when they offer it,
        // instead of being disposed synchronously at the end of an async method.
        await using var scope = scopeFactory.CreateAsyncScope();
        var loader = scope.ServiceProvider.GetRequiredService<EquipmentNamespaceContentLoader>();

        var loaded = 0;
        foreach (var driverId in driverHost.RegisteredDriverIds)
        {
            var content = await loader.LoadAsync(driverId, generationId, ct).ConfigureAwait(false);
            if (content is null) continue;
            equipmentContentRegistry.Set(driverId, content);
            loaded++;
        }

        logger.LogInformation(
            "Equipment namespace snapshots loaded for {Count}/{Total} driver(s) at generation {Gen}",
            loaded, driverHost.RegisteredDriverIds.Count, generationId);
    }
}
|
||||
@@ -0,0 +1,84 @@
|
||||
using System.Collections.Concurrent;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Server.Phase7;
|
||||
|
||||
/// <summary>
/// Last-known-value cache that backs <c>ITagUpstreamSource</c> for both Phase 7 engines
/// (the Core.VirtualTags and Core.ScriptedAlarms interfaces are implemented by this one
/// type). Reads are answered synchronously from the cache; values arrive through
/// <see cref="Push"/>.
/// </summary>
/// <remarks>
/// <para>
/// Observers registered with <see cref="SubscribeTag"/> are invoked by <see cref="Push"/>
/// on the pushing thread, after the cache entry has been updated.
/// </para>
/// <para>
/// A path that has never been pushed reads back as a snapshot carrying
/// <see cref="UpstreamNotConfigured"/> as its status code.
/// </para>
/// </remarks>
public sealed class CachedTagUpstreamSource
    : Core.VirtualTags.ITagUpstreamSource,
      Core.ScriptedAlarms.ITagUpstreamSource
{
    /// <summary>Mirror of OPC UA <c>StatusCodes.BadNodeIdUnknown</c> without pulling the OPC stack dependency.</summary>
    public const uint UpstreamNotConfigured = 0x80340000;

    // Latest snapshot per path.
    private readonly ConcurrentDictionary<string, DataValueSnapshot> _values = new(StringComparer.Ordinal);

    // Observer lists per path; each list doubles as its own lock object.
    private readonly ConcurrentDictionary<string, List<Action<string, DataValueSnapshot>>> _observers
        = new(StringComparer.Ordinal);

    /// <summary>Returns the cached snapshot for <paramref name="path"/>, or a placeholder when none exists.</summary>
    /// <param name="path">Tag path; must be non-empty.</param>
    /// <returns>The last pushed snapshot, or one with status <see cref="UpstreamNotConfigured"/>.</returns>
    /// <exception cref="ArgumentException"><paramref name="path"/> is null or empty.</exception>
    public DataValueSnapshot ReadTag(string path)
    {
        if (string.IsNullOrEmpty(path))
        {
            throw new ArgumentException("path required", nameof(path));
        }

        if (_values.TryGetValue(path, out var cached))
        {
            return cached;
        }

        return new DataValueSnapshot(null, UpstreamNotConfigured, null, DateTime.UtcNow);
    }

    /// <summary>Registers <paramref name="observer"/> for pushes to <paramref name="path"/>.</summary>
    /// <param name="path">Tag path; must be non-empty.</param>
    /// <param name="observer">Callback receiving the path and the pushed snapshot.</param>
    /// <returns>A token whose disposal removes the observer again.</returns>
    /// <exception cref="ArgumentException"><paramref name="path"/> is null or empty.</exception>
    /// <exception cref="ArgumentNullException"><paramref name="observer"/> is null.</exception>
    public IDisposable SubscribeTag(string path, Action<string, DataValueSnapshot> observer)
    {
        if (string.IsNullOrEmpty(path))
        {
            throw new ArgumentException("path required", nameof(path));
        }

        ArgumentNullException.ThrowIfNull(observer);

        var registered = _observers.GetOrAdd(path, _ => new List<Action<string, DataValueSnapshot>>());
        lock (registered)
        {
            registered.Add(observer);
        }

        return new Unsub(this, path, observer);
    }

    /// <summary>
    /// Stores <paramref name="snapshot"/> as the current value for <paramref name="path"/>
    /// and then calls every observer registered for that path on the calling thread.
    /// </summary>
    /// <param name="path">Tag path; must be non-empty.</param>
    /// <param name="snapshot">New value to cache and fan out.</param>
    /// <exception cref="ArgumentException"><paramref name="path"/> is null or empty.</exception>
    /// <exception cref="ArgumentNullException"><paramref name="snapshot"/> is null.</exception>
    public void Push(string path, DataValueSnapshot snapshot)
    {
        if (string.IsNullOrEmpty(path))
        {
            throw new ArgumentException("path required", nameof(path));
        }

        ArgumentNullException.ThrowIfNull(snapshot);

        _values[path] = snapshot;

        if (_observers.TryGetValue(path, out var registered))
        {
            // Copy under the lock, invoke outside it, so observers can (un)subscribe freely.
            Action<string, DataValueSnapshot>[] targets;
            lock (registered)
            {
                targets = registered.ToArray();
            }

            foreach (var notify in targets)
            {
                notify(path, snapshot);
            }
        }
    }

    /// <summary>Subscription token returned by <see cref="SubscribeTag"/>; removes the observer once.</summary>
    private sealed class Unsub : IDisposable
    {
        private readonly CachedTagUpstreamSource _owner;
        private readonly string _path;
        private readonly Action<string, DataValueSnapshot> _observer;
        private bool _disposed;

        public Unsub(CachedTagUpstreamSource owner, string path, Action<string, DataValueSnapshot> observer)
        {
            _owner = owner;
            _path = path;
            _observer = observer;
        }

        public void Dispose()
        {
            if (_disposed)
            {
                return;
            }

            _disposed = true;

            if (!_owner._observers.TryGetValue(_path, out var registered))
            {
                return;
            }

            lock (registered)
            {
                registered.Remove(_observer);
            }
        }
    }
}
|
||||
@@ -0,0 +1,146 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Server.Phase7;
|
||||
|
||||
/// <summary>
/// Phase 7 follow-up (task #244). Subscribes to live driver <see cref="ISubscribable"/>
/// surfaces for every input path the Phase 7 engines care about + pushes incoming
/// <see cref="DataChangeEventArgs.Snapshot"/>s into <see cref="CachedTagUpstreamSource"/>
/// so <c>ctx.GetTag</c> reads see the freshest driver value.
/// </summary>
/// <remarks>
/// <para>
/// Each <see cref="DriverFeed"/> declares a driver + the path-to-fullRef map for the
/// attributes that driver provides. Every non-empty feed results in exactly one
/// <c>SubscribeAsync</c> call carrying the feed's distinct fullRefs.
/// </para>
/// <para>
/// Because driver fullRefs are opaque + driver-specific, the bridge keeps a per-feed
/// reverse map from fullRef back to the script-side path(s). Several paths may share a
/// fullRef; a change on that fullRef is pushed to each of them.
/// </para>
/// <para>
/// Lifecycle: construct → <see cref="StartAsync"/> with the feeds → keep alive
/// alongside the engines → <see cref="DisposeAsync"/> unhooks the OnDataChange handlers
/// and awaits <c>UnsubscribeAsync</c> for every subscription that was established.
/// </para>
/// </remarks>
public sealed class DriverSubscriptionBridge : IAsyncDisposable
{
    private readonly CachedTagUpstreamSource _sink;
    private readonly ILogger<DriverSubscriptionBridge> _logger;
    private readonly List<ActiveSubscription> _active = [];
    private bool _started;
    private bool _disposed;

    /// <summary>Creates a bridge that pushes driver data changes into <paramref name="sink"/>.</summary>
    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
    public DriverSubscriptionBridge(
        CachedTagUpstreamSource sink,
        ILogger<DriverSubscriptionBridge> logger)
    {
        _sink = sink ?? throw new ArgumentNullException(nameof(sink));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Subscribe each feed's driver to its declared fullRefs + wire push-to-cache.
    /// Idempotent guard rejects double-start. Throws on the first subscribe failure
    /// so misconfiguration surfaces fast; subscriptions established for earlier feeds
    /// are rolled back before the exception propagates, so partial-subscribe state
    /// doesn't linger.
    /// </summary>
    /// <param name="feeds">Feeds to subscribe; feeds with an empty map are skipped.</param>
    /// <param name="ct">Cancellation token forwarded to each <c>SubscribeAsync</c>.</param>
    /// <exception cref="ArgumentNullException"><paramref name="feeds"/> is null.</exception>
    /// <exception cref="ObjectDisposedException">The bridge has been disposed.</exception>
    /// <exception cref="InvalidOperationException">The bridge was already started.</exception>
    public async Task StartAsync(IEnumerable<DriverFeed> feeds, CancellationToken ct)
    {
        ArgumentNullException.ThrowIfNull(feeds);
        if (_disposed) throw new ObjectDisposedException(nameof(DriverSubscriptionBridge));
        if (_started) throw new InvalidOperationException("DriverSubscriptionBridge already started");
        _started = true;

        try
        {
            foreach (var feed in feeds)
            {
                if (feed.PathToFullRef.Count == 0) continue;
                await SubscribeFeedAsync(feed, ct).ConfigureAwait(false);
            }
        }
        catch
        {
            // A later feed failed: undo the feeds that already subscribed so the drivers
            // are not left publishing into a bridge whose start was reported as failed.
            await ReleaseActiveAsync("start rollback").ConfigureAwait(false);
            throw;
        }
    }

    /// <summary>
    /// Unhooks every handler and unsubscribes every active subscription. Safe to call more
    /// than once; unsubscribe failures are logged as warnings and do not stop the teardown.
    /// </summary>
    public async ValueTask DisposeAsync()
    {
        if (_disposed) return;
        _disposed = true;
        await ReleaseActiveAsync("dispose").ConfigureAwait(false);
    }

    /// <summary>Hooks the data-change handler for one feed and opens its driver subscription.</summary>
    private async Task SubscribeFeedAsync(DriverFeed feed, CancellationToken ct)
    {
        // Reverse map for OnDataChange dispatch — driver fires keyed by FullReference,
        // we push keyed by the script-side path. Grouping (rather than a plain
        // ToDictionary keyed on the fullRef) tolerates several paths sharing one fullRef,
        // which would otherwise throw ArgumentException on the duplicate key.
        var pathsByFullRef = feed.PathToFullRef
            .GroupBy(kv => kv.Value, kv => kv.Key, StringComparer.Ordinal)
            .ToDictionary(g => g.Key, g => g.ToArray(), StringComparer.Ordinal);
        var fullRefs = feed.PathToFullRef.Values.Distinct(StringComparer.Ordinal).ToList();

        EventHandler<DataChangeEventArgs> handler = (_, e) =>
        {
            if (!pathsByFullRef.TryGetValue(e.FullReference, out var unsPaths)) return;
            foreach (var unsPath in unsPaths)
                _sink.Push(unsPath, e.Snapshot);
        };
        feed.Driver.OnDataChange += handler;

        try
        {
            // OTOPCUA0001 suppression — the analyzer flags ISubscribable calls outside
            // CapabilityInvoker. This bridge IS the lifecycle-coordinator for Phase 7
            // subscriptions: it runs once at engine compose, doesn't hot-path per
            // script evaluation (the engines read from the cache instead), and surfaces
            // any subscribe failure by aborting bridge start. Wrapping in the per-call
            // resilience pipeline would add nothing — there's no caller to retry on
            // behalf of, and the breaker/bulkhead semantics belong to actual driver Read
            // dispatch, which still goes through CapabilityInvoker via DriverNodeManager.
#pragma warning disable OTOPCUA0001
            var handle = await feed.Driver.SubscribeAsync(fullRefs, feed.PublishingInterval, ct).ConfigureAwait(false);
#pragma warning restore OTOPCUA0001
            _active.Add(new ActiveSubscription(feed.Driver, handle, handler));
            _logger.LogInformation(
                "Phase 7 bridge subscribed {Count} attribute(s) from driver {Driver} (handle {Handle})",
                fullRefs.Count, feed.Driver.GetType().Name, handle.DiagnosticId);
        }
        catch
        {
            // The subscription never came into existence, so only the handler needs undoing.
            feed.Driver.OnDataChange -= handler;
            throw;
        }
    }

    /// <summary>
    /// Unhooks and unsubscribes everything in the active list, then clears it.
    /// </summary>
    /// <param name="phase">Short label for the log message ("dispose" or "start rollback").</param>
    private async Task ReleaseActiveAsync(string phase)
    {
        foreach (var sub in _active)
        {
            sub.Driver.OnDataChange -= sub.Handler;
            try
            {
#pragma warning disable OTOPCUA0001 // bridge lifecycle — see SubscribeFeedAsync suppression rationale
                await sub.Driver.UnsubscribeAsync(sub.Handle, CancellationToken.None).ConfigureAwait(false);
#pragma warning restore OTOPCUA0001
            }
            catch (Exception ex)
            {
                _logger.LogWarning(ex,
                    "Driver {Driver} UnsubscribeAsync threw on bridge {Phase} (handle {Handle})",
                    sub.Driver.GetType().Name, phase, sub.Handle.DiagnosticId);
            }
        }
        _active.Clear();
    }

    /// <summary>Book-keeping for one established driver subscription.</summary>
    private sealed record ActiveSubscription(
        ISubscribable Driver,
        ISubscriptionHandle Handle,
        EventHandler<DataChangeEventArgs> Handler);
}
|
||||
|
||||
/// <summary>
/// Describes what <see cref="DriverSubscriptionBridge"/> should subscribe to on a single
/// driver: the driver's <see cref="ISubscribable"/> surface and the map used to turn a
/// driver-side <see cref="DataChangeEventArgs.FullReference"/> back into a script-side path.
/// </summary>
/// <param name="Driver">Subscribable surface of the driver that supplies the values.</param>
/// <param name="PathToFullRef">Script-side UNS path → driver-opaque fullRef. A feed with an empty map is skipped by the bridge.</param>
/// <param name="PublishingInterval">Passed through to <see cref="ISubscribable.SubscribeAsync"/> unchanged.</param>
public sealed record DriverFeed(ISubscribable Driver, IReadOnlyDictionary<string, string> PathToFullRef, TimeSpan PublishingInterval);
|
||||
266
src/Server/ZB.MOM.WW.OtOpcUa.Server/Phase7/Phase7Composer.cs
Normal file
266
src/Server/ZB.MOM.WW.OtOpcUa.Server/Phase7/Phase7Composer.cs
Normal file
@@ -0,0 +1,266 @@
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration.Entities;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.AlarmHistorian;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Hosting;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.OpcUa;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.ScriptedAlarms;
|
||||
using ZB.MOM.WW.OtOpcUa.Server.OpcUa;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Server.Phase7;
|
||||
|
||||
/// <summary>
/// Phase 7 follow-up (task #246) — orchestrates the runtime composition of virtual
/// tags + scripted alarms + the historian sink + the driver-bridge that feeds the
/// engines. Called by <see cref="OpcUaServerService"/> after the bootstrap generation
/// loads + before <see cref="OpcUaApplicationHost.StartAsync"/>.
/// </summary>
/// <remarks>
/// <para>
/// <see cref="PrepareAsync"/> reads Script / VirtualTag / ScriptedAlarm rows from
/// the central config DB at the bootstrapped generation, instantiates a
/// <see cref="CachedTagUpstreamSource"/>, runs <see cref="Phase7EngineComposer.Compose"/>,
/// starts one <see cref="DriverSubscriptionBridge"/> fed by the registered drivers'
/// <see cref="EquipmentNamespaceContent"/> tag rows, and returns the engine-backed
/// sources for <see cref="OpcUaApplicationHost.SetPhase7Sources"/>.
/// </para>
/// <para>
/// <see cref="DisposeAsync"/> tears down the bridge first (so no more events
/// arrive at the cache), then the engines (so cascades + timer ticks stop), then
/// the historian sinks. Lifetime is owned by the host;
/// <see cref="OpcUaServerService.StopAsync"/> calls dispose during graceful shutdown.
/// </para>
/// </remarks>
public sealed class Phase7Composer : IAsyncDisposable
{
    private readonly IServiceScopeFactory _scopeFactory;
    private readonly DriverHost _driverHost;
    private readonly DriverEquipmentContentRegistry _equipmentRegistry;
    private readonly IAlarmHistorianSink _historianSink;
    private readonly IAlarmHistorianWriter? _injectedWriter;
    private readonly ILoggerFactory _loggerFactory;
    private readonly Serilog.ILogger _scriptLogger;
    private readonly ILogger<Phase7Composer> _logger;

    private DriverSubscriptionBridge? _bridge;
    private Phase7ComposedSources _sources = Phase7ComposedSources.Empty;
    // Sink we constructed in ResolveHistorianSink (vs. the injected fallback). Held so
    // DisposeAsync can tear it down.
    private SqliteStoreAndForwardSink? _ownedSink;
    private bool _disposed;

    /// <summary>Captures the collaborators; nothing is loaded or started until <see cref="PrepareAsync"/>.</summary>
    /// <exception cref="ArgumentNullException">Any argument other than <paramref name="injectedWriter"/> is null.</exception>
    public Phase7Composer(
        IServiceScopeFactory scopeFactory,
        DriverHost driverHost,
        DriverEquipmentContentRegistry equipmentRegistry,
        IAlarmHistorianSink historianSink,
        ILoggerFactory loggerFactory,
        Serilog.ILogger scriptLogger,
        ILogger<Phase7Composer> logger,
        IAlarmHistorianWriter? injectedWriter = null)
    {
        _scopeFactory = scopeFactory ?? throw new ArgumentNullException(nameof(scopeFactory));
        _driverHost = driverHost ?? throw new ArgumentNullException(nameof(driverHost));
        _equipmentRegistry = equipmentRegistry ?? throw new ArgumentNullException(nameof(equipmentRegistry));
        _historianSink = historianSink ?? throw new ArgumentNullException(nameof(historianSink));
        _injectedWriter = injectedWriter;
        _loggerFactory = loggerFactory ?? throw new ArgumentNullException(nameof(loggerFactory));
        _scriptLogger = scriptLogger ?? throw new ArgumentNullException(nameof(scriptLogger));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>Sources produced by the last composing <see cref="PrepareAsync"/>; <c>Empty</c> until then.</summary>
    public Phase7ComposedSources Sources => _sources;

    /// <summary>
    /// Loads the Phase 7 rows for <paramref name="generationId"/>, composes the engines and
    /// starts the driver bridge. Returns <see cref="Phase7ComposedSources.Empty"/> without
    /// composing anything when the generation has no enabled virtual tags or scripted alarms.
    /// </summary>
    /// <param name="generationId">Generation whose rows are read.</param>
    /// <param name="ct">Cancellation token for the DB queries and the bridge start.</param>
    /// <exception cref="ObjectDisposedException">The composer has been disposed.</exception>
    public async Task<Phase7ComposedSources> PrepareAsync(long generationId, CancellationToken ct)
    {
        if (_disposed) throw new ObjectDisposedException(nameof(Phase7Composer));

        // Load the three Phase 7 row sets in one DB scope.
        List<Script> scripts;
        List<VirtualTag> virtualTags;
        List<ScriptedAlarm> scriptedAlarms;
        using (var scope = _scopeFactory.CreateScope())
        {
            var db = scope.ServiceProvider.GetRequiredService<OtOpcUaConfigDbContext>();
            scripts = await db.Scripts.AsNoTracking()
                .Where(s => s.GenerationId == generationId).ToListAsync(ct).ConfigureAwait(false);
            virtualTags = await db.VirtualTags.AsNoTracking()
                .Where(v => v.GenerationId == generationId && v.Enabled).ToListAsync(ct).ConfigureAwait(false);
            scriptedAlarms = await db.ScriptedAlarms.AsNoTracking()
                .Where(a => a.GenerationId == generationId && a.Enabled).ToListAsync(ct).ConfigureAwait(false);
        }

        if (virtualTags.Count == 0 && scriptedAlarms.Count == 0)
        {
            _logger.LogInformation("Phase 7: no virtual tags or scripted alarms in generation {Gen}; engines dormant", generationId);
            return Phase7ComposedSources.Empty;
        }

        var upstream = new CachedTagUpstreamSource();

        // Phase 7 follow-up #247 — when an IAlarmHistorianWriter is available (from a
        // registered driver or from DI; see SelectAlarmHistorianWriter for the order),
        // wrap it in a SqliteStoreAndForwardSink with a 2s drain cadence. Otherwise fall
        // back to the injected sink.
        var historianSink = ResolveHistorianSink();

        _sources = Phase7EngineComposer.Compose(
            scripts: scripts,
            virtualTags: virtualTags,
            scriptedAlarms: scriptedAlarms,
            upstream: upstream,
            alarmStateStore: new InMemoryAlarmStateStore(),
            historianSink: historianSink,
            rootScriptLogger: _scriptLogger,
            loggerFactory: _loggerFactory);

        _logger.LogInformation(
            "Phase 7: composed engines from generation {Gen} — {Vt} virtual tag(s), {Al} scripted alarm(s), {Sc} script(s)",
            generationId, virtualTags.Count, scriptedAlarms.Count, scripts.Count);

        // Build driver feeds from each registered driver's EquipmentNamespaceContent + start
        // the bridge. Drivers that are not subscribable, have no registry content, or map to
        // no paths are left out of the feed list by BuildDriverFeeds.
        _bridge = new DriverSubscriptionBridge(upstream, _loggerFactory.CreateLogger<DriverSubscriptionBridge>());
        var feeds = BuildDriverFeeds(_driverHost, _equipmentRegistry);
        await _bridge.StartAsync(feeds, ct).ConfigureAwait(false);

        return _sources;
    }

    /// <summary>
    /// Resolution order for the alarm-historian writer:
    /// <list type="number">
    ///   <item><description>Any registered driver that implements <see cref="IAlarmHistorianWriter"/> (today: none — Galaxy used to via the legacy GalaxyProxyDriver).</description></item>
    ///   <item><description>The DI-registered <see cref="IAlarmHistorianWriter"/> (PR B.4 — the WonderwareHistorianClient sidecar writer when <c>Historian:Wonderware:Enabled=true</c>).</description></item>
    ///   <item><description><c>null</c> — caller falls back to the injected <see cref="IAlarmHistorianSink"/> (NullAlarmHistorianSink in the default registration).</description></item>
    /// </list>
    /// Driver-provided writers win over the DI-registered sidecar so a future
    /// GalaxyDriver-as-IAlarmHistorianWriter takes the write path directly,
    /// preserving the v1 invariant where a driver that natively owns the
    /// historian client doesn't bounce through the sidecar IPC.
    /// </summary>
    /// <param name="driverHost">Host whose registered drivers are probed first.</param>
    /// <param name="injectedWriter">DI-registered writer, or null when none is registered.</param>
    /// <param name="selectedSourceDescription"><c>driver:{id}</c>, <c>di:{TypeName}</c>, or null when nothing was selected.</param>
    /// <returns>The selected writer, or null.</returns>
    internal static IAlarmHistorianWriter? SelectAlarmHistorianWriter(
        DriverHost driverHost,
        IAlarmHistorianWriter? injectedWriter,
        out string? selectedSourceDescription)
    {
        foreach (var driverId in driverHost.RegisteredDriverIds)
        {
            if (driverHost.GetDriver(driverId) is IAlarmHistorianWriter w)
            {
                selectedSourceDescription = $"driver:{driverId}";
                return w;
            }
        }
        if (injectedWriter is not null)
        {
            selectedSourceDescription = $"di:{injectedWriter.GetType().Name}";
            return injectedWriter;
        }
        selectedSourceDescription = null;
        return null;
    }

    /// <summary>
    /// Picks the historian sink for the engines: a newly created
    /// <see cref="SqliteStoreAndForwardSink"/> around the selected writer when one exists,
    /// otherwise the injected sink.
    /// </summary>
    private IAlarmHistorianSink ResolveHistorianSink()
    {
        var writer = SelectAlarmHistorianWriter(_driverHost, _injectedWriter, out var sourceDescription);
        if (writer is null)
        {
            _logger.LogInformation(
                "Phase 7 historian sink: no driver or DI-registered IAlarmHistorianWriter — using {Sink}",
                _historianSink.GetType().Name);
            return _historianSink;
        }
        _logger.LogInformation(
            "Phase 7 historian sink: IAlarmHistorianWriter resolved from {Source} — SqliteStoreAndForwardSink active",
            sourceDescription);

        // Queue file lives under the machine-wide application-data folder; fall back to the
        // temp folder when the platform reports no such location.
        var queueRoot = Environment.GetFolderPath(Environment.SpecialFolder.CommonApplicationData);
        if (string.IsNullOrEmpty(queueRoot)) queueRoot = Path.GetTempPath();
        var queueDir = Path.Combine(queueRoot, "OtOpcUa");
        Directory.CreateDirectory(queueDir);
        var queuePath = Path.Combine(queueDir, "alarm-historian-queue.db");

        // SqliteStoreAndForwardSink takes a Serilog logger; hand it the script logger
        // enriched with the queue path so its emissions can be traced to this queue file.
        var serilogShim = _scriptLogger.ForContext("HistorianQueuePath", queuePath);
        _ownedSink = new SqliteStoreAndForwardSink(
            databasePath: queuePath,
            writer: writer,
            logger: serilogShim);
        _ownedSink.StartDrainLoop(TimeSpan.FromSeconds(2));
        return _ownedSink;
    }

    /// <summary>
    /// For each registered driver that exposes <see cref="Core.Abstractions.ISubscribable"/>,
    /// build a UNS-path → driver-fullRef map from its EquipmentNamespaceContent.
    /// Path convention: <c>/{areaName}/{lineName}/{equipmentName}/{tagName}</c> matching
    /// what the EquipmentNodeWalker emits into the OPC UA browse tree, so script literals
    /// written against the operator-visible tree work without translation.
    /// </summary>
    /// <returns>One feed per driver that yielded at least one path, each with a one-second publishing interval.</returns>
    internal static IReadOnlyList<DriverFeed> BuildDriverFeeds(
        DriverHost driverHost, DriverEquipmentContentRegistry equipmentRegistry)
    {
        var feeds = new List<DriverFeed>();
        foreach (var driverId in driverHost.RegisteredDriverIds)
        {
            var driver = driverHost.GetDriver(driverId);
            if (driver is not Core.Abstractions.ISubscribable subscribable) continue;

            var content = equipmentRegistry.Get(driverId);
            if (content is null) continue;

            var pathToFullRef = MapPathsToFullRefs(content);
            if (pathToFullRef.Count == 0) continue;

            feeds.Add(new DriverFeed(subscribable, pathToFullRef, TimeSpan.FromSeconds(1)));
        }
        return feeds;
    }

    /// <summary>
    /// Walks the tag rows of <paramref name="content"/> and maps
    /// <c>/{area}/{line}/{equipment}/{tag}</c> to the tag's <c>TagConfig</c>. Tags without
    /// an equipment id, or whose equipment / line / area cannot be resolved, are skipped.
    /// </summary>
    internal static IReadOnlyDictionary<string, string> MapPathsToFullRefs(EquipmentNamespaceContent content)
    {
        var result = new Dictionary<string, string>(StringComparer.Ordinal);
        var areaById = content.Areas.ToDictionary(a => a.UnsAreaId, StringComparer.OrdinalIgnoreCase);
        var lineById = content.Lines.ToDictionary(l => l.UnsLineId, StringComparer.OrdinalIgnoreCase);
        var equipmentById = content.Equipment.ToDictionary(e => e.EquipmentId, StringComparer.OrdinalIgnoreCase);

        foreach (var tag in content.Tags)
        {
            if (string.IsNullOrEmpty(tag.EquipmentId)) continue;
            if (!equipmentById.TryGetValue(tag.EquipmentId!, out var eq)) continue;
            if (!lineById.TryGetValue(eq.UnsLineId, out var line)) continue;
            if (!areaById.TryGetValue(line.UnsAreaId, out var area)) continue;

            var path = $"/{area.Name}/{line.Name}/{eq.Name}/{tag.Name}";
            result[path] = tag.TagConfig; // duplicate-path collisions naturally win-last; UI publish-validation rules out duplicate names
        }
        return result;
    }

    /// <summary>
    /// Disposes, in order: the driver bridge, the composed engine disposables, the sink this
    /// composer created (if any), and the injected sink when it is disposable. A failure in
    /// one step is logged as a warning and does not prevent the following steps.
    /// </summary>
    public async ValueTask DisposeAsync()
    {
        if (_disposed) return;
        _disposed = true;

        if (_bridge is not null) await _bridge.DisposeAsync().ConfigureAwait(false);

        foreach (var d in _sources.Disposables)
        {
            try { d.Dispose(); }
            catch (Exception ex) { _logger.LogWarning(ex, "Phase 7 disposable threw during shutdown"); }
        }

        // Owned SQLite sink: torn down here, after the engines have stopped producing and
        // before the host releases the writer-bearing driver via DriverHost.DisposeAsync.
        // Guarded so a failure cannot skip the injected sink below.
        try { _ownedSink?.Dispose(); }
        catch (Exception ex) { _logger.LogWarning(ex, "Phase 7 owned historian sink threw during shutdown"); }

        if (_historianSink is IDisposable disposableSink)
        {
            try { disposableSink.Dispose(); }
            catch (Exception ex) { _logger.LogWarning(ex, "Phase 7 injected historian sink threw during shutdown"); }
        }
    }
}
|
||||
@@ -0,0 +1,208 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration.Entities;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.AlarmHistorian;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Scripting;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.ScriptedAlarms;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.VirtualTags;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Server.Phase7;
|
||||
|
||||
/// <summary>
|
||||
/// Phase 7 follow-up (task #243) — maps the generation's <see cref="Script"/> /
|
||||
/// <see cref="VirtualTag"/> / <see cref="ScriptedAlarm"/> rows into the runtime
|
||||
/// definitions <see cref="VirtualTagEngine"/> + <see cref="ScriptedAlarmEngine"/>
|
||||
/// expect, builds the engine instances, and returns the <see cref="IReadable"/>
|
||||
/// sources plus an <see cref="IAlarmSource"/> for the <c>DriverNodeManager</c>
|
||||
/// wiring added by task #239.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// <para>
|
||||
/// Empty Phase 7 config (no virtual tags + no scripted alarms) is a valid state:
|
||||
/// <see cref="Compose"/> returns a <see cref="Phase7ComposedSources"/> with null
|
||||
/// sources so Program.cs can pass them through to <c>OpcUaApplicationHost</c>
|
||||
/// unchanged — deployments without scripts behave exactly as they did before
|
||||
/// Phase 7.
|
||||
/// </para>
|
||||
/// <para>
|
||||
/// The caller owns the returned <see cref="Phase7ComposedSources.Disposables"/>
|
||||
/// and must dispose them on shutdown. Engine cascades + timer ticks run off
|
||||
/// background threads until then.
|
||||
/// </para>
|
||||
/// </remarks>
|
||||
public static class Phase7EngineComposer
|
||||
{
|
||||
/// <summary>
/// Builds the Phase 7 engines from the supplied rows and returns their read sources
/// together with the disposables the caller must release on shutdown.
/// </summary>
/// <returns>
/// <see cref="Phase7ComposedSources.Empty"/> when both <paramref name="virtualTags"/> and
/// <paramref name="scriptedAlarms"/> are empty; otherwise the composed sources.
/// </returns>
/// <exception cref="ArgumentNullException">Any argument is null.</exception>
public static Phase7ComposedSources Compose(
    IReadOnlyList<Script> scripts,
    IReadOnlyList<VirtualTag> virtualTags,
    IReadOnlyList<ScriptedAlarm> scriptedAlarms,
    CachedTagUpstreamSource upstream,
    IAlarmStateStore alarmStateStore,
    IAlarmHistorianSink historianSink,
    Serilog.ILogger rootScriptLogger,
    ILoggerFactory loggerFactory)
{
    ArgumentNullException.ThrowIfNull(scripts);
    ArgumentNullException.ThrowIfNull(virtualTags);
    ArgumentNullException.ThrowIfNull(scriptedAlarms);
    ArgumentNullException.ThrowIfNull(upstream);
    ArgumentNullException.ThrowIfNull(alarmStateStore);
    ArgumentNullException.ThrowIfNull(historianSink);
    ArgumentNullException.ThrowIfNull(rootScriptLogger);
    ArgumentNullException.ThrowIfNull(loggerFactory);

    var hasVirtualTags = virtualTags.Count > 0;
    var hasScriptedAlarms = scriptedAlarms.Count > 0;
    if (!hasVirtualTags && !hasScriptedAlarms)
    {
        return Phase7ComposedSources.Empty;
    }

    // Only enabled scripts are offered to the projection helpers.
    var enabledScripts = scripts
        .Where(s => s.Enabled())
        .ToDictionary(s => s.ScriptId, StringComparer.Ordinal);

    var perScriptLoggers = new ScriptLoggerFactory(rootScriptLogger);
    var owned = new List<IDisposable>();

    // Virtual-tag engine: load the projected definitions, then expose it as a source.
    VirtualTagSource? virtualTagSource = null;
    if (hasVirtualTags)
    {
        var tagDefinitions = ProjectVirtualTags(virtualTags, enabledScripts).ToList();
        var tagEngine = new VirtualTagEngine(upstream, perScriptLoggers, rootScriptLogger);
        tagEngine.Load(tagDefinitions);
        virtualTagSource = new VirtualTagSource(tagEngine);
        owned.Add(tagEngine);
    }

    // Scripted-alarm engine: the historian hook is attached before the definitions load.
    IReadable? scriptedAlarmReadable = null;
    if (hasScriptedAlarms)
    {
        var alarmDefinitions = ProjectScriptedAlarms(scriptedAlarms, enabledScripts).ToList();
        var scriptedEngine = new ScriptedAlarmEngine(upstream, alarmStateStore, perScriptLoggers, rootScriptLogger);

        // Alarm emissions are routed to the historian sink without awaiting the result
        // (Stream D); the returned task is deliberately dropped.
        var routerLogger = loggerFactory.CreateLogger("Phase7HistorianRouter");
        scriptedEngine.OnEvent += (_, e) => _ = RouteToHistorianAsync(e, historianSink, routerLogger);

        // Compose is synchronous, so the asynchronous load is waited on here.
        scriptedEngine.LoadAsync(alarmDefinitions, CancellationToken.None).GetAwaiter().GetResult();

        var scriptedSource = new ScriptedAlarmSource(scriptedEngine);

        // Task #245 — a separate IReadable adapter over the same engine, so variable reads
        // on scripted-alarm nodes have a value source alongside the alarm event source.
        scriptedAlarmReadable = new ScriptedAlarmReadable(scriptedEngine);

        owned.Add(scriptedEngine);
        owned.Add(scriptedSource);
    }

    return new Phase7ComposedSources(virtualTagSource, scriptedAlarmReadable, owned);
}
|
||||
|
||||
/// <summary>
/// Lazily projects enabled <see cref="VirtualTag"/> rows into engine definitions.
/// Disabled rows are skipped.
/// </summary>
/// <param name="rows">Virtual-tag configuration rows.</param>
/// <param name="scriptById">Script lookup keyed by script id.</param>
/// <exception cref="InvalidOperationException">
/// Raised during enumeration when an enabled row's <c>ScriptId</c> is not in <paramref name="scriptById"/>.
/// </exception>
internal static IEnumerable<VirtualTagDefinition> ProjectVirtualTags(
    IReadOnlyList<VirtualTag> rows, IReadOnlyDictionary<string, Script> scriptById)
{
    foreach (var tag in rows)
    {
        if (tag.Enabled)
        {
            var scriptFound = scriptById.TryGetValue(tag.ScriptId, out var boundScript);
            if (!scriptFound)
            {
                throw new InvalidOperationException(
                    $"VirtualTag '{tag.VirtualTagId}' references unknown / disabled Script '{tag.ScriptId}' in this generation");
            }

            yield return new VirtualTagDefinition(
                Path: tag.VirtualTagId,
                DataType: ParseDataType(tag.DataType),
                ScriptSource: boundScript!.SourceCode,
                ChangeTriggered: tag.ChangeTriggered,
                // No interval configured means no timer for this tag.
                TimerInterval: tag.TimerIntervalMs.HasValue
                    ? TimeSpan.FromMilliseconds(tag.TimerIntervalMs.Value)
                    : null,
                Historize: tag.Historize);
        }
    }
}
|
||||
|
||||
/// <summary>
/// Lazily projects enabled <see cref="ScriptedAlarm"/> rows into engine definitions.
/// Disabled rows are skipped.
/// </summary>
/// <param name="rows">Scripted-alarm configuration rows.</param>
/// <param name="scriptById">Script lookup keyed by script id.</param>
/// <exception cref="InvalidOperationException">
/// Raised during enumeration when an enabled row's <c>PredicateScriptId</c> is not in
/// <paramref name="scriptById"/>, or when its <c>AlarmType</c> is not a known alarm kind.
/// </exception>
internal static IEnumerable<ScriptedAlarmDefinition> ProjectScriptedAlarms(
    IReadOnlyList<ScriptedAlarm> rows, IReadOnlyDictionary<string, Script> scriptById)
{
    foreach (var alarm in rows)
    {
        if (alarm.Enabled)
        {
            var predicateFound = scriptById.TryGetValue(alarm.PredicateScriptId, out var predicate);
            if (!predicateFound)
            {
                throw new InvalidOperationException(
                    $"ScriptedAlarm '{alarm.ScriptedAlarmId}' references unknown / disabled predicate Script '{alarm.PredicateScriptId}'");
            }

            yield return new ScriptedAlarmDefinition(
                AlarmId: alarm.ScriptedAlarmId,
                EquipmentPath: alarm.EquipmentId,
                AlarmName: alarm.Name,
                Kind: ParseAlarmKind(alarm.AlarmType),
                Severity: MapSeverity(alarm.Severity),
                MessageTemplate: alarm.MessageTemplate,
                PredicateScriptSource: predicate!.SourceCode,
                HistorizeToAveva: alarm.HistorizeToAveva,
                Retain: alarm.Retain);
        }
    }
}
|
||||
|
||||
/// <summary>
/// Parses a configured data-type name (case-insensitive) into a <see cref="DriverDataType"/>.
/// Anything that is not a defined member falls back to <see cref="DriverDataType.String"/>.
/// </summary>
/// <remarks>
/// <c>Enum.TryParse</c> also succeeds for numeric text such as <c>"999"</c> and returns a value
/// that is not a declared member; the <c>Enum.IsDefined</c> check routes those to the fallback
/// instead of leaking an undefined enum value into the engine.
/// </remarks>
private static DriverDataType ParseDataType(string raw) =>
    Enum.TryParse<DriverDataType>(raw, ignoreCase: true, out var parsed) && Enum.IsDefined(parsed)
        ? parsed
        : DriverDataType.String;
|
||||
|
||||
/// <summary>Maps the stored alarm-type name (exact, case-sensitive match) to an <see cref="AlarmKind"/>.</summary>
/// <exception cref="InvalidOperationException">The name is not one of the four known alarm types.</exception>
private static AlarmKind ParseAlarmKind(string raw)
{
    switch (raw)
    {
        case "AlarmCondition":
            return AlarmKind.AlarmCondition;
        case "LimitAlarm":
            return AlarmKind.LimitAlarm;
        case "DiscreteAlarm":
            return AlarmKind.DiscreteAlarm;
        case "OffNormalAlarm":
            return AlarmKind.OffNormalAlarm;
        default:
            throw new InvalidOperationException($"Unknown AlarmType '{raw}' — DB check constraint should have caught this");
    }
}
|
||||
|
||||
// Buckets a numeric severity into the AlarmSeverity enum using upper bounds of
// 250 / 500 / 750; anything above 750 is Critical. Per the original note this mirrors
// the banding used by the AB CIP ALMA projection and the OpcUaClient MapSeverity.
private static AlarmSeverity MapSeverity(int s)
{
    if (s <= 250)
    {
        return AlarmSeverity.Low;
    }

    if (s <= 500)
    {
        return AlarmSeverity.Medium;
    }

    if (s <= 750)
    {
        return AlarmSeverity.High;
    }

    return AlarmSeverity.Critical;
}
|
||||
|
||||
/// <summary>
/// Converts a scripted-alarm event into an <see cref="AlarmHistorianEvent"/> and enqueues it
/// on the sink. Any failure is logged as a warning and swallowed, because the caller invokes
/// this fire-and-forget.
/// </summary>
/// <param name="e">Alarm event raised by the engine.</param>
/// <param name="sink">Historian sink to enqueue on.</param>
/// <param name="log">Logger used to report enqueue failures.</param>
private static async Task RouteToHistorianAsync(
    ScriptedAlarmEvent e, IAlarmHistorianSink sink, Microsoft.Extensions.Logging.ILogger log)
{
    try
    {
        var condition = e.Condition;

        // No recorded acknowledging user → attribute the event to "system".
        var record = new AlarmHistorianEvent(
            AlarmId: e.AlarmId,
            EquipmentPath: e.EquipmentPath,
            AlarmName: e.AlarmName,
            AlarmTypeName: e.Kind.ToString(),
            Severity: e.Severity,
            EventKind: e.Emission.ToString(),
            Message: e.Message,
            User: condition.LastAckUser ?? "system",
            Comment: condition.LastAckComment,
            TimestampUtc: e.TimestampUtc);

        var enqueue = sink.EnqueueAsync(record, CancellationToken.None);
        await enqueue.ConfigureAwait(false);
    }
    catch (Exception ex)
    {
        log.LogWarning(ex, "Historian enqueue failed for alarm {AlarmId}/{Emission}", e.AlarmId, e.Emission);
    }
}
|
||||
}
|
||||
|
||||
/// <summary>Result of <see cref="Phase7EngineComposer.Compose"/>.</summary>
/// <param name="VirtualReadable">Set when virtual tags were composed, otherwise <c>null</c>; pass to <c>OpcUaApplicationHost.virtualReadable</c>.</param>
/// <param name="ScriptedAlarmReadable">Set when scripted alarms were composed, otherwise <c>null</c>; pass to <c>OpcUaApplicationHost.scriptedAlarmReadable</c>.</param>
/// <param name="Disposables">Engine and source instances owned by the caller, to be disposed on shutdown.</param>
public sealed record Phase7ComposedSources(
    IReadable? VirtualReadable,
    IReadable? ScriptedAlarmReadable,
    IReadOnlyList<IDisposable> Disposables)
{
    /// <summary>Shared instance with no readables and nothing to dispose.</summary>
    public static readonly Phase7ComposedSources Empty = new Phase7ComposedSources(
        VirtualReadable: null,
        ScriptedAlarmReadable: null,
        Disposables: Array.Empty<IDisposable>());
}
|
||||
|
||||
/// <summary>Gives <see cref="Script"/> the same <c>Enabled</c> shape as the other Phase 7 rows.</summary>
internal static class ScriptEnabledExtensions
{
    /// <summary>
    /// Always <c>true</c>: Script has no explicit Enabled column, so every row in the
    /// generation counts as a live script.
    /// </summary>
    public static bool Enabled(this Script _)
    {
        return true;
    }
}
|
||||
@@ -0,0 +1,58 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.ScriptedAlarms;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Server.Phase7;
|
||||
|
||||
/// <summary>
/// <see cref="IReadable"/> adapter that reports each scripted alarm's current
/// <see cref="AlarmActiveState"/> as a boolean value. Phase 7 follow-up (task #245).
/// </summary>
/// <remarks>
/// <para>
/// Paired with the <see cref="NodeSourceKind.ScriptedAlarm"/> dispatch in
/// <c>DriverNodeManager.OnReadValue</c>. The full reference used for lookup is the
/// <c>ScriptedAlarmId</c> the walker wrote into <c>DriverAttributeInfo.FullName</c>
/// when emitting the alarm variable node.
/// </para>
/// <para>
/// Ids the engine does not know yield <c>BadNodeIdUnknown</c> so misconfiguration is
/// visible rather than reading as <c>false</c>. Alarms whose predicate has never been
/// evaluated (brand new, before the engine's first cascade tick) report
/// <see cref="AlarmActiveState.Inactive"/> via <see cref="AlarmConditionState.Fresh"/>,
/// which matches the Part 9 initial-state semantics.
/// </para>
/// </remarks>
public sealed class ScriptedAlarmReadable : IReadable
{
    /// <summary>OPC UA <c>StatusCodes.BadNodeIdUnknown</c>, declared locally to avoid a dependency on the OPC stack.</summary>
    private const uint BadNodeIdUnknown = 0x80340000;

    private readonly ScriptedAlarmEngine _engine;

    /// <summary>Creates the adapter over <paramref name="engine"/>.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="engine"/> is <c>null</c>.</exception>
    public ScriptedAlarmReadable(ScriptedAlarmEngine engine)
    {
        ArgumentNullException.ThrowIfNull(engine);
        _engine = engine;
    }

    /// <summary>
    /// Returns one snapshot per requested alarm id, in request order. Completes synchronously;
    /// <paramref name="cancellationToken"/> is not consulted.
    /// </summary>
    /// <param name="fullReferences">Scripted-alarm ids to read.</param>
    /// <param name="cancellationToken">Unused.</param>
    /// <exception cref="ArgumentNullException"><paramref name="fullReferences"/> is <c>null</c>.</exception>
    public Task<IReadOnlyList<DataValueSnapshot>> ReadAsync(
        IReadOnlyList<string> fullReferences, CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(fullReferences);

        // One timestamp for the whole batch.
        var readTimeUtc = DateTime.UtcNow;
        var snapshots = new DataValueSnapshot[fullReferences.Count];

        for (var index = 0; index < snapshots.Length; index++)
        {
            snapshots[index] = _engine.GetState(fullReferences[index]) is { } condition
                ? new DataValueSnapshot(condition.Active == AlarmActiveState.Active, 0u, readTimeUtc, readTimeUtc)
                : new DataValueSnapshot(null, BadNodeIdUnknown, null, readTimeUtc);
        }

        IReadOnlyList<DataValueSnapshot> completed = snapshots;
        return Task.FromResult(completed);
    }
}
|
||||
252
src/Server/ZB.MOM.WW.OtOpcUa.Server/Program.cs
Normal file
252
src/Server/ZB.MOM.WW.OtOpcUa.Server/Program.cs
Normal file
@@ -0,0 +1,252 @@
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Microsoft.Extensions.Configuration;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.Hosting;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Serilog;
|
||||
using Serilog.Formatting.Compact;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration.LocalCache;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Hosting;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.AlarmHistorian;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
using ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client;
|
||||
using ZB.MOM.WW.OtOpcUa.Driver.AbCip;
|
||||
using ZB.MOM.WW.OtOpcUa.Driver.AbLegacy;
|
||||
using ZB.MOM.WW.OtOpcUa.Driver.FOCAS;
|
||||
using ZB.MOM.WW.OtOpcUa.Driver.Modbus;
|
||||
using ZB.MOM.WW.OtOpcUa.Driver.S7;
|
||||
using ZB.MOM.WW.OtOpcUa.Driver.TwinCAT;
|
||||
using ZB.MOM.WW.OtOpcUa.Server;
|
||||
using ZB.MOM.WW.OtOpcUa.Server.Alarms;
|
||||
using ZB.MOM.WW.OtOpcUa.Server.History;
|
||||
using ZB.MOM.WW.OtOpcUa.Server.Hosting;
|
||||
using ZB.MOM.WW.OtOpcUa.Server.OpcUa;
|
||||
using ZB.MOM.WW.OtOpcUa.Server.Phase7;
|
||||
using ZB.MOM.WW.OtOpcUa.Server.Redundancy;
|
||||
using ZB.MOM.WW.OtOpcUa.Server.Security;
|
||||
|
||||
var builder = Host.CreateApplicationBuilder(args);

// Baseline sinks: console + plain-text daily rolling file (kept for human readability).
var loggerBuilder = new LoggerConfiguration()
    .ReadFrom.Configuration(builder.Configuration)
    .Enrich.FromLogContext()
    .WriteTo.Console()
    .WriteTo.File("logs/otopcua-.log", rollingInterval: RollingInterval.Day);

// Per Phase 6.1 Stream C.3: SIEMs (Splunk, Datadog) can ingest the compact-JSON file
// without a regex parser. Opt-in via appsetting `Serilog:WriteJson = true`.
var writeJson = builder.Configuration.GetValue<bool>("Serilog:WriteJson");
if (writeJson)
{
    var jsonFormatter = new CompactJsonFormatter();
    loggerBuilder = loggerBuilder.WriteTo.File(
        jsonFormatter,
        "logs/otopcua-.json.log",
        rollingInterval: RollingInterval.Day);
}

Log.Logger = loggerBuilder.CreateLogger();

builder.Services
    .AddSerilog()
    .AddWindowsService(serviceOptions => serviceOptions.ServiceName = "OtOpcUa");
|
||||
|
||||
var nodeSection = builder.Configuration.GetSection(NodeOptions.SectionName);

// Reads a mandatory string from the node section; a missing value aborts startup.
string RequireNodeSetting(string key) =>
    nodeSection.GetValue<string>(key)
    ?? throw new InvalidOperationException($"Node:{key} not configured");

var options = new NodeOptions
{
    NodeId = RequireNodeSetting("NodeId"),
    ClusterId = RequireNodeSetting("ClusterId"),
    ConfigDbConnectionString = RequireNodeSetting("ConfigDbConnectionString"),
    // Optional: defaults to a cache file named config_cache.db.
    LocalCachePath = nodeSection.GetValue<string>("LocalCachePath") ?? "config_cache.db",
};
|
||||
|
||||
var opcUaSection = builder.Configuration.GetSection(OpcUaServerOptions.SectionName);
var ldapSection = opcUaSection.GetSection("Ldap");

// The configuration binder returns a dictionary with the default (case-sensitive) comparer,
// while the "nothing configured" fallback below is case-insensitive. Rebuild the bound map
// with the same comparer so group lookups behave identically in both cases.
var configuredGroupToRole = ldapSection.GetSection("GroupToRole").Get<Dictionary<string, string>>();

var ldapOptions = new LdapOptions
{
    Enabled = ldapSection.GetValue<bool?>("Enabled") ?? false,
    Server = ldapSection.GetValue<string>("Server") ?? "localhost",
    Port = ldapSection.GetValue<int?>("Port") ?? 3893,
    UseTls = ldapSection.GetValue<bool?>("UseTls") ?? false,
    // NOTE(review): insecure LDAP is allowed unless configuration says otherwise —
    // confirm this default is intended for production deployments.
    AllowInsecureLdap = ldapSection.GetValue<bool?>("AllowInsecureLdap") ?? true,
    SearchBase = ldapSection.GetValue<string>("SearchBase") ?? "dc=lmxopcua,dc=local",
    ServiceAccountDn = ldapSection.GetValue<string>("ServiceAccountDn") ?? string.Empty,
    ServiceAccountPassword = ldapSection.GetValue<string>("ServiceAccountPassword") ?? string.Empty,
    GroupToRole = configuredGroupToRole is null
        ? new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase)
        : new Dictionary<string, string>(configuredGroupToRole, StringComparer.OrdinalIgnoreCase),
};
|
||||
|
||||
// Security profile: unparseable or missing text falls back to None.
var configuredProfile = opcUaSection.GetValue<string>("SecurityProfile");
if (!Enum.TryParse<OpcUaSecurityProfile>(configuredProfile, ignoreCase: true, out var securityProfile))
{
    securityProfile = OpcUaSecurityProfile.None;
}

var opcUaOptions = new OpcUaServerOptions
{
    EndpointUrl = opcUaSection.GetValue<string>("EndpointUrl") ?? "opc.tcp://0.0.0.0:4840/OtOpcUa",
    ApplicationName = opcUaSection.GetValue<string>("ApplicationName") ?? "OtOpcUa Server",
    ApplicationUri = opcUaSection.GetValue<string>("ApplicationUri") ?? "urn:OtOpcUa:Server",
    // Default PKI store lives under the machine-wide application-data folder.
    PkiStoreRoot = opcUaSection.GetValue<string>("PkiStoreRoot")
        ?? Path.Combine(
            Environment.GetFolderPath(Environment.SpecialFolder.CommonApplicationData),
            "OtOpcUa",
            "pki"),
    // NOTE(review): untrusted client certificates are auto-accepted unless configured
    // otherwise — confirm this default is intended outside of development.
    AutoAcceptUntrustedClientCertificates =
        opcUaSection.GetValue<bool?>("AutoAcceptUntrustedClientCertificates") ?? true,
    SecurityProfile = securityProfile,
    Ldap = ldapOptions,
    AnonymousRoles = opcUaSection.GetSection("AnonymousRoles").Get<string[]>() ?? [],
};
|
||||
|
||||
// Option singletons plus the authentication, local-cache and driver-hosting primitives.
builder.Services
    .AddSingleton(options)
    .AddSingleton(opcUaOptions)
    .AddSingleton(ldapOptions)
    .AddSingleton<IUserAuthenticator>(sp =>
    {
        // The LDAP-backed authenticator is only used when LDAP is enabled.
        if (!ldapOptions.Enabled)
        {
            return new DenyAllUserAuthenticator();
        }

        return new LdapUserAuthenticator(ldapOptions, sp.GetRequiredService<ILogger<LdapUserAuthenticator>>());
    })
    .AddSingleton<ILocalConfigCache>(_ => new LiteDbConfigCache(options.LocalCachePath))
    .AddSingleton<DriverHost>()
    .AddSingleton<NodeBootstrap>();

// Task #248 — driver-instance bootstrap pipeline. DriverFactoryRegistry is the
// type-name → factory map; each driver project's static Register call pre-loads
// its factory so the bootstrapper can materialise DriverInstance rows from the
// central DB into live IDriver instances.
static DriverFactoryRegistry CreateDriverFactoryRegistry()
{
    var driverFactories = new DriverFactoryRegistry();

    // Galaxy access flows through the in-process GalaxyDriver (DriverType =
    // "GalaxyMxGateway") talking gRPC to the mxaccessgw worker. The legacy
    // out-of-process GalaxyProxyDriver retired in PR 7.2 once the parity matrix
    // (docs/v2/Galaxy.ParityMatrix.md) verified equivalence.
    ZB.MOM.WW.OtOpcUa.Driver.Galaxy.GalaxyDriverFactoryExtensions.Register(driverFactories);
    FocasDriverFactoryExtensions.Register(driverFactories);
    ModbusDriverFactoryExtensions.Register(driverFactories);
    AbCipDriverFactoryExtensions.Register(driverFactories);
    AbLegacyDriverFactoryExtensions.Register(driverFactories);
    S7DriverFactoryExtensions.Register(driverFactories);
    TwinCATDriverFactoryExtensions.Register(driverFactories);

    return driverFactories;
}

builder.Services
    .AddSingleton<DriverFactoryRegistry>(_ => CreateDriverFactoryRegistry())
    .AddSingleton<DriverInstanceBootstrapper>();
|
||||
|
||||
// Phase 6.1 Stream B.4 (task #137) — ScheduledRecycleHostedService. The scheduler list
// starts empty; DriverInstanceBootstrapper calls AddScheduler for any Tier C driver whose
// ResilienceConfig carries a RecycleIntervalSeconds AND has an IDriverSupervisor in DI.
// It is registered as a singleton first and then exposed as the hosted service, so the
// bootstrapper injects the very instance the BackgroundService loop drives.
builder.Services
    .AddSingleton<ScheduledRecycleHostedService>()
    .AddHostedService(sp => sp.GetRequiredService<ScheduledRecycleHostedService>());

builder.Services
    // ADR-001 Option A wiring — the registry is the handoff between OpcUaServerService's
    // bootstrap-time population pass + OpcUaApplicationHost's StartAsync walker invocation.
    // DriverEquipmentContentRegistry.Get is the equipmentContentLookup delegate that PR #155
    // added to OpcUaApplicationHost's ctor seam.
    .AddSingleton<DriverEquipmentContentRegistry>()
    .AddScoped<EquipmentNamespaceContentLoader>()
    // Phase 6.2 Stream C wiring — constructs AuthorizationGate + NodeScopeResolver from the
    // published generation's NodeAcl rows + per-driver EquipmentNamespaceContent. Gated by
    // NodeOptions.Authorization.Enabled (default false) so existing deployments don't flip
    // to ACL enforcement accidentally on upgrade.
    .AddSingleton<AuthorizationBootstrap>()
    // PR 1+2.W — server-level history routing + alarm-condition state machine. Singletons
    // shared across every DriverNodeManager. The alarm service runs the Active /
    // Acknowledged / Inactive state machine for any driver that declares alarms via
    // AlarmConditionInfo's sub-attribute refs.
    .AddSingleton<IHistoryRouter, HistoryRouter>()
    .AddSingleton<AlarmConditionService>();
|
||||
|
||||
// PR 3.W — optional Wonderware historian sidecar. Configuration lives under
// Historian:Wonderware:*. With Enabled=true the .NET 10 client is registered both as an
// IHistorianDataSource (through IHistoryRouter, under the configured driver-instance
// prefix, default "galaxy") and as an IAlarmHistorianWriter (consumed by the
// SqliteStoreAndForwardSink drain worker once task #248 wires it). When disabled, reads
// fall back to DriverNodeManager's legacy IHistoryProvider adapter and writes to
// NullAlarmHistorianSink, so the sidecar stays fully optional until the legacy paths
// retire in PR 7.2.
var wonderwareSection = builder.Configuration.GetSection("Historian:Wonderware");
var wonderwareEnabled = wonderwareSection.GetValue("Enabled", false);
if (wonderwareEnabled)
{
    var wonderwarePrefix = wonderwareSection.GetValue("DriverInstancePrefix", "galaxy")
        ?? throw new InvalidOperationException("Historian:Wonderware:DriverInstancePrefix must be a string when configured.");

    // Mandatory connection settings — startup aborts when either is missing.
    var pipeName = wonderwareSection.GetValue<string>("PipeName")
        ?? throw new InvalidOperationException("Historian:Wonderware:PipeName must be set when Enabled=true.");
    var sharedSecret = wonderwareSection.GetValue<string>("SharedSecret")
        ?? throw new InvalidOperationException("Historian:Wonderware:SharedSecret must be set when Enabled=true.");

    // Optional settings with defaults.
    var peerName = wonderwareSection.GetValue("PeerName", $"OtOpcUa-{options.NodeId}") ?? "OtOpcUa";
    var connectTimeoutSeconds = wonderwareSection.GetValue("ConnectTimeoutSeconds", 10);
    var callTimeoutSeconds = wonderwareSection.GetValue("CallTimeoutSeconds", 30);

    var wonderwareOptions = new WonderwareHistorianClientOptions(
        PipeName: pipeName,
        SharedSecret: sharedSecret,
        PeerName: peerName,
        ConnectTimeout: TimeSpan.FromSeconds(connectTimeoutSeconds),
        CallTimeout: TimeSpan.FromSeconds(callTimeoutSeconds));

    builder.Services
        .AddSingleton(wonderwareOptions)
        .AddSingleton<WonderwareHistorianClient>()
        // Same client instance doubles as the alarm-historian writer.
        .AddSingleton<IAlarmHistorianWriter>(sp => sp.GetRequiredService<WonderwareHistorianClient>())
        .AddHostedService(sp =>
        {
            var router = sp.GetRequiredService<IHistoryRouter>();
            var client = sp.GetRequiredService<WonderwareHistorianClient>();
            var bootstrapLogger = sp.GetRequiredService<ILogger<WonderwareHistorianBootstrap>>();
            return new WonderwareHistorianBootstrap(router, client, wonderwarePrefix, bootstrapLogger);
        });
}
|
||||
|
||||
// The OPC UA application host is built by hand so the optional ctor seams
// (equipment-content lookup, history router, alarm-condition service) can be supplied.
builder.Services
    .AddSingleton<OpcUaApplicationHost>(sp =>
    {
        var registry = sp.GetRequiredService<DriverEquipmentContentRegistry>();
        var serverOptions = sp.GetRequiredService<OpcUaServerOptions>();
        var driverHost = sp.GetRequiredService<DriverHost>();
        var authenticator = sp.GetRequiredService<IUserAuthenticator>();
        var loggerFactory = sp.GetRequiredService<ILoggerFactory>();
        var hostLogger = sp.GetRequiredService<ILogger<OpcUaApplicationHost>>();
        var historyRouter = sp.GetRequiredService<IHistoryRouter>();
        var alarmConditions = sp.GetRequiredService<AlarmConditionService>();

        return new OpcUaApplicationHost(
            serverOptions,
            driverHost,
            authenticator,
            loggerFactory,
            hostLogger,
            equipmentContentLookup: registry.Get,
            historyRouter: historyRouter,
            alarmConditionService: alarmConditions);
    })
    .AddHostedService<OpcUaServerService>();
|
||||
|
||||
// Both registrations below point at the same central-config SQL Server database.
Action<DbContextOptionsBuilder> useConfigDb =
    dbOptions => dbOptions.UseSqlServer(options.ConfigDbConnectionString);

builder.Services
    // Central-config DB access for the host-status publisher (LMX follow-up #7). Scoped
    // context so per-heartbeat change-tracking stays isolated; the publisher opens one
    // scope per tick.
    .AddDbContext<OtOpcUaConfigDbContext>(useConfigDb)
    // Additional context factory so the Phase 6.3 RedundancyCoordinator (singleton) can
    // create its own DbContext for topology loading without fighting the scoped
    // HostStatusPublisher.
    .AddDbContextFactory<OtOpcUaConfigDbContext>(useConfigDb)
    .AddHostedService<HostStatusPublisher>();
|
||||
|
||||
// Phase 6.3 Stream C (task #147) — ServiceLevel + ServerUriArray + RedundancySupport node
// wiring. The coordinator holds topology, the publisher computes the ServiceLevel byte and
// ServerUriArray, and the hosted service ticks the publisher and pushes values onto the
// Server object via the node writer.
builder.Services
    .AddSingleton(sp =>
    {
        var contextFactory = sp.GetRequiredService<IDbContextFactory<OtOpcUaConfigDbContext>>();
        var coordinatorLogger = sp.GetRequiredService<ILogger<RedundancyCoordinator>>();
        return new RedundancyCoordinator(contextFactory, coordinatorLogger, options.NodeId, options.ClusterId);
    })
    .AddSingleton<ApplyLeaseRegistry>()
    .AddSingleton<RecoveryStateManager>()
    .AddSingleton<PeerReachabilityTracker>()
    .AddSingleton(sp =>
    {
        var coordinator = sp.GetRequiredService<RedundancyCoordinator>();
        var leases = sp.GetRequiredService<ApplyLeaseRegistry>();
        var recovery = sp.GetRequiredService<RecoveryStateManager>();
        var reachability = sp.GetRequiredService<PeerReachabilityTracker>();
        return new RedundancyStatePublisher(coordinator, leases, recovery, reachability);
    })
    .AddHostedService<RedundancyPublisherHostedService>();

// Phase 6.3 Stream B — two-layer peer-probe loops populating PeerReachabilityTracker.
// Without these the publisher sees PeerReachability.Unknown for every peer and degrades
// to the Isolated-Primary band (230) even when the peer is up. Safe default but not the
// full non-transparent-redundancy UX.
builder.Services.AddSingleton<PeerProbeOptions>();
builder.Services.AddHttpClient(PeerHttpProbeLoop.HttpClientName);
builder.Services
    .AddHostedService<PeerHttpProbeLoop>()
    .AddHostedService<PeerUaProbeLoop>()
    // Phase 6.3 A.2 + 6.1 Stream D — periodic generation refresh. Detects peer-published
    // generations, opens an ApplyLeaseRegistry lease during the refresh window (so the
    // publisher surfaces PrimaryMidApply=200 instead of sitting at PrimaryHealthy=255
    // through the apply), and calls coordinator.RefreshAsync to pick up topology changes.
    .AddHostedService<GenerationRefreshHostedService>();
|
||||
|
||||
// Phase 7 follow-up #246 — historian sink + engine composer. NullAlarmHistorianSink
// is the default until the Galaxy.Host SqliteStoreAndForwardSink writer adapter
// lands (task #248). The composer reads Script/VirtualTag/ScriptedAlarm rows on
// generation bootstrap, builds the engines, and starts the driver-bridge feed.
builder.Services.AddSingleton<IAlarmHistorianSink>(NullAlarmHistorianSink.Instance);
builder.Services.AddSingleton(Log.Logger); // Serilog root for ScriptLoggerFactory
builder.Services.AddSingleton<Phase7Composer>();

try
{
    var host = builder.Build();
    await host.RunAsync();
}
finally
{
    // The static Serilog logger was created manually above, so nothing else closes it.
    // Flush and dispose its sinks on every exit path (clean shutdown or startup failure)
    // so the final log events reach the rolling files.
    Log.CloseAndFlush();
}
|
||||
@@ -0,0 +1,85 @@
|
||||
using System.Collections.Concurrent;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Server.Redundancy;
|
||||
|
||||
/// <summary>
/// Registry of in-progress publish-generation apply leases, keyed on
/// <c>(ConfigGenerationId, PublishRequestId)</c>. Per decision #162 the lease is a sealed
/// disposable scope: the <see cref="IAsyncDisposable"/> handed out by
/// <see cref="BeginApplyLease"/> removes its entry on disposal, so
/// <see cref="IsApplyInProgress"/> is correct on every exit path (success / exception /
/// cancellation).
/// </summary>
/// <remarks>
/// A watchdog loop is expected to call <see cref="PruneStale"/> periodically; leases older
/// than <see cref="ApplyMaxDuration"/> are force-closed so a crashed publisher can't pin
/// the node at <see cref="ServiceLevelBand.PrimaryMidApply"/>.
/// </remarks>
public sealed class ApplyLeaseRegistry
{
    // Lease key → UTC time the lease was opened.
    private readonly ConcurrentDictionary<LeaseKey, DateTime> _leases = new();
    private readonly TimeProvider _timeProvider;

    /// <summary>Creates the registry.</summary>
    /// <param name="applyMaxDuration">Maximum lease age before <see cref="PruneStale"/> closes it; defaults to 10 minutes.</param>
    /// <param name="timeProvider">Clock source; defaults to <see cref="TimeProvider.System"/>.</param>
    public ApplyLeaseRegistry(TimeSpan? applyMaxDuration = null, TimeProvider? timeProvider = null)
    {
        _timeProvider = timeProvider ?? TimeProvider.System;
        ApplyMaxDuration = applyMaxDuration ?? TimeSpan.FromMinutes(10);
    }

    /// <summary>Age beyond which an open lease is considered stale.</summary>
    public TimeSpan ApplyMaxDuration { get; }

    /// <summary>True while one or more apply leases are open.</summary>
    public bool IsApplyInProgress => !_leases.IsEmpty;

    /// <summary>Number of currently open leases — diagnostics only.</summary>
    public int OpenLeaseCount => _leases.Count;

    /// <summary>
    /// Opens a lease stamped with the current UTC time. Dispose the returned scope
    /// (ideally with <c>await using</c>) to close it on every exit path.
    /// </summary>
    public IAsyncDisposable BeginApplyLease(long generationId, Guid publishRequestId)
    {
        var leaseKey = new LeaseKey(generationId, publishRequestId);
        var openedAtUtc = CurrentUtc();
        _leases[leaseKey] = openedAtUtc;
        return new LeaseScope(this, leaseKey);
    }

    /// <summary>Watchdog tick: force-closes every lease older than <see cref="ApplyMaxDuration"/>.</summary>
    /// <returns>How many leases this call closed.</returns>
    public int PruneStale()
    {
        var nowUtc = CurrentUtc();
        var pruned = 0;

        foreach (var (leaseKey, openedAtUtc) in _leases)
        {
            var age = nowUtc - openedAtUtc;
            if (age <= ApplyMaxDuration)
            {
                continue;
            }

            // Count only leases this call actually removed; the owner may have released it first.
            if (_leases.TryRemove(leaseKey, out _))
            {
                pruned++;
            }
        }

        return pruned;
    }

    private DateTime CurrentUtc() => _timeProvider.GetUtcNow().UtcDateTime;

    private void Release(LeaseKey key)
    {
        _leases.TryRemove(key, out _);
    }

    private readonly record struct LeaseKey(long GenerationId, Guid PublishRequestId);

    /// <summary>Disposable handle for one lease; releases at most once.</summary>
    private sealed class LeaseScope : IAsyncDisposable
    {
        private readonly ApplyLeaseRegistry _owner;
        private readonly LeaseKey _key;
        private int _disposed; // 0 = open, 1 = released

        public LeaseScope(ApplyLeaseRegistry owner, LeaseKey key)
        {
            _owner = owner;
            _key = key;
        }

        public ValueTask DisposeAsync()
        {
            // Only the first caller to flip the flag performs the release.
            var firstDispose = Interlocked.CompareExchange(ref _disposed, 1, 0) == 0;
            if (firstDispose)
            {
                _owner.Release(_key);
            }

            return ValueTask.CompletedTask;
        }
    }
}
|
||||
@@ -0,0 +1,96 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration.Entities;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration.Enums;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Server.Redundancy;
|
||||
|
||||
/// <summary>
|
||||
/// Pure-function mapper from the shared config DB's <see cref="ServerCluster"/> +
|
||||
/// <see cref="ClusterNode"/> rows to an immutable <see cref="RedundancyTopology"/>.
|
||||
/// Validates Phase 6.3 Stream A.1 invariants and throws
|
||||
/// <see cref="InvalidTopologyException"/> on violation so the coordinator can fail startup
|
||||
/// fast with a clear message rather than boot into an ambiguous state.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// Stateless — the caller owns the DB round-trip + hands rows in. Keeping it pure makes
|
||||
/// the invariant matrix testable without EF or SQL Server.
|
||||
/// </remarks>
|
||||
public static class ClusterTopologyLoader
|
||||
{
|
||||
/// <summary>Build a topology snapshot for the given self node. Throws on invariant violation.</summary>
|
||||
public static RedundancyTopology Load(string selfNodeId, ServerCluster cluster, IReadOnlyList<ClusterNode> nodes)
|
||||
{
|
||||
ArgumentException.ThrowIfNullOrWhiteSpace(selfNodeId);
|
||||
ArgumentNullException.ThrowIfNull(cluster);
|
||||
ArgumentNullException.ThrowIfNull(nodes);
|
||||
|
||||
ValidateClusterShape(cluster, nodes);
|
||||
ValidateUniqueApplicationUris(nodes);
|
||||
ValidatePrimaryCount(cluster, nodes);
|
||||
|
||||
var self = nodes.FirstOrDefault(n => string.Equals(n.NodeId, selfNodeId, StringComparison.OrdinalIgnoreCase))
|
||||
?? throw new InvalidTopologyException(
|
||||
$"Self node '{selfNodeId}' is not a member of cluster '{cluster.ClusterId}'. " +
|
||||
$"Members: {string.Join(", ", nodes.Select(n => n.NodeId))}.");
|
||||
|
||||
var peers = nodes
|
||||
.Where(n => !string.Equals(n.NodeId, selfNodeId, StringComparison.OrdinalIgnoreCase))
|
||||
.Select(n => new RedundancyPeer(
|
||||
NodeId: n.NodeId,
|
||||
Role: n.RedundancyRole,
|
||||
Host: n.Host,
|
||||
OpcUaPort: n.OpcUaPort,
|
||||
DashboardPort: n.DashboardPort,
|
||||
ApplicationUri: n.ApplicationUri))
|
||||
.ToList();
|
||||
|
||||
return new RedundancyTopology(
|
||||
ClusterId: cluster.ClusterId,
|
||||
SelfNodeId: self.NodeId,
|
||||
SelfRole: self.RedundancyRole,
|
||||
Mode: cluster.RedundancyMode,
|
||||
Peers: peers,
|
||||
SelfApplicationUri: self.ApplicationUri);
|
||||
}
|
||||
|
||||
private static void ValidateClusterShape(ServerCluster cluster, IReadOnlyList<ClusterNode> nodes)
|
||||
{
|
||||
if (nodes.Count == 0)
|
||||
throw new InvalidTopologyException($"Cluster '{cluster.ClusterId}' has zero nodes.");
|
||||
|
||||
// Decision #83 — v2.0 caps clusters at two nodes.
|
||||
if (nodes.Count > 2)
|
||||
throw new InvalidTopologyException(
|
||||
$"Cluster '{cluster.ClusterId}' has {nodes.Count} nodes. v2.0 supports at most 2 nodes per cluster (decision #83).");
|
||||
|
||||
// Every node must belong to the given cluster.
|
||||
var wrongCluster = nodes.FirstOrDefault(n =>
|
||||
!string.Equals(n.ClusterId, cluster.ClusterId, StringComparison.OrdinalIgnoreCase));
|
||||
if (wrongCluster is not null)
|
||||
throw new InvalidTopologyException(
|
||||
$"Node '{wrongCluster.NodeId}' belongs to cluster '{wrongCluster.ClusterId}', not '{cluster.ClusterId}'.");
|
||||
}
|
||||
|
||||
/// <summary>
/// Rejects a node list in which two or more nodes declare the same ApplicationUri
/// (ordinal, case-sensitive comparison). Only the first colliding group is reported.
/// </summary>
/// <param name="nodes">Nodes to validate.</param>
/// <exception cref="InvalidTopologyException">At least two nodes share an ApplicationUri.</exception>
private static void ValidateUniqueApplicationUris(IReadOnlyList<ClusterNode> nodes)
{
    var byUri = nodes.GroupBy(n => n.ApplicationUri, StringComparer.Ordinal);

    foreach (var sharers in byUri)
    {
        if (sharers.Count() <= 1)
        {
            continue;
        }

        var nodeIds = string.Join(", ", sharers.Select(n => n.NodeId));
        throw new InvalidTopologyException(
            $"Nodes {nodeIds} share ApplicationUri '{sharers.Key}'. " +
            $"OPC UA Part 4 requires unique ApplicationUri per server — clients pin trust here (decision #86).");
    }
}
|
||||
|
||||
/// <summary>
/// Enforces "at most one Primary" for clusters whose redundancy mode is anything other than
/// <see cref="RedundancyMode.None"/>. Clusters in <see cref="RedundancyMode.None"/> are not
/// checked at all.
/// </summary>
/// <param name="cluster">Cluster row supplying the redundancy mode and id.</param>
/// <param name="nodes">Nodes whose roles are counted.</param>
/// <exception cref="InvalidTopologyException">More than one node declares the Primary role.</exception>
private static void ValidatePrimaryCount(ServerCluster cluster, IReadOnlyList<ClusterNode> nodes)
{
    // Standalone mode: any role is fine.
    if (cluster.RedundancyMode == RedundancyMode.None)
    {
        return;
    }

    var primaries = 0;
    foreach (var node in nodes)
    {
        if (node.RedundancyRole == RedundancyRole.Primary)
        {
            primaries++;
        }
    }

    if (primaries <= 1)
    {
        return;
    }

    throw new InvalidTopologyException(
        $"Cluster '{cluster.ClusterId}' has {primaries} Primary nodes in redundancy mode {cluster.RedundancyMode}. " +
        $"At most one Primary per cluster (decision #84). Runtime detects and demotes both to ServiceLevel 2 " +
        $"per the 8-state matrix; startup fails fast to surface the misconfiguration earlier.");
}
|
||||
}
|
||||
@@ -0,0 +1,42 @@
|
||||
namespace ZB.MOM.WW.OtOpcUa.Server.Redundancy;
|
||||
|
||||
/// <summary>
/// Most recent reachability observation for the peer node under the Phase 6.3 Stream B.1/B.2
/// two-layer probe model: the HTTP probe is the fast-fail layer, the UA probe is authoritative.
/// </summary>
/// <remarks>
/// The two flags feed the <see cref="ServiceLevelCalculator"/> as <c>peerHttpHealthy</c> and
/// <c>peerUaHealthy</c>. The concrete probe loops (<c>PeerHttpProbeLoop</c> +
/// <c>PeerUaProbeLoop</c>) live in a Stream B runtime follow-up; this record is the contract
/// the publisher reads, and probers write it through <see cref="PeerReachabilityTracker"/>.
/// </remarks>
/// <param name="HttpHealthy">Result of the HTTP-layer probe.</param>
/// <param name="UaHealthy">Result of the OPC UA-layer probe.</param>
public sealed record PeerReachability(bool HttpHealthy, bool UaHealthy)
{
    /// <summary>Both probes unhealthy — the value reported for a peer that has not been probed yet.</summary>
    public static readonly PeerReachability Unknown = new(HttpHealthy: false, UaHealthy: false);

    /// <summary>Both probes healthy.</summary>
    public static readonly PeerReachability FullyHealthy = new(HttpHealthy: true, UaHealthy: true);

    /// <summary>True only when the HTTP and UA probes are both healthy — the <c>ServiceLevelCalculator</c>'s peerReachable gate.</summary>
    public bool BothHealthy => this is { HttpHealthy: true, UaHealthy: true };
}
|
||||
|
||||
/// <summary>
/// Holds the latest <see cref="PeerReachability"/> per peer NodeId in a concurrent dictionary
/// keyed case-insensitively. Probe loops write through <see cref="Update"/>; the
/// <see cref="RedundancyStatePublisher"/> reads through <see cref="Get"/>.
/// </summary>
public sealed class PeerReachabilityTracker
{
    private readonly System.Collections.Concurrent.ConcurrentDictionary<string, PeerReachability> _latestByPeer =
        new(StringComparer.OrdinalIgnoreCase);

    /// <summary>Store (or overwrite) the latest observation for a peer.</summary>
    /// <param name="peerNodeId">Peer NodeId; must not be null, empty, or whitespace.</param>
    /// <param name="reachability">Observation to store; must not be null.</param>
    /// <exception cref="ArgumentException"><paramref name="peerNodeId"/> is empty or whitespace.</exception>
    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
    public void Update(string peerNodeId, PeerReachability reachability)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(peerNodeId);
        ArgumentNullException.ThrowIfNull(reachability);

        _latestByPeer[peerNodeId] = reachability;
    }

    /// <summary>
    /// Latest observation for a peer, or <see cref="PeerReachability.Unknown"/> when nothing
    /// has been recorded for that NodeId yet.
    /// </summary>
    /// <param name="peerNodeId">Peer NodeId to look up.</param>
    public PeerReachability Get(string peerNodeId)
    {
        if (_latestByPeer.TryGetValue(peerNodeId, out var latest))
        {
            return latest;
        }

        return PeerReachability.Unknown;
    }
}
|
||||
@@ -0,0 +1,65 @@
|
||||
namespace ZB.MOM.WW.OtOpcUa.Server.Redundancy;
|
||||
|
||||
/// <summary>
/// Tracks the Recovering-band dwell for a node after a <c>Faulted → Healthy</c> transition.
/// Per decision #154 and Phase 6.3 Stream B.4, a node that has just come back stays in the
/// Recovering band (180 Primary / 30 Backup) until BOTH (a) <see cref="DwellTime"/> has
/// elapsed since the recovery and (b) a publish-witness has been recorded since the recovery.
/// </summary>
/// <remarks>
/// Purely in-memory, no I/O. The coordinator reports events through <see cref="MarkFaulted"/>,
/// <see cref="MarkRecovered"/>, and <see cref="RecordPublishWitness"/>; <see cref="IsDwellMet"/>
/// combines them. The class takes no locks of its own.
/// </remarks>
public sealed class RecoveryStateManager
{
    private readonly TimeProvider _clock;

    /// <summary>Instant of the last <see cref="MarkRecovered"/>; null before the first recovery and after <see cref="MarkFaulted"/>.</summary>
    private DateTime? _recoveryStartedUtc;

    /// <summary>Set by <see cref="RecordPublishWitness"/>; cleared by both <see cref="MarkFaulted"/> and <see cref="MarkRecovered"/>.</summary>
    private bool _witnessSeen;

    /// <summary>Create a manager.</summary>
    /// <param name="dwellTime">Minimum time to hold the Recovering band; defaults to 60 seconds when null.</param>
    /// <param name="timeProvider">Clock source; defaults to <see cref="TimeProvider.System"/> when null.</param>
    public RecoveryStateManager(TimeSpan? dwellTime = null, TimeProvider? timeProvider = null)
    {
        DwellTime = dwellTime ?? TimeSpan.FromSeconds(60);
        _clock = timeProvider ?? TimeProvider.System;
    }

    /// <summary>Configured dwell duration.</summary>
    public TimeSpan DwellTime { get; }

    /// <summary>Report that the node has entered the Faulted state; forgets any recovery in progress.</summary>
    public void MarkFaulted()
    {
        _recoveryStartedUtc = null;
        _witnessSeen = false;
    }

    /// <summary>Report a Faulted → Healthy transition; the dwell clock starts now and any earlier witness is discarded.</summary>
    public void MarkRecovered()
    {
        _recoveryStartedUtc = _clock.GetUtcNow().UtcDateTime;
        _witnessSeen = false;
    }

    /// <summary>Report a successful publish-witness read.</summary>
    public void RecordPublishWitness()
    {
        _witnessSeen = true;
    }

    /// <summary>
    /// True when no recovery is being tracked (never recovered, or <see cref="MarkFaulted"/>
    /// was the last transition reported), or when a witness has been recorded AND at least
    /// <see cref="DwellTime"/> has passed since the last recovery. False means the coordinator
    /// should report the Recovering-band ServiceLevel.
    /// </summary>
    public bool IsDwellMet()
    {
        if (_recoveryStartedUtc is not { } startedUtc)
        {
            return true; // no recovery in progress → dwell N/A
        }

        // The clock is only consulted once a witness exists.
        return _witnessSeen && _clock.GetUtcNow().UtcDateTime - startedUtc >= DwellTime;
    }
}
|
||||
@@ -0,0 +1,107 @@
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration.Entities;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration.Enums;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Server.Redundancy;
|
||||
|
||||
/// <summary>
/// Process-singleton holder of the current <see cref="RedundancyTopology"/>. Reads the
/// shared config DB at <see cref="InitializeAsync"/> time + re-reads on
/// <see cref="RefreshAsync"/> (called after <c>sp_PublishGeneration</c> completes so
/// operator role-swaps take effect without a process restart).
/// </summary>
/// <remarks>
/// <para>Per Phase 6.3 Stream A.1-A.2. The coordinator is the source of truth for the
/// <see cref="ServiceLevelCalculator"/> inputs: role (from topology), peer reachability
/// (from peer-probe loops — Stream B.1/B.2 follow-up), apply-in-progress (from
/// <see cref="ApplyLeaseRegistry"/>), topology-valid (from invariant checks at load time
/// + runtime detection of conflicting peer claims).</para>
///
/// <para>Topology refresh swaps in a whole new <see cref="RedundancyTopology"/> instance
/// with a single <see cref="Volatile"/> reference write, so readers of
/// <see cref="Current"/> always see one complete snapshot — never a partial transition.
/// When a refresh fails an invariant check the previously loaded topology is kept and only
/// <see cref="IsTopologyValid"/> changes.</para>
/// </remarks>
public sealed class RedundancyCoordinator
{
    private readonly IDbContextFactory<OtOpcUaConfigDbContext> _dbContextFactory;
    private readonly ILogger<RedundancyCoordinator> _logger;
    private readonly string _selfNodeId;
    private readonly string _selfClusterId;
    private RedundancyTopology? _current;
    private bool _topologyValid = true;

    /// <summary>Create a coordinator for one node of one cluster.</summary>
    /// <param name="dbContextFactory">Factory for short-lived config-DB contexts (one per load).</param>
    /// <param name="logger">Logger for load results and invariant violations.</param>
    /// <param name="selfNodeId">NodeId of this process within the cluster.</param>
    /// <param name="selfClusterId">ClusterId this node belongs to.</param>
    /// <exception cref="ArgumentException">
    /// <paramref name="selfNodeId"/> or <paramref name="selfClusterId"/> is null, empty, or whitespace.
    /// </exception>
    public RedundancyCoordinator(
        IDbContextFactory<OtOpcUaConfigDbContext> dbContextFactory,
        ILogger<RedundancyCoordinator> logger,
        string selfNodeId,
        string selfClusterId)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(selfNodeId);
        ArgumentException.ThrowIfNullOrWhiteSpace(selfClusterId);

        _dbContextFactory = dbContextFactory;
        _logger = logger;
        _selfNodeId = selfNodeId;
        _selfClusterId = selfClusterId;
    }

    /// <summary>Last successfully loaded topology; null until a load has succeeded.</summary>
    public RedundancyTopology? Current => Volatile.Read(ref _current);

    /// <summary>
    /// True when the last load/refresh completed without an invariant violation; false
    /// forces <see cref="ServiceLevelCalculator"/> into the <see cref="ServiceLevelBand.InvalidTopology"/>
    /// band regardless of other inputs.
    /// </summary>
    public bool IsTopologyValid => Volatile.Read(ref _topologyValid);

    /// <summary>Load the topology for the first time. Throws on invariant violation.</summary>
    /// <param name="ct">Cancellation token flowed into the DB calls.</param>
    /// <exception cref="InvalidTopologyException">
    /// The cluster row is missing or the loaded nodes violate a topology invariant.
    /// </exception>
    public async Task InitializeAsync(CancellationToken ct)
    {
        await RefreshInternalAsync(throwOnInvalid: true, ct).ConfigureAwait(false);
    }

    /// <summary>
    /// Re-read the topology from the shared DB. Called after <c>sp_PublishGeneration</c>
    /// completes or after an Admin-triggered role-swap. Never throws
    /// <see cref="InvalidTopologyException"/> — on an invariant violation (including the
    /// cluster row having disappeared) it logs + flips <see cref="IsTopologyValid"/> false so
    /// the calculator returns <see cref="ServiceLevelBand.InvalidTopology"/> = 2. Failures
    /// raised by the database calls themselves, and cancellation, still propagate.
    /// </summary>
    /// <param name="ct">Cancellation token flowed into the DB calls.</param>
    public async Task RefreshAsync(CancellationToken ct)
    {
        await RefreshInternalAsync(throwOnInvalid: false, ct).ConfigureAwait(false);
    }

    /// <summary>
    /// Shared load path: query the cluster row and its enabled nodes, build the topology and
    /// publish it. Every <see cref="InvalidTopologyException"/> — whether from the missing
    /// cluster row or from <see cref="ClusterTopologyLoader"/> — goes through the same
    /// handler, which marks the topology invalid, logs, and rethrows only when
    /// <paramref name="throwOnInvalid"/> is set.
    /// </summary>
    private async Task RefreshInternalAsync(bool throwOnInvalid, CancellationToken ct)
    {
        await using var db = await _dbContextFactory.CreateDbContextAsync(ct).ConfigureAwait(false);

        try
        {
            // The not-found check lives inside the try so a vanished cluster row is treated
            // like any other invariant violation instead of escaping RefreshAsync.
            var cluster = await db.ServerClusters.AsNoTracking()
                .FirstOrDefaultAsync(c => c.ClusterId == _selfClusterId, ct).ConfigureAwait(false)
                ?? throw new InvalidTopologyException($"Cluster '{_selfClusterId}' not found in config DB.");

            var nodes = await db.ClusterNodes.AsNoTracking()
                .Where(n => n.ClusterId == _selfClusterId && n.Enabled)
                .ToListAsync(ct).ConfigureAwait(false);

            var topology = ClusterTopologyLoader.Load(_selfNodeId, cluster, nodes);
            Volatile.Write(ref _current, topology);
            Volatile.Write(ref _topologyValid, true);
            _logger.LogInformation(
                "Redundancy topology loaded: cluster={Cluster} self={Self} role={Role} mode={Mode} peers={PeerCount}",
                topology.ClusterId, topology.SelfNodeId, topology.SelfRole, topology.Mode, topology.PeerCount);
        }
        catch (InvalidTopologyException ex)
        {
            // _current is deliberately left untouched: readers keep the last good snapshot.
            Volatile.Write(ref _topologyValid, false);
            _logger.LogError(ex,
                "Redundancy topology invariant violation for cluster {Cluster}: {Reason}",
                _selfClusterId, ex.Message);
            if (throwOnInvalid) throw;
        }
    }
}
|
||||
@@ -0,0 +1,142 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration.Enums;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Server.Redundancy;
|
||||
|
||||
/// <summary>
/// Orchestrates Phase 6.3 Stream C: feeds the <see cref="ServiceLevelCalculator"/> with the
/// current (topology, peer reachability, apply-in-progress, recovery dwell, self health)
/// inputs and emits the resulting <see cref="byte"/> + labelled <see cref="ServiceLevelBand"/>
/// to subscribers. The OPC UA <c>ServiceLevel</c> variable node consumes this via
/// <see cref="OnStateChanged"/>, which is raised only when the computed byte changes.
/// </summary>
/// <remarks>
/// Pure orchestration — no background timer, no OPC UA stack dep. The caller (a
/// HostedService in a future PR, or a test) drives <see cref="ComputeAndPublish"/> at
/// whatever cadence is appropriate. Each call reads the inputs + recomputes the ServiceLevel
/// byte; state is fired on the <see cref="OnStateChanged"/> event when the byte differs from
/// the last emitted value (edge-triggered). The <see cref="OnServerUriArrayChanged"/> event
/// fires whenever the topology's <c>ServerUriArray</c> content changes.
/// </remarks>
public sealed class RedundancyStatePublisher
{
    private readonly RedundancyCoordinator _coordinator;
    private readonly ApplyLeaseRegistry _leases;
    private readonly RecoveryStateManager _recovery;
    private readonly PeerReachabilityTracker _peers;
    private readonly Func<bool> _selfHealthy;
    private readonly Func<bool> _operatorMaintenance;

    // Starts at 255 (Authoritative) — harmless before first tick. Consequence of the
    // edge-triggered compare in Emit: a first computed value of 255 raises no event.
    private byte _lastByte = 255;
    private IReadOnlyList<string>? _lastServerUriArray;

    /// <summary>Create a publisher over the given input sources.</summary>
    /// <param name="coordinator">Supplies the current topology and the topology-valid flag.</param>
    /// <param name="leases">Supplies the apply-in-progress flag.</param>
    /// <param name="recovery">Supplies the recovery-dwell flag.</param>
    /// <param name="peers">Supplies the latest reachability per peer NodeId.</param>
    /// <param name="selfHealthy">Self-health probe; defaults to "always healthy" when null.</param>
    /// <param name="operatorMaintenance">Operator-maintenance probe; defaults to "never" when null.</param>
    /// <exception cref="ArgumentNullException">Any of the four required collaborators is null.</exception>
    public RedundancyStatePublisher(
        RedundancyCoordinator coordinator,
        ApplyLeaseRegistry leases,
        RecoveryStateManager recovery,
        PeerReachabilityTracker peers,
        Func<bool>? selfHealthy = null,
        Func<bool>? operatorMaintenance = null)
    {
        ArgumentNullException.ThrowIfNull(coordinator);
        ArgumentNullException.ThrowIfNull(leases);
        ArgumentNullException.ThrowIfNull(recovery);
        ArgumentNullException.ThrowIfNull(peers);

        _coordinator = coordinator;
        _leases = leases;
        _recovery = recovery;
        _peers = peers;
        _selfHealthy = selfHealthy ?? (() => true);
        _operatorMaintenance = operatorMaintenance ?? (() => false);
    }

    /// <summary>
    /// Fires with the current ServiceLevel byte + band on a call to
    /// <see cref="ComputeAndPublish"/> when the byte differs from the previously-emitted one.
    /// </summary>
    public event Action<ServiceLevelSnapshot>? OnStateChanged;

    /// <summary>
    /// Fires when the cluster's ServerUriArray (self + peers) content changes — e.g. an
    /// operator adds or removes a peer. Consumer is the OPC UA <c>ServerUriArray</c>
    /// variable node in Stream C.2.
    /// </summary>
    public event Action<IReadOnlyList<string>>? OnServerUriArrayChanged;

    /// <summary>Snapshot of the last-published ServiceLevel byte — diagnostics + tests.</summary>
    public byte LastByte => _lastByte;

    /// <summary>
    /// Compute the current ServiceLevel + emit change events if anything moved. Caller
    /// drives cadence — a 1 s tick in production is reasonable; tests drive it directly.
    /// </summary>
    /// <returns>
    /// The snapshot for this call, returned whether or not an event was raised. While the
    /// coordinator has no topology yet the snapshot is <see cref="ServiceLevelBand.NoData"/>
    /// with a null topology.
    /// </returns>
    public ServiceLevelSnapshot ComputeAndPublish()
    {
        var topology = _coordinator.Current;
        if (topology is null)
        {
            // Not yet initialized — surface NoData so clients don't treat us as authoritative.
            return Emit((byte)ServiceLevelBand.NoData, null);
        }

        // Aggregate peer reachability. For 2-node v2.0 clusters there is at most one peer;
        // "all peers healthy" is the boolean input to the calculator. Each peer's record is
        // fetched exactly once so its UA and HTTP flags come from the same observation.
        var peerStates = topology.Peers.Select(p => _peers.Get(p.NodeId)).ToList();
        var peerUaHealthy = peerStates.All(s => s.UaHealthy);
        var peerHttpHealthy = peerStates.All(s => s.HttpHealthy);

        var role = MapRole(topology.SelfRole);

        var value = ServiceLevelCalculator.Compute(
            role: role,
            selfHealthy: _selfHealthy(),
            peerUaHealthy: peerUaHealthy,
            peerHttpHealthy: peerHttpHealthy,
            applyInProgress: _leases.IsApplyInProgress,
            recoveryDwellMet: _recovery.IsDwellMet(),
            topologyValid: _coordinator.IsTopologyValid,
            operatorMaintenance: _operatorMaintenance());

        MaybeFireServerUriArray(topology);
        return Emit(value, topology);
    }

    /// <summary>
    /// Normalizes the role handed to the calculator: Primary and Secondary pass through
    /// unchanged; every other value is passed as Standalone, which the calculator's
    /// <c>Compute</c> special-cases.
    /// </summary>
    private static RedundancyRole MapRole(RedundancyRole role) => role switch
    {
        RedundancyRole.Primary => RedundancyRole.Primary,
        RedundancyRole.Secondary => RedundancyRole.Secondary,
        _ => RedundancyRole.Standalone,
    };

    /// <summary>
    /// Builds the snapshot for <paramref name="value"/> and raises
    /// <see cref="OnStateChanged"/> only when the byte differs from the last emitted one.
    /// </summary>
    private ServiceLevelSnapshot Emit(byte value, RedundancyTopology? topology)
    {
        var snap = new ServiceLevelSnapshot(
            Value: value,
            Band: ServiceLevelCalculator.Classify(value),
            Topology: topology);

        if (value != _lastByte)
        {
            _lastByte = value;
            OnStateChanged?.Invoke(snap);
        }
        return snap;
    }

    /// <summary>
    /// Raises <see cref="OnServerUriArrayChanged"/> the first time a topology is seen and
    /// afterwards whenever the URI sequence differs (ordinal comparison) from the last one raised.
    /// </summary>
    private void MaybeFireServerUriArray(RedundancyTopology topology)
    {
        var current = topology.ServerUriArray();
        if (_lastServerUriArray is null || !current.SequenceEqual(_lastServerUriArray, StringComparer.Ordinal))
        {
            _lastServerUriArray = current;
            OnServerUriArrayChanged?.Invoke(current);
        }
    }
}
|
||||
|
||||
/// <summary>Result of one <see cref="RedundancyStatePublisher.ComputeAndPublish"/> call.</summary>
/// <param name="Value">Computed ServiceLevel byte.</param>
/// <param name="Band">Band label for <paramref name="Value"/>, as classified by <see cref="ServiceLevelCalculator.Classify"/>.</param>
/// <param name="Topology">Topology the value was computed from; null when none had been loaded yet.</param>
public sealed record ServiceLevelSnapshot(byte Value, ServiceLevelBand Band, RedundancyTopology? Topology);
|
||||
@@ -0,0 +1,55 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration.Enums;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Server.Redundancy;
|
||||
|
||||
/// <summary>
/// Snapshot of the cluster topology held by the <see cref="RedundancyCoordinator"/>. Read
/// once at startup + refreshed on publish-generation notification. Immutable — every
/// refresh produces a new instance so observers can compare identity-equality to detect
/// topology change.
/// </summary>
/// <remarks>
/// Per Phase 6.3 Stream A.1. Invariants enforced by the loader (see
/// <see cref="ClusterTopologyLoader"/>): at most one Primary per cluster for
/// WarmActive/Hot redundancy modes; every node has a unique ApplicationUri (OPC UA
/// Part 4 requirement — clients pin trust here); at most 2 nodes total per cluster
/// (decision #83).
/// </remarks>
/// <param name="ClusterId">Id of the cluster this snapshot describes.</param>
/// <param name="SelfNodeId">NodeId of the local node.</param>
/// <param name="SelfRole">Redundancy role declared for the local node.</param>
/// <param name="Mode">Redundancy mode declared for the cluster.</param>
/// <param name="Peers">Every cluster node other than the local one.</param>
/// <param name="SelfApplicationUri">ApplicationUri of the local node.</param>
public sealed record RedundancyTopology(
    string ClusterId,
    string SelfNodeId,
    RedundancyRole SelfRole,
    RedundancyMode Mode,
    IReadOnlyList<RedundancyPeer> Peers,
    string SelfApplicationUri)
{
    /// <summary>Peer count — 0 for a standalone (single-node) cluster, 1 for v2 two-node clusters.</summary>
    public int PeerCount => Peers.Count;

    /// <summary>
    /// ServerUriArray shape per OPC UA Part 4 §6.6.2.2 — the local ApplicationUri at index 0,
    /// followed by the peers' ApplicationUris ordered by NodeId (case-insensitive ordinal),
    /// so the result is stable across calls. A new list is built on every call.
    /// </summary>
    public IReadOnlyList<string> ServerUriArray()
    {
        var uris = new List<string> { SelfApplicationUri };

        foreach (var peer in Peers.OrderBy(p => p.NodeId, StringComparer.OrdinalIgnoreCase))
        {
            uris.Add(peer.ApplicationUri);
        }

        return uris;
    }
}
|
||||
|
||||
/// <summary>A cluster node other than the local one.</summary>
/// <param name="NodeId">Stable logical NodeId of the peer (e.g. <c>"LINE3-OPCUA-B"</c>).</param>
/// <param name="Role">Redundancy role declared for the peer in the shared config DB.</param>
/// <param name="Host">Hostname / IP of the peer — drives the health-probe target.</param>
/// <param name="OpcUaPort">OPC UA endpoint port of the peer.</param>
/// <param name="DashboardPort">Dashboard / health-endpoint port of the peer.</param>
/// <param name="ApplicationUri">ApplicationUri declared for the peer (carried in <see cref="RedundancyTopology.ServerUriArray"/>).</param>
public sealed record RedundancyPeer(
    string NodeId, RedundancyRole Role,
    string Host, int OpcUaPort, int DashboardPort,
    string ApplicationUri);
|
||||
|
||||
/// <summary>Raised when a topology invariant is found to be violated at startup or on refresh.</summary>
public sealed class InvalidTopologyException : Exception
{
    /// <summary>Create the exception with a human-readable description of the violation.</summary>
    /// <param name="message">Description of the violated invariant.</param>
    public InvalidTopologyException(string message)
        : base(message)
    {
    }
}
|
||||
@@ -0,0 +1,139 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Opc.Ua;
|
||||
using Opc.Ua.Server;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration.Enums;
|
||||
using ConfigRedundancyMode = ZB.MOM.WW.OtOpcUa.Configuration.Enums.RedundancyMode;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Server.Redundancy;
|
||||
|
||||
/// <summary>
/// Phase 6.3 Stream C (task #147) — the seam that carries the
/// <see cref="RedundancyStatePublisher"/>'s computed values onto the standard OPC UA
/// Server object nodes:
/// <list type="bullet">
///   <item><c>Server.ServiceLevel</c> (<see cref="VariableIds.Server_ServiceLevel"/>)
///     — Byte (0..255), Part 5 §6.3.34. Clients poll to pick the healthiest peer.</item>
///   <item><c>Server.ServerRedundancy.RedundancySupport</c>
///     (<see cref="VariableIds.Server_ServerRedundancy_RedundancySupport"/>)
///     — advertises Warm / Hot / Cold / None per Part 4 §6.6.2.</item>
///   <item><c>Server.ServerRedundancy.ServerUriArray</c>
///     (<see cref="VariableIds.NonTransparentRedundancyType_ServerUriArray"/>
///     when the redundancy node is upgraded to non-transparent)
///     — ApplicationUri of every node in the pair, self first.</item>
/// </list>
/// The writer is constructed once during the <c>OtOpcUaServer.OnServerStarted</c> hook;
/// callers invoke <see cref="ApplyServiceLevel"/> / <see cref="ApplyServerUriArray"/> /
/// <see cref="ApplyRedundancySupport"/> on publisher events. Each setter updates the
/// underlying <see cref="BaseVariableState.Value"/> then calls
/// <see cref="NodeState.ClearChangeMasks"/> to flush the change to subscribers.
/// </summary>
/// <remarks>
/// The writer is defensive: if the expected node shape isn't present on this particular
/// SDK build (e.g. <c>ServerUriArray</c> only exists on the
/// <c>NonTransparentRedundancyType</c> subtype and the ServerObject's default
/// <c>ServerRedundancy</c> property is the base type) the writer logs a warning once and
/// skips that specific update rather than throwing — matches the SDK's own tolerance
/// for optional address-space shape. All mutable writer state, including the warn-once
/// flag, is guarded by a single private gate; each setter skips the node write when the
/// value equals the one it last applied.
/// </remarks>
public sealed class ServerRedundancyNodeWriter
{
    private readonly IServerInternal _server;
    private readonly ILogger<ServerRedundancyNodeWriter> _logger;
    private readonly object _gate = new();

    private bool _warnedMissingServerUriArray;
    private byte? _lastServiceLevel;
    private RedundancySupport? _lastRedundancySupport;
    private IReadOnlyList<string>? _lastServerUriArray;

    /// <summary>Create a writer bound to one running server instance.</summary>
    /// <param name="server">Server whose <c>ServerObject</c> nodes are updated.</param>
    /// <param name="logger">Logger for the missing-node warning.</param>
    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
    public ServerRedundancyNodeWriter(IServerInternal server, ILogger<ServerRedundancyNodeWriter> logger)
    {
        ArgumentNullException.ThrowIfNull(server);
        ArgumentNullException.ThrowIfNull(logger);
        _server = server;
        _logger = logger;
    }

    /// <summary>
    /// Push a new Byte value onto <c>Server.ServiceLevel</c> + notify subscribers. Does
    /// nothing when the node is absent or the value equals the last one applied.
    /// </summary>
    /// <param name="value">ServiceLevel byte to publish.</param>
    public void ApplyServiceLevel(byte value)
    {
        var serverObject = _server.ServerObject;
        if (serverObject?.ServiceLevel is not { } node) return;

        lock (_gate)
        {
            if (_lastServiceLevel == value) return;
            _lastServiceLevel = value;
            node.Value = value;
            node.Timestamp = DateTime.UtcNow;
            node.ClearChangeMasks(_server.DefaultSystemContext, includeChildren: false);
        }
    }

    /// <summary>
    /// Map the Configuration-side <see cref="ConfigRedundancyMode"/> to OPC UA's
    /// <see cref="RedundancySupport"/> enum + apply to
    /// <c>Server.ServerRedundancy.RedundancySupport</c>. Called once at
    /// the <c>OtOpcUaServer.OnServerStarted</c> hook — the value is effectively static per
    /// deployment.
    /// </summary>
    /// <param name="mode">Configured redundancy mode; anything other than Warm or Hot is published as None.</param>
    public void ApplyRedundancySupport(ConfigRedundancyMode mode)
    {
        var serverObject = _server.ServerObject;
        if (serverObject?.ServerRedundancy?.RedundancySupport is not { } node) return;

        // RedundancyMode only declares None / Warm / Hot in v2.0 (non-transparent only per
        // decision #85). OPC UA's RedundancySupport has more states — clamp to the three we
        // support and let config-DB CHECK constraints prevent surprises.
        var support = mode switch
        {
            ConfigRedundancyMode.Warm => RedundancySupport.Warm,
            ConfigRedundancyMode.Hot => RedundancySupport.Hot,
            _ => RedundancySupport.None,
        };

        lock (_gate)
        {
            if (_lastRedundancySupport == support) return;
            _lastRedundancySupport = support;
            node.Value = support;
            node.Timestamp = DateTime.UtcNow;
            node.ClearChangeMasks(_server.DefaultSystemContext, includeChildren: false);
        }
    }

    /// <summary>
    /// Push the self-first peer-URI list onto
    /// <c>Server.ServerRedundancy.ServerUriArray</c>. Only applies when the SDK created
    /// <c>ServerRedundancy</c> as <see cref="NonTransparentRedundancyState"/>; on the
    /// base <see cref="ServerRedundancyState"/> the child is absent and we log-and-skip.
    /// </summary>
    /// <param name="serverUris">URIs to publish; copied before being stored or written.</param>
    /// <exception cref="ArgumentNullException"><paramref name="serverUris"/> is null.</exception>
    public void ApplyServerUriArray(IReadOnlyList<string> serverUris)
    {
        ArgumentNullException.ThrowIfNull(serverUris);
        var serverObject = _server.ServerObject;
        if (serverObject?.ServerRedundancy is not NonTransparentRedundancyState ntr
            || ntr.ServerUriArray is not { } node)
        {
            WarnMissingServerUriArrayOnce();
            return;
        }

        lock (_gate)
        {
            if (_lastServerUriArray is not null && _lastServerUriArray.SequenceEqual(serverUris, StringComparer.Ordinal))
                return;
            _lastServerUriArray = [.. serverUris];
            node.Value = [.. serverUris];
            node.Timestamp = DateTime.UtcNow;
            node.ClearChangeMasks(_server.DefaultSystemContext, includeChildren: false);
        }
    }

    /// <summary>
    /// Logs the "ServerUriArray cannot be published" warning at most once per writer. The
    /// flag is tested and set under the gate so concurrent callers cannot both log; the log
    /// call itself runs after the gate is released.
    /// </summary>
    private void WarnMissingServerUriArrayOnce()
    {
        lock (_gate)
        {
            if (_warnedMissingServerUriArray) return;
            _warnedMissingServerUriArray = true;
        }

        _logger.LogWarning(
            "Server.ServerRedundancy is not NonTransparentRedundancyState — ServerUriArray " +
            "cannot be published on this server instance. Clients will not see peer URIs " +
            "on the Part 4 §6.6.2 redundancy node until the redundancy-object type is upgraded.");
    }
}
|
||||
@@ -0,0 +1,131 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration.Enums;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Server.Redundancy;
|
||||
|
||||
/// <summary>
/// Pure-function translator from the redundancy-state inputs (role, self health, peer
/// reachability via HTTP + UA probes, apply-in-progress flag, recovery dwell, topology
/// validity) to the OPC UA Part 5 §6.3.34 <see cref="byte"/> ServiceLevel value.
/// </summary>
/// <remarks>
/// <para>Per decision #154 the 8-state matrix avoids the reserved bands (0=Maintenance,
/// 1=NoData) for operational states. Operational values occupy 2..255 so a spec-compliant
/// client that cuts over on "&lt;3 = unhealthy" keeps working without its vendor treating
/// the server as "under maintenance" during normal runtime.</para>
///
/// <para>This class is pure — no threads, no I/O. The coordinator that owns it re-evaluates
/// on every input change and pushes the new byte through an <c>IObserver&lt;byte&gt;</c> to
/// the OPC UA ServiceLevel variable. Tests exercise the full matrix without touching a UA
/// stack.</para>
/// </remarks>
public static class ServiceLevelCalculator
{
    /// <summary>
    /// Compute the ServiceLevel for the given inputs. Precedence, highest first: operator
    /// maintenance, self unhealthy, invalid topology, Standalone role, apply-in-progress,
    /// recovery dwell not met, peer unreachable, nominal.
    /// </summary>
    /// <param name="role">Role declared for this node in the shared config DB.</param>
    /// <param name="selfHealthy">This node's own health (from Phase 6.1 /healthz).</param>
    /// <param name="peerUaHealthy">Peer node reachable via OPC UA probe.</param>
    /// <param name="peerHttpHealthy">Peer node reachable via HTTP /healthz probe.</param>
    /// <param name="applyInProgress">True while this node is inside a publish-generation apply window.</param>
    /// <param name="recoveryDwellMet">True once the post-fault dwell + publish-witness conditions are met.</param>
    /// <param name="topologyValid">False when the cluster has detected >1 Primary (InvalidTopology demotes both nodes).</param>
    /// <param name="operatorMaintenance">True when operator has declared the node in maintenance.</param>
    /// <returns>The numeric value of the selected <see cref="ServiceLevelBand"/>.</returns>
    public static byte Compute(
        RedundancyRole role,
        bool selfHealthy,
        bool peerUaHealthy,
        bool peerHttpHealthy,
        bool applyInProgress,
        bool recoveryDwellMet,
        bool topologyValid,
        bool operatorMaintenance = false)
    {
        // Reserved bands first — they override everything per OPC UA Part 5 §6.3.34.
        if (operatorMaintenance)
        {
            return (byte)ServiceLevelBand.Maintenance; // 0
        }

        if (!selfHealthy)
        {
            return (byte)ServiceLevelBand.NoData; // 1
        }

        if (!topologyValid)
        {
            return (byte)ServiceLevelBand.InvalidTopology; // 2
        }

        // Standalone nodes have no peer: only the apply window can lower them.
        if (role == RedundancyRole.Standalone)
        {
            var standaloneBand = applyInProgress
                ? ServiceLevelBand.PrimaryMidApply
                : ServiceLevelBand.AuthoritativePrimary;
            return (byte)standaloneBand;
        }

        var actsAsPrimary = role == RedundancyRole.Primary;

        // Either probe failing counts as "peer unreachable".
        var peerUnreachable = !peerUaHealthy || !peerHttpHealthy;

        ServiceLevelBand band;
        if (applyInProgress)
        {
            // Apply-in-progress dominates recovery + isolation (client should cut to peer).
            band = ForRole(ServiceLevelBand.PrimaryMidApply, ServiceLevelBand.BackupMidApply);
        }
        else if (!recoveryDwellMet)
        {
            // Post-fault recovering — hold until dwell + witness satisfied.
            band = ForRole(ServiceLevelBand.RecoveringPrimary, ServiceLevelBand.RecoveringBackup);
        }
        else if (peerUnreachable)
        {
            // Per decision #154 Primary retains authority at 230 when isolated; Backup signals
            // 80 "take over if asked" and does NOT auto-promote (non-transparent model).
            band = ForRole(ServiceLevelBand.IsolatedPrimary, ServiceLevelBand.IsolatedBackup);
        }
        else
        {
            band = ForRole(ServiceLevelBand.AuthoritativePrimary, ServiceLevelBand.AuthoritativeBackup);
        }

        return (byte)band;

        // Picks the Primary-side or Backup-side band of a matrix row.
        ServiceLevelBand ForRole(ServiceLevelBand whenPrimary, ServiceLevelBand whenBackup) =>
            actsAsPrimary ? whenPrimary : whenBackup;
    }

    /// <summary>
    /// Labels a ServiceLevel byte with its matrix band name — for logs + Admin UI. Bytes that
    /// are not one of the matrix values map to <see cref="ServiceLevelBand.Unknown"/>.
    /// </summary>
    /// <param name="value">ServiceLevel byte to label.</param>
    public static ServiceLevelBand Classify(byte value)
    {
        var candidate = (ServiceLevelBand)value;

        return candidate switch
        {
            ServiceLevelBand.Maintenance
                or ServiceLevelBand.NoData
                or ServiceLevelBand.InvalidTopology
                or ServiceLevelBand.RecoveringBackup
                or ServiceLevelBand.BackupMidApply
                or ServiceLevelBand.IsolatedBackup
                or ServiceLevelBand.AuthoritativeBackup
                or ServiceLevelBand.RecoveringPrimary
                or ServiceLevelBand.PrimaryMidApply
                or ServiceLevelBand.IsolatedPrimary
                or ServiceLevelBand.AuthoritativePrimary => candidate,
            _ => ServiceLevelBand.Unknown,
        };
    }
}
|
||||
|
||||
/// <summary>
/// Named bands of the ServiceLevel matrix. Each member's numeric value is the byte published
/// as ServiceLevel and must stay in lock-step with the <see cref="ServiceLevelCalculator"/>
/// table; the Phase 6.3 compliance script is expected to catch any drift.
/// </summary>
public enum ServiceLevelBand : byte
{
    /// <summary>
    /// Maintenance declared by an operator. Value reserved per OPC UA Part 5 §6.3.34.
    /// </summary>
    Maintenance = 0,

    /// <summary>
    /// Node is unreachable or faulted. Value reserved per OPC UA Part 5 §6.3.34.
    /// </summary>
    NoData = 1,

    /// <summary>
    /// Inconsistency detected at runtime — more than one Primary observed; both nodes self-demote.
    /// </summary>
    InvalidTopology = 2,

    /// <summary>
    /// Backup recovering after a fault; the dwell period has not yet been met.
    /// </summary>
    RecoveringBackup = 30,

    /// <summary>
    /// Backup currently inside a publish-apply window.
    /// </summary>
    BackupMidApply = 50,

    /// <summary>
    /// Backup whose Primary is unreachable — "take over if asked"; it does NOT auto-promote.
    /// </summary>
    IsolatedBackup = 80,

    /// <summary>
    /// Backup in nominal operation.
    /// </summary>
    AuthoritativeBackup = 100,

    /// <summary>
    /// Primary recovering after a fault; the dwell period has not yet been met.
    /// </summary>
    RecoveringPrimary = 180,

    /// <summary>
    /// Primary currently inside a publish-apply window.
    /// </summary>
    PrimaryMidApply = 200,

    /// <summary>
    /// Primary whose peer is unreachable; it keeps serving and retains authority.
    /// </summary>
    IsolatedPrimary = 230,

    /// <summary>
    /// Primary in nominal operation.
    /// </summary>
    AuthoritativePrimary = 255,

    /// <summary>
    /// Sentinel returned for byte values that match no other band.
    /// </summary>
    Unknown = 254,
}
|
||||
100
src/Server/ZB.MOM.WW.OtOpcUa.Server/SealedBootstrap.cs
Normal file
100
src/Server/ZB.MOM.WW.OtOpcUa.Server/SealedBootstrap.cs
Normal file
@@ -0,0 +1,100 @@
|
||||
using System.Text.Json;
|
||||
using Microsoft.Data.SqlClient;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration.LocalCache;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Server;
|
||||
|
||||
/// <summary>
/// Phase 6.1 Stream D consumption hook. Resolves this node's current generation through the
/// <see cref="ResilientConfigReader"/> pipeline and seals every successful central-DB read
/// into the <see cref="GenerationSealedCache"/>, so that a later cache-miss path has a
/// snapshot to fall back to.
/// </summary>
/// <remarks>
/// <para>Lives alongside the original <see cref="NodeBootstrap"/> (which is built on the
/// single-file <see cref="ILocalConfigCache"/>). Program.cs can switch over once operators are
/// ready for the generation-sealed semantics; the original is kept for backward compatibility
/// with the three integration tests that construct <see cref="NodeBootstrap"/> directly.</para>
///
/// <para>Closes release blocker #2 in <c>docs/v2/v2-release-readiness.md</c> — the
/// generation-sealed cache, resilient reader and stale-config flag shipped as unit-tested
/// primitives in PR #81, but nothing in production consumed them before this wrapper.</para>
/// </remarks>
public sealed class SealedBootstrap
{
    /// <summary>Stored-procedure call that returns the cluster's current generation id.</summary>
    private const string CurrentGenerationSql =
        "EXEC dbo.sp_GetCurrentGenerationForCluster @NodeId=@n, @ClusterId=@c";

    private readonly NodeOptions _options;
    private readonly GenerationSealedCache _cache;
    private readonly ResilientConfigReader _reader;
    private readonly StaleConfigFlag _staleFlag;
    private readonly ILogger<SealedBootstrap> _logger;

    /// <summary>Captures the collaborators; no I/O happens until a load is requested.</summary>
    public SealedBootstrap(
        NodeOptions options,
        GenerationSealedCache cache,
        ResilientConfigReader reader,
        StaleConfigFlag staleFlag,
        ILogger<SealedBootstrap> logger)
    {
        (_options, _cache, _reader, _staleFlag, _logger) = (options, cache, reader, staleFlag, logger);
    }

    /// <summary>
    /// Resolves the current generation for this node. The central-DB fetch is routed through
    /// <see cref="ResilientConfigReader"/> (timeout → retry → fallback-to-cache), and each
    /// successful DB read seals a fresh snapshot so a future cache-miss has something to serve.
    /// </summary>
    /// <param name="ct">Cancellation token flowed into the reader and the DB calls.</param>
    /// <returns>The bootstrap result produced by the reader pipeline.</returns>
    public async Task<BootstrapResult> LoadCurrentGenerationAsync(CancellationToken ct)
    {
        var outcome = await _reader.ReadAsync(
            _options.ClusterId,
            centralFetch: async fetchCt => await FetchFromCentralAsync(fetchCt).ConfigureAwait(false),
            fromSnapshot: sealedSnapshot => BootstrapResult.FromCache(sealedSnapshot.GenerationId),
            ct).ConfigureAwait(false);

        return outcome;
    }

    /// <summary>
    /// Queries the central config DB for the cluster's current generation and, when one
    /// exists, seals a pointer-only snapshot of it before returning.
    /// </summary>
    private async ValueTask<BootstrapResult> FetchFromCentralAsync(CancellationToken ct)
    {
        await using var connection = new SqlConnection(_options.ConfigDbConnectionString);
        await connection.OpenAsync(ct).ConfigureAwait(false);

        await using var command = connection.CreateCommand();
        command.CommandText = CurrentGenerationSql;
        command.Parameters.AddWithValue("@n", _options.NodeId);
        command.Parameters.AddWithValue("@c", _options.ClusterId);

        await using var rows = await command.ExecuteReaderAsync(ct).ConfigureAwait(false);
        var hasRow = await rows.ReadAsync(ct).ConfigureAwait(false);
        if (!hasRow)
        {
            // No row means nothing has been published for this cluster yet.
            _logger.LogWarning("Cluster {Cluster} has no Published generation yet", _options.ClusterId);
            return BootstrapResult.EmptyFromDb();
        }

        var currentGeneration = rows.GetInt64(0);
        _logger.LogInformation("Bootstrapped from central DB: generation {GenerationId}; sealing snapshot", currentGeneration);

        // Only the generation pointer is sealed for now. A richer snapshot carrying the full
        // sp_GetGenerationContent payload arrives when the bootstrap flow starts consuming
        // content during offline operation (separate follow-up — see decision #148 and
        // phase-6-1 Stream D.3). The pointer alone lets the fallback path surface the
        // last-known-good generation id and flip UsingStaleConfig.
        var pointerSnapshot = new GenerationSnapshot
        {
            ClusterId = _options.ClusterId,
            GenerationId = currentGeneration,
            CachedAt = DateTime.UtcNow,
            PayloadJson = JsonSerializer.Serialize(
                new { generationId = currentGeneration, source = "sp_GetCurrentGenerationForCluster" }),
        };
        await _cache.SealAsync(pointerSnapshot, ct).ConfigureAwait(false);

        // Stale-flag bookkeeping (MarkFresh) is done by ResilientConfigReader on the returning
        // call path; this is the fresh branch, so the flag is deliberately left untouched.
        _ = _staleFlag; // referenced only so the field isn't reported as unused

        return BootstrapResult.FromDb(currentGeneration);
    }
}
|
||||
@@ -0,0 +1,115 @@
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Authorization;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.OpcUa;
|
||||
using ZB.MOM.WW.OtOpcUa.Server.OpcUa;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Server.Security;
|
||||
|
||||
/// <summary>
/// Bootstraps the Phase 6.2 authorization pipeline for the running Server. Loads
/// <c>NodeAcl</c> rows for the current generation into a
/// <see cref="PermissionTrieCache"/>, constructs an <see cref="AuthorizationGate"/>,
/// and merges per-namespace <see cref="EquipmentNamespaceContent"/> into a single
/// full-path index for <see cref="NodeScopeResolver"/>.
/// </summary>
/// <remarks>
///   <para>
///   Called by <c>OpcUaServerService.ExecuteAsync</c> after the
///   <see cref="DriverEquipmentContentRegistry"/> has been populated but before
///   <c>OpcUaApplicationHost.StartAsync</c> runs — that's the window where the
///   config-DB state is known + the OPC UA server hasn't yet captured the gate
///   references.
///   </para>
///   <para>
///   <see cref="AuthorizationOptions.Enabled"/> gates the whole flow. When
///   <c>false</c> (default), <see cref="BuildAsync"/> returns <c>(null, null)</c>
///   and the dispatch layer short-circuits every ACL check — identical to
///   pre-Phase-6.2.
///   </para>
/// </remarks>
public sealed class AuthorizationBootstrap(
    IDbContextFactory<OtOpcUaConfigDbContext> dbFactory,
    DriverEquipmentContentRegistry equipmentContentRegistry,
    NodeOptions nodeOptions,
    ILogger<AuthorizationBootstrap> logger)
{
    /// <summary>
    /// Build a gate + resolver pair for the supplied <paramref name="generationId"/>.
    /// Returns <c>(null, null)</c> when authorization is disabled via
    /// <see cref="AuthorizationOptions.Enabled"/> or when no generation id is supplied —
    /// in that case the dispatch layer runs without ACL enforcement (same behaviour the
    /// Server had before Phase 6.2 Stream C landed).
    /// </summary>
    /// <param name="generationId">Published generation to load ACLs for; <c>null</c> when none is available.</param>
    /// <param name="cancellationToken">Cancels the config-DB query.</param>
    /// <returns>The gate and resolver, or <c>(null, null)</c> when enforcement is skipped.</returns>
    public async Task<(AuthorizationGate?, NodeScopeResolver?)> BuildAsync(
        long? generationId, CancellationToken cancellationToken)
    {
        if (!nodeOptions.Authorization.Enabled)
        {
            logger.LogInformation(
                "Authorization disabled (Node:Authorization:Enabled=false); all ACL gates remain inert");
            return (null, null);
        }

        if (generationId is not long gen)
        {
            // NOTE(review): this skips enforcement even when StrictMode is on — confirm that
            // fail-open here is intended for strict deployments.
            logger.LogWarning(
                "Authorization enabled but no Published generation available — ACL enforcement skipped until next publish");
            return (null, null);
        }

        var gate = await BuildGateAsync(gen, cancellationToken).ConfigureAwait(false);
        var resolver = BuildResolver();

        logger.LogInformation(
            "Authorization pipeline bootstrapped — generation {Gen}, strictMode={Strict}",
            gen, nodeOptions.Authorization.StrictMode);

        return (gate, resolver);
    }

    /// <summary>
    /// Load every <see cref="Configuration.Entities.NodeAcl"/> row for
    /// <paramref name="generationId"/> scoped to this node's cluster, build a
    /// <see cref="PermissionTrieCache"/>, construct an <see cref="AuthorizationGate"/>.
    /// </summary>
    private async Task<AuthorizationGate> BuildGateAsync(long generationId, CancellationToken cancellationToken)
    {
        await using var ctx = await dbFactory.CreateDbContextAsync(cancellationToken).ConfigureAwait(false);
        var rows = await ctx.NodeAcls
            .AsNoTracking()
            .Where(a => a.ClusterId == nodeOptions.ClusterId && a.GenerationId == generationId)
            .ToListAsync(cancellationToken)
            .ConfigureAwait(false);

        var cache = new PermissionTrieCache();
        cache.Install(PermissionTrieBuilder.Build(nodeOptions.ClusterId, generationId, rows));
        var evaluator = new TriePermissionEvaluator(cache);
        return new AuthorizationGate(evaluator, strictMode: nodeOptions.Authorization.StrictMode);
    }

    /// <summary>
    /// Merge each registered driver's <see cref="EquipmentNamespaceContent"/> into a single
    /// full-path index. <see cref="ScopePathIndexBuilder"/> is invoked once per driver, so any
    /// exception it raises for that driver's content bubbles up and fails bootstrap. It cannot
    /// see keys contributed by other drivers: when two drivers produce the same key the later
    /// registration replaces the earlier one, and a warning is logged so the configuration
    /// conflict is visible instead of silent.
    /// </summary>
    /// <returns>
    /// A cluster-only resolver when no driver contributed any path; otherwise a resolver backed
    /// by the merged index.
    /// </returns>
    private NodeScopeResolver BuildResolver()
    {
        var merged = new Dictionary<string, NodeScope>(StringComparer.Ordinal);
        foreach (var kv in equipmentContentRegistry.Snapshot())
        {
            // Namespace id isn't carried on EquipmentNamespaceContent directly — driverId
            // serves as the namespace-stable key for ACL scope resolution.
            var perNamespace = ScopePathIndexBuilder.Build(nodeOptions.ClusterId, kv.Key, kv.Value);
            foreach (var entry in perNamespace)
            {
                if (merged.TryAdd(entry.Key, entry.Value))
                    continue;

                // Same key already present in the merged index: keep the historical
                // last-wins behaviour, but surface the conflict to operators.
                logger.LogWarning(
                    "Scope path {ScopePath} contributed by driver {DriverId} is already present in the merged index; the later registration wins",
                    entry.Key, kv.Key);
                merged[entry.Key] = entry.Value;
            }
        }

        return merged.Count == 0
            ? new NodeScopeResolver(nodeOptions.ClusterId)
            : new NodeScopeResolver(nodeOptions.ClusterId, merged);
    }
}
|
||||
@@ -0,0 +1,86 @@
|
||||
using Opc.Ua;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Authorization;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Server.Security;
|
||||
|
||||
/// <summary>
/// Adapter between the OPC UA stack's <see cref="ISystemContext.UserIdentity"/> and the
/// <see cref="IPermissionEvaluator"/>. It derives a <see cref="UserAuthorizationState"/>
/// from the session identity, asks the evaluator for a decision, and collapses the outcome
/// into a single bool that dispatch paths use to short-circuit with
/// <c>BadUserAccessDenied</c>.
/// </summary>
/// <remarks>
/// <para>This class is deliberately the only integration seam between the Server project and
/// the <c>Core.Authorization</c> evaluator. DriverNodeManager holds one reference and calls
/// <see cref="IsAllowed"/> on every Read / Write / HistoryRead / Browse / Call /
/// CreateMonitoredItems / etc. The evaluator itself stays pure — it knows nothing about the
/// OPC UA stack types.</para>
///
/// <para>Fail-open-during-transition: with <c>StrictMode = false</c>, missing cluster tries OR
/// sessions without resolved LDAP groups get <c>true</c> so existing deployments keep working
/// while ACLs are populated. Flip to strict via <c>Authorization:StrictMode = true</c> in
/// production.</para>
/// </remarks>
public sealed class AuthorizationGate
{
    private readonly IPermissionEvaluator _evaluator;
    private readonly bool _strictMode;
    private readonly TimeProvider _timeProvider;

    /// <summary>Creates a gate over <paramref name="evaluator"/>.</summary>
    /// <param name="evaluator">Evaluator that makes the actual permission decision.</param>
    /// <param name="strictMode">When true, anything short of an explicit allow is denied.</param>
    /// <param name="timeProvider">Clock used to stamp session state; the system clock when null.</param>
    /// <exception cref="ArgumentNullException"><paramref name="evaluator"/> is null.</exception>
    public AuthorizationGate(IPermissionEvaluator evaluator, bool strictMode = false, TimeProvider? timeProvider = null)
    {
        if (evaluator is null)
        {
            throw new ArgumentNullException(nameof(evaluator));
        }

        _evaluator = evaluator;
        _strictMode = strictMode;
        _timeProvider = timeProvider ?? TimeProvider.System;
    }

    /// <summary>True when strict authorization is enabled — no-grant = denied.</summary>
    public bool StrictMode
    {
        get { return _strictMode; }
    }

    /// <summary>
    /// Decides whether an OPC UA operation may proceed for the given session identity and scope.
    /// </summary>
    /// <param name="identity">Session identity; may be null for anonymous / unknown callers.</param>
    /// <param name="operation">Operation being attempted.</param>
    /// <param name="scope">Node scope the operation targets.</param>
    /// <returns>
    /// True to let dispatch continue; false to surface <c>BadUserAccessDenied</c>. Outside
    /// strict mode the result is always true: a missing identity, an identity without LDAP
    /// groups, and an evaluator denial all pass.
    /// </returns>
    public bool IsAllowed(IUserIdentity? identity, OpcUaOperation operation, NodeScope scope)
    {
        var laxFallback = !_strictMode;

        // Anonymous / unknown identity: denied only in strict mode, so the fallback auth
        // layers (WriteAuthzPolicy) still see the call in lax mode.
        if (identity is null)
        {
            return laxFallback;
        }

        // No LDAP groups on the identity: lax mode lets older deployments keep working.
        var state = BuildSessionState(identity, scope.ClusterId);
        if (state is null)
        {
            return laxFallback;
        }

        // The evaluator is always consulted; its denial only sticks in strict mode.
        return _evaluator.Authorize(state, operation, scope).IsAllowed || laxFallback;
    }

    /// <summary>
    /// Builds a <see cref="UserAuthorizationState"/> from the session identity.
    /// </summary>
    /// <param name="identity">Identity to inspect for <see cref="ILdapGroupsBearer"/> data.</param>
    /// <param name="clusterId">Cluster id recorded on the resulting state.</param>
    /// <returns>
    /// The session state, or null when the identity is not an <see cref="ILdapGroupsBearer"/>
    /// or carries an empty group list.
    /// </returns>
    public UserAuthorizationState? BuildSessionState(IUserIdentity identity, string clusterId)
    {
        if (identity is ILdapGroupsBearer groupSource && groupSource.LdapGroups.Count != 0)
        {
            return new UserAuthorizationState
            {
                // Display name doubles as the session key; a random id is used when absent.
                SessionId = identity.DisplayName ?? Guid.NewGuid().ToString("N"),
                ClusterId = clusterId,
                LdapGroups = groupSource.LdapGroups,
                MembershipResolvedUtc = _timeProvider.GetUtcNow().UtcDateTime,
                AuthGenerationId = 0,
                MembershipVersion = 0,
            };
        }

        return null;
    }
}
|
||||
@@ -0,0 +1,33 @@
|
||||
namespace ZB.MOM.WW.OtOpcUa.Server.Security;
|
||||
|
||||
/// <summary>
/// Settings for the Phase 6.2 authorization pipeline, bound from the
/// <c>Node:Authorization</c> section of <c>appsettings.json</c>. Everything defaults to off so
/// that upgrading from a pre-Phase-6.2 build does not start denying reads the day the new
/// build lands — operators opt in explicitly once their <c>NodeAcl</c> rows are populated.
/// </summary>
/// <remarks>
///   <para>
///   <see cref="Enabled"/> is the master switch. While it is <c>false</c> (the default) the
///   OPC UA application host is constructed with
///   <c>authzGate: null, scopeResolver: null</c>, and all six dispatch-layer gates
///   (Read, Write, HistoryRead, Browse, CreateMonitoredItems, Call) short-circuit to pass —
///   the same behaviour as pre-Phase-6.2.
///   </para>
///   <para>
///   Once enabled, <see cref="StrictMode"/> selects the failure mode: <c>false</c> (the
///   default) lets anonymous / no-LDAP-groups identities through so v1-style legacy clients
///   keep working; <c>true</c> denies them. Production deployments should move to
///   <c>StrictMode = true</c> after every client has been validated against the new identity
///   flow.
///   </para>
/// </remarks>
public sealed class AuthorizationOptions
{
    /// <summary>
    /// Master switch: <c>false</c> leaves the gate inert, <c>true</c> wires it into dispatch.
    /// </summary>
    public bool Enabled { get; init; }

    /// <summary>
    /// <c>false</c> lets anonymous / no-groups identities pass; <c>true</c> denies them.
    /// </summary>
    public bool StrictMode { get; init; }
}
|
||||
@@ -0,0 +1,20 @@
|
||||
namespace ZB.MOM.WW.OtOpcUa.Server.Security;
|
||||
|
||||
/// <summary>
/// Small contract an <see cref="Opc.Ua.IUserIdentity"/> can implement so the Phase 6.2
/// authorization evaluator can read the session's resolved LDAP group DNs without depending
/// on a concrete identity subtype. OtOpcUaServer's role-based identity implements it; tests
/// stub it to exercise the evaluator under different group memberships.
/// </summary>
/// <remarks>
/// Control/data-plane separation (decision #150): Admin UI role routing consumes
/// <see cref="IRoleBearer.Roles"/> via <c>LdapGroupRoleMapping</c>, whereas the OPC UA
/// data-path evaluator consumes <see cref="LdapGroups"/> directly against <c>NodeAcl</c>.
/// Both come from the same directory query at sign-in but never cross.
/// </remarks>
public interface ILdapGroupsBearer
{
    /// <summary>
    /// Fully-qualified LDAP group DNs the user is a member of.
    /// </summary>
    IReadOnlyList<string> LdapGroups { get; }
}
|
||||
13
src/Server/ZB.MOM.WW.OtOpcUa.Server/Security/IRoleBearer.cs
Normal file
13
src/Server/ZB.MOM.WW.OtOpcUa.Server/Security/IRoleBearer.cs
Normal file
@@ -0,0 +1,13 @@
|
||||
namespace ZB.MOM.WW.OtOpcUa.Server.Security;
|
||||
|
||||
/// <summary>
/// Small contract an <see cref="Opc.Ua.IUserIdentity"/> implementation can expose so that
/// <see cref="ZB.MOM.WW.OtOpcUa.Server.OpcUa.DriverNodeManager"/> can read the session's
/// resolved roles without depending on a concrete identity subtype. Implemented by
/// <c>OtOpcUaServer.RoleBasedIdentity</c>; tests provide stub identities to drive the authz
/// policy under different role sets.
/// </summary>
public interface IRoleBearer
{
    /// <summary>
    /// Roles resolved for the session.
    /// </summary>
    IReadOnlyList<string> Roles { get; }
}
|
||||
@@ -0,0 +1,30 @@
|
||||
namespace ZB.MOM.WW.OtOpcUa.Server.Security;
|
||||
|
||||
/// <summary>
/// Checks a (username, password) pair and reports the OPC UA roles resolved for that user.
/// The Server's <c>SessionManager_ImpersonateUser</c> hook delegates to this interface so unit
/// tests can substitute a fake authenticator and run without a live LDAP.
/// </summary>
public interface IUserAuthenticator
{
    /// <summary>
    /// Validates the supplied credentials.
    /// </summary>
    /// <param name="username">Login name presented by the client.</param>
    /// <param name="password">Password presented by the client.</param>
    /// <param name="ct">Token for cancelling the attempt.</param>
    /// <returns>The authentication outcome, including roles and groups on success.</returns>
    Task<UserAuthResult> AuthenticateAsync(
        string username,
        string password,
        CancellationToken ct = default);
}
|
||||
|
||||
/// <summary>
/// Outcome of an <c>AuthenticateAsync</c> call.
/// </summary>
/// <param name="Success">True iff the bind succeeded and roles/groups were resolved.</param>
/// <param name="DisplayName">The user's display name from LDAP; null on failure.</param>
/// <param name="Roles">
/// Mapped OPC UA role names (Admin / control-plane consumption — see decision #150).
/// </param>
/// <param name="Groups">
/// Raw LDAP group names the user belongs to. Phase 6.2 data-path authorization (the NodeAcl
/// evaluator) keys off this list directly rather than <paramref name="Roles"/>. Empty for
/// anonymous / failed binds.
/// </param>
/// <param name="Error">Human-readable failure reason; null on success.</param>
public sealed record UserAuthResult(
    bool Success,
    string? DisplayName,
    IReadOnlyList<string> Roles,
    IReadOnlyList<string> Groups,
    string? Error);
|
||||
|
||||
/// <summary>
/// Authenticator that rejects every attempt. Used when no security config is provided, so the
/// server can start (with only an anonymous endpoint) without throwing on UserName token
/// attempts.
/// </summary>
public sealed class DenyAllUserAuthenticator : IUserAuthenticator
{
    /// <summary>
    /// Ignores all arguments and returns a completed task carrying a failed result with no
    /// display name, no roles and no groups.
    /// </summary>
    public Task<UserAuthResult> AuthenticateAsync(string _, string __, CancellationToken ___)
    {
        var rejection = new UserAuthResult(false, null, [], [], "UserName token not supported");
        return Task.FromResult(rejection);
    }
}
|
||||
72
src/Server/ZB.MOM.WW.OtOpcUa.Server/Security/LdapOptions.cs
Normal file
72
src/Server/ZB.MOM.WW.OtOpcUa.Server/Security/LdapOptions.cs
Normal file
@@ -0,0 +1,72 @@
|
||||
namespace ZB.MOM.WW.OtOpcUa.Server.Security;
|
||||
|
||||
/// <summary>
/// LDAP settings for the OPC UA server's UserName token validator, bound from
/// <c>appsettings.json</c> <c>OpcUaServer:Ldap</c>. The defaults target the GLAuth dev instance
/// (localhost:3893, <c>dc=lmxopcua,dc=local</c>) used by the stock inner-loop setup. Production
/// deployments are expected to point at Active Directory; see <see cref="UserNameAttribute"/>
/// and the per-field XML docs for the AD-specific overrides.
/// </summary>
/// <remarks>
/// <para><b>Active Directory cheat-sheet</b>:</para>
/// <list type="bullet">
///   <item><see cref="Server"/>: one of the domain controllers, or the domain FQDN (will round-robin DCs).</item>
///   <item><see cref="Port"/>: <c>389</c> (LDAP) or <c>636</c> (LDAPS); use 636 + <see cref="UseTls"/> in production.</item>
///   <item><see cref="UseTls"/>: <c>true</c>. AD increasingly rejects plain-LDAP bind under LDAP-signing enforcement.</item>
///   <item><see cref="AllowInsecureLdap"/>: <c>false</c>. Dev escape hatch only.</item>
///   <item><see cref="SearchBase"/>: <c>DC=corp,DC=example,DC=com</c> — your domain's base DN.</item>
///   <item><see cref="ServiceAccountDn"/>: a dedicated service principal with read access to user + group entries
///     (e.g. <c>CN=OpcUaSvc,OU=Service Accounts,DC=corp,DC=example,DC=com</c>). Never a privileged admin.</item>
///   <item><see cref="UserNameAttribute"/>: <c>sAMAccountName</c> (classic login name) or <c>userPrincipalName</c>
///     (user@domain form). Default is <c>uid</c> which AD does <b>not</b> populate, so this override is required.</item>
///   <item><see cref="DisplayNameAttribute"/>: <c>displayName</c> gives the human name; <c>cn</c> works too but is less rich.</item>
///   <item><see cref="GroupAttribute"/>: <c>memberOf</c> — matches AD's default. Values are full DNs
///     (<c>CN=&lt;Group&gt;,OU=...,DC=...</c>); the authenticator strips the leading <c>CN=</c> RDN value and uses
///     that as the lookup key in <see cref="GroupToRole"/>.</item>
///   <item><see cref="GroupToRole"/>: maps your AD group common-names to OPC UA roles — e.g.
///     <c>{"OPCUA-Operators" : "WriteOperate", "OPCUA-Engineers" : "WriteConfigure"}</c>.</item>
/// </list>
/// <para>
/// Nested groups are <b>not</b> expanded — AD's <c>tokenGroups</c> / <c>LDAP_MATCHING_RULE_IN_CHAIN</c>
/// membership-chain filter isn't used. Assign users directly to the role-mapped groups, or pre-flatten
/// membership in your directory. If nested expansion becomes a requirement, it's an authenticator
/// enhancement (not a config change).
/// </para>
/// </remarks>
public sealed class LdapOptions
{
    /// <summary>Turns LDAP authentication on. Off by default.</summary>
    public bool Enabled { get; init; }

    /// <summary>Directory host name. Defaults to <c>localhost</c>.</summary>
    public string Server { get; init; } = "localhost";

    /// <summary>Directory port. Defaults to <c>3893</c>.</summary>
    public int Port { get; init; } = 3893;

    /// <summary>Whether to connect over TLS. Off by default.</summary>
    public bool UseTls { get; init; }

    /// <summary>
    /// Dev-only escape hatch — must be false in production. Note that it defaults to
    /// <c>true</c>, so production configuration has to switch it off explicitly.
    /// </summary>
    public bool AllowInsecureLdap { get; init; } = true;

    /// <summary>Base DN searched for user entries. Defaults to <c>dc=lmxopcua,dc=local</c>.</summary>
    public string SearchBase { get; init; } = "dc=lmxopcua,dc=local";

    /// <summary>DN of the service account; empty (the default) means none is configured.</summary>
    public string ServiceAccountDn { get; init; } = string.Empty;

    /// <summary>Password for <see cref="ServiceAccountDn"/>. Empty by default.</summary>
    public string ServiceAccountPassword { get; init; } = string.Empty;

    /// <summary>Attribute read for the user's display name. Defaults to <c>cn</c>.</summary>
    public string DisplayNameAttribute { get; init; } = "cn";

    /// <summary>Attribute read for the user's group memberships. Defaults to <c>memberOf</c>.</summary>
    public string GroupAttribute { get; init; } = "memberOf";

    /// <summary>
    /// LDAP attribute used to match a login name against user entries in the directory.
    /// Defaults to <c>uid</c> (RFC 2307). Common overrides:
    /// <list type="bullet">
    ///   <item><c>sAMAccountName</c> — Active Directory, classic NT-style login names (e.g. <c>jdoe</c>).</item>
    ///   <item><c>userPrincipalName</c> — Active Directory, email-style (e.g. <c>jdoe@corp.example.com</c>).</item>
    ///   <item><c>cn</c> — GLAuth + some OpenLDAP deployments where users are keyed by common-name.</item>
    /// </list>
    /// Used only when <see cref="ServiceAccountDn"/> is non-empty (search-then-bind path) —
    /// direct-bind fallback constructs the DN as <c>cn=&lt;name&gt;,&lt;SearchBase&gt;</c>
    /// regardless of this setting and is not a production-grade path against AD.
    /// </summary>
    public string UserNameAttribute { get; init; } = "uid";

    /// <summary>
    /// LDAP group → OPC UA role. Each authenticated user gets every role whose source group
    /// is in their membership list. Keys are compared case-insensitively. Recognized role
    /// names (CLAUDE.md): <c>ReadOnly</c> (browse + read), <c>WriteOperate</c>,
    /// <c>WriteTune</c>, <c>WriteConfigure</c>, <c>AlarmAck</c>.
    /// </summary>
    public Dictionary<string, string> GroupToRole { get; init; } =
        new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);
}
|
||||
@@ -0,0 +1,151 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Novell.Directory.Ldap;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Server.Security;
|
||||
|
||||
/// <summary>
|
||||
/// <see cref="IUserAuthenticator"/> that binds to the configured LDAP directory to validate
|
||||
/// the (username, password) pair, then pulls group membership and maps to OPC UA roles.
|
||||
/// Mirrors the bind-then-search pattern in <c>Admin.Security.LdapAuthService</c> but stays
|
||||
/// in the Server project so the Server process doesn't take a cross-app dependency on Admin.
|
||||
/// </summary>
|
||||
public sealed class LdapUserAuthenticator(LdapOptions options, ILogger<LdapUserAuthenticator> logger)
|
||||
: IUserAuthenticator
|
||||
{
|
||||
public async Task<UserAuthResult> AuthenticateAsync(string username, string password, CancellationToken ct = default)
|
||||
{
|
||||
if (!options.Enabled)
|
||||
return new UserAuthResult(false, null, [], [], "LDAP authentication disabled");
|
||||
if (string.IsNullOrWhiteSpace(username) || string.IsNullOrWhiteSpace(password))
|
||||
return new UserAuthResult(false, null, [], [], "Credentials required");
|
||||
|
||||
if (!options.UseTls && !options.AllowInsecureLdap)
|
||||
return new UserAuthResult(false, null, [], [],
|
||||
"Insecure LDAP is disabled. Set UseTls or AllowInsecureLdap for dev/test.");
|
||||
|
||||
try
|
||||
{
|
||||
using var conn = new LdapConnection();
|
||||
if (options.UseTls) conn.SecureSocketLayer = true;
|
||||
await Task.Run(() => conn.Connect(options.Server, options.Port), ct);
|
||||
|
||||
var bindDn = await ResolveUserDnAsync(conn, username, ct);
|
||||
await Task.Run(() => conn.Bind(bindDn, password), ct);
|
||||
|
||||
// Rebind as service account for attribute read, if configured — otherwise the just-
|
||||
// bound user reads their own entry (works when ACL permits self-read).
|
||||
if (!string.IsNullOrWhiteSpace(options.ServiceAccountDn))
|
||||
await Task.Run(() => conn.Bind(options.ServiceAccountDn, options.ServiceAccountPassword), ct);
|
||||
|
||||
var displayName = username;
|
||||
var groups = new List<string>();
|
||||
|
||||
try
|
||||
{
|
||||
var filter = $"(cn={EscapeLdapFilter(username)})";
|
||||
var results = await Task.Run(() =>
|
||||
conn.Search(options.SearchBase, LdapConnection.ScopeSub, filter, attrs: null, typesOnly: false), ct);
|
||||
|
||||
while (results.HasMore())
|
||||
{
|
||||
try
|
||||
{
|
||||
var entry = results.Next();
|
||||
var name = entry.GetAttribute(options.DisplayNameAttribute);
|
||||
if (name is not null) displayName = name.StringValue;
|
||||
|
||||
var groupAttr = entry.GetAttribute(options.GroupAttribute);
|
||||
if (groupAttr is not null)
|
||||
{
|
||||
foreach (var groupDn in groupAttr.StringValueArray)
|
||||
groups.Add(ExtractFirstRdnValue(groupDn));
|
||||
}
|
||||
|
||||
// GLAuth fallback: primary group is encoded as the ou= RDN above cn=.
|
||||
if (groups.Count == 0 && !string.IsNullOrEmpty(entry.Dn))
|
||||
{
|
||||
var primary = ExtractOuSegment(entry.Dn);
|
||||
if (primary is not null) groups.Add(primary);
|
||||
}
|
||||
}
|
||||
catch (LdapException) { break; }
|
||||
}
|
||||
}
|
||||
catch (LdapException ex)
|
||||
{
|
||||
logger.LogWarning(ex, "LDAP attribute lookup failed for {User}", username);
|
||||
}
|
||||
|
||||
conn.Disconnect();
|
||||
|
||||
var roles = groups
|
||||
.Where(g => options.GroupToRole.ContainsKey(g))
|
||||
.Select(g => options.GroupToRole[g])
|
||||
.Distinct(StringComparer.OrdinalIgnoreCase)
|
||||
.ToList();
|
||||
|
||||
return new UserAuthResult(true, displayName, roles, groups, null);
|
||||
}
|
||||
catch (LdapException ex)
|
||||
{
|
||||
logger.LogInformation("LDAP bind rejected user {User}: {Reason}", username, ex.ResultCode);
|
||||
return new UserAuthResult(false, null, [], [], "Invalid username or password");
|
||||
}
|
||||
catch (Exception ex) when (ex is not OperationCanceledException)
|
||||
{
|
||||
logger.LogError(ex, "Unexpected LDAP error for {User}", username);
|
||||
return new UserAuthResult(false, null, [], [], "Authentication error");
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Works out the distinguished name to bind as for <paramref name="username"/>.
/// </summary>
/// <remarks>
/// Three paths, tried in order:
/// 1. A value containing '=' is treated as a DN supplied by the caller and returned untouched.
/// 2. When a service-account DN is configured, the connection is bound as that account and the
///    search base is searched (subtree scope) for an entry whose configured user-name attribute
///    equals the filter-escaped user name; the first hit's DN is returned.
/// 3. Otherwise the DN is composed as cn={username}, suffixed with the search base when one is set.
/// On the search path this method leaves <paramref name="conn"/> bound as the service account;
/// it does not re-bind.
/// </remarks>
/// <param name="conn">Connection used for the service-account bind and the search.</param>
/// <param name="username">Login name, or a full DN if it contains '='.</param>
/// <param name="ct">Token handed to the Task.Run calls that wrap the blocking LDAP operations.</param>
/// <returns>The DN to use for the user's bind.</returns>
/// <exception cref="LdapException">
/// Thrown with result code NoSuchObject when the service-account search returns no entry.
/// </exception>
private async Task<string> ResolveUserDnAsync(LdapConnection conn, string username, CancellationToken ct)
{
    // Caller passed a DN directly.
    if (username.Contains('=')) return username;

    if (string.IsNullOrWhiteSpace(options.ServiceAccountDn))
    {
        // No service account to search with: compose the DN from the user name.
        return string.IsNullOrWhiteSpace(options.SearchBase)
            ? $"cn={username}"
            : $"cn={username},{options.SearchBase}";
    }

    await Task.Run(() => conn.Bind(options.ServiceAccountDn, options.ServiceAccountPassword), ct);

    // The user name is attacker-controlled text, so it is escaped before being embedded in the filter.
    var filter = $"({options.UserNameAttribute}={EscapeLdapFilter(username)})";
    var results = await Task.Run(() =>
        conn.Search(options.SearchBase, LdapConnection.ScopeSub, filter, ["dn"], false), ct);

    if (results.HasMore())
    {
        return results.Next().Dn;
    }

    // Report the attribute that was actually searched; it is configurable and not necessarily "uid".
    throw new LdapException("User not found", LdapException.NoSuchObject,
        $"No entry for {options.UserNameAttribute}={username}");
}
|
||||
|
||||
/// <summary>
/// Escapes the characters that are special inside an LDAP search-filter value so that
/// <paramref name="input"/> can be embedded in a filter string as literal text.
/// </summary>
/// <param name="input">Raw value to embed in a filter.</param>
/// <returns>
/// The input with backslash, asterisk, opening parenthesis, closing parenthesis and NUL replaced
/// by \5c, \2a, \28, \29 and \00 respectively; every other character is copied unchanged.
/// </returns>
internal static string EscapeLdapFilter(string input)
{
    var escaped = new System.Text.StringBuilder(input.Length);

    foreach (var ch in input)
    {
        switch (ch)
        {
            case '\\':
                escaped.Append("\\5c");
                break;
            case '*':
                escaped.Append("\\2a");
                break;
            case '(':
                escaped.Append("\\28");
                break;
            case ')':
                escaped.Append("\\29");
                break;
            case '\0':
                escaped.Append("\\00");
                break;
            default:
                escaped.Append(ch);
                break;
        }
    }

    return escaped.ToString();
}
|
||||
|
||||
/// <summary>
/// Returns the value of the first <c>ou=</c> component found in <paramref name="dn"/>.
/// </summary>
/// <remarks>
/// The DN is split on every comma and each piece is trimmed before the case-insensitive
/// <c>ou=</c> prefix test, so escaped commas inside a value are not recognised.
/// </remarks>
/// <param name="dn">Distinguished name to scan.</param>
/// <returns>The text after <c>ou=</c> in the first matching component, or null when there is none.</returns>
internal static string? ExtractOuSegment(string dn)
{
    const string OuPrefix = "ou=";

    var firstOu = dn.Split(',')
        .Select(component => component.Trim())
        .FirstOrDefault(component => component.StartsWith(OuPrefix, StringComparison.OrdinalIgnoreCase));

    return firstOu?.Substring(OuPrefix.Length);
}
|
||||
|
||||
/// <summary>
/// Returns the value part of the first RDN in <paramref name="dn"/>, for example
/// <c>admins</c> for <c>cn=admins,ou=groups,dc=example</c>.
/// </summary>
/// <remarks>
/// The value runs from just after the first '=' up to the next ',' (or the end of the string).
/// RFC 4514 escapes are not interpreted, so an escaped comma inside the value is treated as the
/// RDN separator.
/// </remarks>
/// <param name="dn">Distinguished name, or a bare name without any '='.</param>
/// <returns>
/// The first RDN value; the input itself when it contains no '='; an empty string when the
/// first RDN has an empty value.
/// </returns>
internal static string ExtractFirstRdnValue(string dn)
{
    var eq = dn.IndexOf('=');
    if (eq < 0)
    {
        // Not a DN at all: treat the whole string as the value.
        return dn;
    }

    var valueStart = eq + 1;
    var comma = dn.IndexOf(',', valueStart);

    // IndexOf returns -1 or an index >= valueStart. A comma directly at valueStart means the
    // value is empty, so test for "found" rather than "found after valueStart"; otherwise
    // "cn=,dc=x" would yield ",dc=x".
    return comma >= 0 ? dn[valueStart..comma] : dn[valueStart..];
}
|
||||
}
|
||||
@@ -0,0 +1,88 @@
|
||||
using System.Collections.Frozen;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Authorization;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Server.Security;
|
||||
|
||||
/// <summary>
|
||||
/// Maps a driver-side full reference (e.g. <c>"TestMachine_001/Oven/SetPoint"</c>) to the
|
||||
/// <see cref="NodeScope"/> the Phase 6.2 evaluator walks. Supports two modes:
|
||||
/// <list type="bullet">
|
||||
/// <item>
|
||||
/// <b>Cluster-only (pre-ADR-001)</b> — when no path index is supplied the resolver
|
||||
/// returns a flat <c>ClusterId + TagId</c> scope. Sufficient while the
|
||||
/// Config-DB-driven Equipment walker isn't live; Cluster-level grants cascade to every
|
||||
/// tag below per decision #129, so finer per-Equipment grants are effectively
|
||||
/// cluster-wide at dispatch.
|
||||
/// </item>
|
||||
/// <item>
|
||||
/// <b>Full-path (post-ADR-001 Task B)</b> — when an index is supplied, the resolver
|
||||
/// joins the full reference against the index to produce a complete
|
||||
/// <c>Cluster → Namespace → UnsArea → UnsLine → Equipment → Tag</c> scope. Unblocks
|
||||
/// per-Equipment / per-UnsLine ACL grants at the dispatch layer.
|
||||
/// </item>
|
||||
/// </list>
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// <para>The index is pre-loaded by the Server bootstrap against the published generation;
|
||||
/// the resolver itself does no live DB access. Resolve is O(1) dictionary lookup on the
|
||||
/// hot path; the fallback for unknown fullReference strings produces the same cluster-only
|
||||
/// scope the pre-ADR-001 resolver returned — new tags picked up via driver discovery but
|
||||
/// not yet indexed (e.g. between a DiscoverAsync result and the next generation publish)
|
||||
/// stay addressable without a scope-resolver crash.</para>
|
||||
///
|
||||
/// <para>Thread-safety: both constructor paths freeze inputs into immutable state. Callers
|
||||
/// may cache a single instance per DriverNodeManager without locks. Swap atomically on
|
||||
/// generation change via the server's publish pipeline.</para>
|
||||
/// </remarks>
|
||||
public sealed class NodeScopeResolver
{
    // Cluster stamped onto every fallback scope.
    private readonly string _clusterId;

    // Frozen fullReference -> scope lookup; null when the resolver runs in cluster-only mode.
    private readonly FrozenDictionary<string, NodeScope>? _index;

    /// <summary>
    /// Creates a cluster-only resolver (the pre-ADR-001 behavior): no path index is held, so
    /// every call to Resolve produces the flat fallback scope.
    /// </summary>
    public NodeScopeResolver(string clusterId)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(clusterId);

        _index = null;
        _clusterId = clusterId;
    }

    /// <summary>
    /// Creates a full-path resolver (ADR-001 Task B). <paramref name="pathIndex"/> maps each
    /// known driver-side full reference to its pre-resolved <see cref="NodeScope"/>; the
    /// entries are copied into a frozen dictionary keyed with ordinal string comparison, so
    /// later changes to the caller's dictionary are not observed.
    /// </summary>
    public NodeScopeResolver(string clusterId, IReadOnlyDictionary<string, NodeScope> pathIndex)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(clusterId);
        ArgumentNullException.ThrowIfNull(pathIndex);

        _index = pathIndex.ToFrozenDictionary(StringComparer.Ordinal);
        _clusterId = clusterId;
    }

    /// <summary>
    /// Resolves the scope for the driver-side <paramref name="fullReference"/>. An indexed
    /// reference returns its stored scope; when there is no index, or the reference is not in
    /// it, a cluster-only scope is returned carrying just the cluster id, the reference as
    /// the tag id, and the Equipment hierarchy kind.
    /// </summary>
    public NodeScope Resolve(string fullReference)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(fullReference);

        if (_index is null || !_index.TryGetValue(fullReference, out var indexedScope))
        {
            // Unknown or un-indexed reference: fall back to the flat cluster + tag shape.
            return new NodeScope
            {
                ClusterId = _clusterId,
                TagId = fullReference,
                Kind = NodeHierarchyKind.Equipment,
            };
        }

        return indexedScope;
    }
}
|
||||
@@ -0,0 +1,81 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration.Entities;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Authorization;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.OpcUa;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Server.Security;
|
||||
|
||||
/// <summary>
|
||||
/// Builds the <see cref="NodeScope"/> path index consumed by <see cref="NodeScopeResolver"/>
|
||||
/// from a Config-DB snapshot of a single published generation. Runs once per generation
|
||||
/// (or on every generation change) at the Server bootstrap layer; the produced index is
|
||||
/// immutable + hot-path readable per ADR-001 Task B.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// <para>The index key is the driver-side full reference (<c>Tag.TagConfig</c>) — the same
|
||||
/// string the dispatch layer passes to <see cref="NodeScopeResolver.Resolve"/>. The value
|
||||
/// is a <see cref="NodeScope"/> with every UNS level populated:
|
||||
/// <c>ClusterId / NamespaceId / UnsAreaId / UnsLineId / EquipmentId / TagId</c>. Tag rows
|
||||
/// with null <c>EquipmentId</c> (SystemPlatform-namespace Galaxy tags per decision #120)
|
||||
/// are excluded from the index — the cluster-only fallback path in the resolver handles
|
||||
/// them without needing an index entry.</para>
|
||||
///
|
||||
/// <para>Duplicate keys are not expected but would be indicative of corrupt data — the
|
||||
/// builder throws <see cref="InvalidOperationException"/> on collision so a config drift
|
||||
/// surfaces at bootstrap instead of producing silently-last-wins scopes at dispatch.</para>
|
||||
/// </remarks>
|
||||
public static class ScopePathIndexBuilder
{
    /// <summary>
    /// Builds a fullReference → NodeScope index from the pre-loaded rows of a single namespace.
    /// Reads the Lines, Equipment and Tags collections of <paramref name="content"/>. Callers
    /// must filter inputs to a single <see cref="Namespace"/> + the same
    /// <see cref="ConfigGeneration"/> upstream.
    /// </summary>
    /// <param name="clusterId">Owning cluster — populates <see cref="NodeScope.ClusterId"/>.</param>
    /// <param name="namespaceId">Owning namespace — populates <see cref="NodeScope.NamespaceId"/>.</param>
    /// <param name="content">Pre-loaded rows for the namespace.</param>
    public static IReadOnlyDictionary<string, NodeScope> Build(
        string clusterId,
        string namespaceId,
        EquipmentNamespaceContent content)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(clusterId);
        ArgumentException.ThrowIfNullOrWhiteSpace(namespaceId);
        ArgumentNullException.ThrowIfNull(content);

        // Parent lookups for walking Tag -> Equipment -> Line -> Area; ids compare case-insensitively.
        var areaByLine = content.Lines.ToDictionary(
            line => line.UnsLineId,
            line => line.UnsAreaId,
            StringComparer.OrdinalIgnoreCase);
        var lineByEquipment = content.Equipment.ToDictionary(
            equipment => equipment.EquipmentId,
            equipment => equipment.UnsLineId,
            StringComparer.OrdinalIgnoreCase);

        // The index itself is keyed ordinally (case-sensitive) on the full reference.
        var index = new Dictionary<string, NodeScope>(StringComparer.Ordinal);

        foreach (var row in content.Tags)
        {
            var equipmentId = row.EquipmentId;

            // Skipped rows, in order of the checks below:
            //  - no EquipmentId: a SystemPlatform-namespace tag, left to the resolver's
            //    cluster-only fallback. NOTE(review): this comment previously cited decision
            //    #110 while the class remarks cite #120 — confirm which number is right.
            //  - broken FK to Equipment, or to the Equipment's line: sp_ValidateDraft should
            //    have caught this at publish, so drift here is unexpected but non-fatal.
            if (string.IsNullOrEmpty(equipmentId)
                || !lineByEquipment.TryGetValue(equipmentId, out var lineId)
                || !areaByLine.TryGetValue(lineId, out var areaId))
            {
                continue;
            }

            var resolved = new NodeScope
            {
                ClusterId = clusterId,
                NamespaceId = namespaceId,
                UnsAreaId = areaId,
                UnsLineId = lineId,
                EquipmentId = equipmentId,
                TagId = row.TagConfig,
                Kind = NodeHierarchyKind.Equipment,
            };

            if (index.TryAdd(row.TagConfig, resolved))
            {
                continue;
            }

            // Two rows mapped to the same key: fail loudly instead of letting one silently win.
            throw new InvalidOperationException(
                $"Duplicate fullReference '{row.TagConfig}' in Equipment namespace '{namespaceId}'. " +
                "Config data is corrupt — two Tag rows produced the same wire-level address.");
        }

        return index;
    }
}
|
||||
@@ -0,0 +1,88 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Server.Security;
|
||||
|
||||
/// <summary>
|
||||
/// Server-layer write-authorization policy. ACL enforcement lives here — drivers report
|
||||
/// <see cref="SecurityClassification"/> as discovery metadata only; the server decides
|
||||
/// whether a given session is allowed to write a given attribute by checking the session's
|
||||
/// roles (resolved at login via <see cref="LdapUserAuthenticator"/>) against the required
|
||||
/// role for the attribute's classification.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// Matches the table in <c>docs/Configuration.md</c>:
|
||||
/// <list type="bullet">
|
||||
/// <item><c>FreeAccess</c>: no role required — anonymous sessions can write (matches v1 default).</item>
|
||||
/// <item><c>Operate</c> / <c>SecuredWrite</c>: <c>WriteOperate</c> role required.</item>
|
||||
/// <item><c>Tune</c>: <c>WriteTune</c> role required.</item>
|
||||
/// <item><c>VerifiedWrite</c> / <c>Configure</c>: <c>WriteConfigure</c> role required.</item>
|
||||
/// <item><c>ViewOnly</c>: no role grants write access.</item>
|
||||
/// </list>
|
||||
/// <c>AlarmAck</c> is checked at the alarm-acknowledge path, not here.
|
||||
/// </remarks>
|
||||
public static class WriteAuthzPolicy
{
    public const string RoleWriteOperate = "WriteOperate";
    public const string RoleWriteTune = "WriteTune";
    public const string RoleWriteConfigure = "WriteConfigure";

    /// <summary>
    /// Decides whether a session holding <paramref name="userRoles"/> may write an attribute of
    /// the given <paramref name="classification"/>. <c>FreeAccess</c> is always allowed and
    /// <c>ViewOnly</c> is always denied, whatever the roles. Any other classification is allowed
    /// only when one of the roles equals the mapped role, compared case-insensitively; a
    /// classification with no mapped role is denied.
    /// </summary>
    public static bool IsAllowed(SecurityClassification classification, IReadOnlyCollection<string> userRoles)
    {
        switch (classification)
        {
            case SecurityClassification.FreeAccess:
                return true;
            case SecurityClassification.ViewOnly:
                return false;
        }

        if (RequiredRole(classification) is not { } neededRole)
        {
            return false;
        }

        foreach (var heldRole in userRoles)
        {
            if (neededRole.Equals(heldRole, StringComparison.OrdinalIgnoreCase))
            {
                return true;
            }
        }

        return false;
    }

    /// <summary>
    /// Role a session must hold to write the given classification. Returns null both when no
    /// role can grant access (<see cref="SecurityClassification.ViewOnly"/>) and when none is
    /// needed (<see cref="SecurityClassification.FreeAccess"/>); <see cref="IsAllowed"/> deals
    /// with those two cases before consulting this method, so callers should prefer it over
    /// interpreting null themselves. Unrecognised classifications also return null.
    /// </summary>
    public static string? RequiredRole(SecurityClassification classification) => classification switch
    {
        SecurityClassification.Operate or SecurityClassification.SecuredWrite => RoleWriteOperate,
        SecurityClassification.Tune => RoleWriteTune,
        SecurityClassification.VerifiedWrite or SecurityClassification.Configure => RoleWriteConfigure,

        // FreeAccess and ViewOnly are short-circuited by IsAllowed; anything else has no role.
        _ => null,
    };

    /// <summary>
    /// Translates a driver-reported <see cref="SecurityClassification"/> into the
    /// <see cref="Core.Abstractions.OpcUaOperation"/> the Phase 6.2 evaluator consults
    /// for the matching <see cref="Configuration.Enums.NodePermissions"/> bit.
    /// Classifications without an explicit mapping (including FreeAccess and ViewOnly, which
    /// <see cref="IsAllowed"/> short-circuits first) map to WriteOperate.
    /// </summary>
    public static Core.Abstractions.OpcUaOperation ToOpcUaOperation(SecurityClassification classification) =>
        classification switch
        {
            SecurityClassification.Tune => Core.Abstractions.OpcUaOperation.WriteTune,
            SecurityClassification.VerifiedWrite or SecurityClassification.Configure
                => Core.Abstractions.OpcUaOperation.WriteConfigure,

            // Operate, SecuredWrite and every unmapped classification.
            _ => Core.Abstractions.OpcUaOperation.WriteOperate,
        };
}
|
||||
@@ -0,0 +1,66 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<OutputType>Exe</OutputType>
|
||||
<TargetFramework>net10.0</TargetFramework>
|
||||
<Nullable>enable</Nullable>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<LangVersion>latest</LangVersion>
|
||||
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
|
||||
<GenerateDocumentationFile>true</GenerateDocumentationFile>
|
||||
<NoWarn>$(NoWarn);CS1591</NoWarn>
|
||||
<RootNamespace>ZB.MOM.WW.OtOpcUa.Server</RootNamespace>
|
||||
<AssemblyName>OtOpcUa.Server</AssemblyName>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Microsoft.Extensions.Hosting" Version="10.0.0"/>
|
||||
<PackageReference Include="Microsoft.Extensions.Hosting.WindowsServices" Version="10.0.0"/>
|
||||
<PackageReference Include="Microsoft.Extensions.Http" Version="10.0.0"/>
|
||||
<PackageReference Include="Microsoft.Extensions.Configuration.Json" Version="10.0.0"/>
|
||||
<PackageReference Include="Serilog.Extensions.Hosting" Version="9.0.0"/>
|
||||
<PackageReference Include="Serilog.Settings.Configuration" Version="9.0.0"/>
|
||||
<PackageReference Include="Serilog.Sinks.Console" Version="6.0.0"/>
|
||||
<PackageReference Include="Serilog.Sinks.File" Version="7.0.0"/>
|
||||
<PackageReference Include="Serilog.Formatting.Compact" Version="3.0.0"/>
|
||||
<PackageReference Include="OPCFoundation.NetStandard.Opc.Ua.Server" Version="1.5.374.126"/>
|
||||
<PackageReference Include="OPCFoundation.NetStandard.Opc.Ua.Configuration" Version="1.5.374.126"/>
|
||||
<PackageReference Include="Novell.Directory.Ldap.NETStandard" Version="3.6.0"/>
|
||||
<PackageReference Include="Microsoft.EntityFrameworkCore.SqlServer" Version="10.0.0"/>
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\..\Core\ZB.MOM.WW.OtOpcUa.Core\ZB.MOM.WW.OtOpcUa.Core.csproj"/>
|
||||
<ProjectReference Include="..\..\Core\ZB.MOM.WW.OtOpcUa.Core.Scripting\ZB.MOM.WW.OtOpcUa.Core.Scripting.csproj"/>
|
||||
<ProjectReference Include="..\..\Core\ZB.MOM.WW.OtOpcUa.Core.VirtualTags\ZB.MOM.WW.OtOpcUa.Core.VirtualTags.csproj"/>
|
||||
<ProjectReference Include="..\..\Core\ZB.MOM.WW.OtOpcUa.Core.ScriptedAlarms\ZB.MOM.WW.OtOpcUa.Core.ScriptedAlarms.csproj"/>
|
||||
<ProjectReference Include="..\..\Core\ZB.MOM.WW.OtOpcUa.Core.AlarmHistorian\ZB.MOM.WW.OtOpcUa.Core.AlarmHistorian.csproj"/>
|
||||
<ProjectReference Include="..\..\Drivers\ZB.MOM.WW.OtOpcUa.Driver.Galaxy\ZB.MOM.WW.OtOpcUa.Driver.Galaxy.csproj"/>
|
||||
<ProjectReference Include="..\..\Drivers\ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client\ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client.csproj"/>
|
||||
<ProjectReference Include="..\..\Drivers\ZB.MOM.WW.OtOpcUa.Driver.FOCAS\ZB.MOM.WW.OtOpcUa.Driver.FOCAS.csproj"/>
|
||||
<ProjectReference Include="..\..\Drivers\ZB.MOM.WW.OtOpcUa.Driver.Modbus\ZB.MOM.WW.OtOpcUa.Driver.Modbus.csproj"/>
|
||||
<ProjectReference Include="..\..\Drivers\ZB.MOM.WW.OtOpcUa.Driver.AbCip\ZB.MOM.WW.OtOpcUa.Driver.AbCip.csproj"/>
|
||||
<ProjectReference Include="..\..\Drivers\ZB.MOM.WW.OtOpcUa.Driver.S7\ZB.MOM.WW.OtOpcUa.Driver.S7.csproj"/>
|
||||
<ProjectReference Include="..\..\Drivers\ZB.MOM.WW.OtOpcUa.Driver.AbLegacy\ZB.MOM.WW.OtOpcUa.Driver.AbLegacy.csproj"/>
|
||||
<ProjectReference Include="..\..\Drivers\ZB.MOM.WW.OtOpcUa.Driver.TwinCAT\ZB.MOM.WW.OtOpcUa.Driver.TwinCAT.csproj"/>
|
||||
<ProjectReference Include="..\..\Tooling\ZB.MOM.WW.OtOpcUa.Analyzers\ZB.MOM.WW.OtOpcUa.Analyzers.csproj"
|
||||
OutputItemType="Analyzer" ReferenceOutputAssembly="false"/>
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<InternalsVisibleTo Include="ZB.MOM.WW.OtOpcUa.Server.Tests"/>
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<None Update="appsettings.json" CopyToOutputDirectory="PreserveNewest"/>
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<NuGetAuditSuppress Include="https://github.com/advisories/GHSA-37gx-xxp4-5rgx"/>
|
||||
<NuGetAuditSuppress Include="https://github.com/advisories/GHSA-w3x6-4m5h-cxqf"/>
|
||||
<!-- OPCFoundation.NetStandard.Opc.Ua.Core advisory — v1 already uses this package at the
|
||||
same version, risk already accepted in the v1 stack. -->
|
||||
<NuGetAuditSuppress Include="https://github.com/advisories/GHSA-h958-fxgg-g7w3"/>
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
26
src/Server/ZB.MOM.WW.OtOpcUa.Server/appsettings.json
Normal file
26
src/Server/ZB.MOM.WW.OtOpcUa.Server/appsettings.json
Normal file
@@ -0,0 +1,26 @@
|
||||
{
|
||||
"Serilog": {
|
||||
"MinimumLevel": "Information"
|
||||
},
|
||||
"Node": {
|
||||
"NodeId": "node-dev-a",
|
||||
"ClusterId": "cluster-dev",
|
||||
"ConfigDbConnectionString": "Server=10.100.0.35,14330;Database=OtOpcUaConfig;Integrated Security=True;TrustServerCertificate=True;Encrypt=False;",
|
||||
"LocalCachePath": "config_cache.db"
|
||||
},
|
||||
"Historian": {
|
||||
"Wonderware": {
|
||||
"Enabled": false,
|
||||
"PipeName": "OtOpcUaWonderwareHistorian",
|
||||
"SharedSecret": "",
|
||||
"PeerName": "OtOpcUa-node-dev-a",
|
||||
"DriverInstancePrefix": "galaxy",
|
||||
"ConnectTimeoutSeconds": 10,
|
||||
"CallTimeoutSeconds": 30
|
||||
}
|
||||
},
|
||||
"Galaxy": {
|
||||
"$comment": "PR 7.1 — DefaultBackend selects which factory tooling and migration scripts pick when authoring NEW Galaxy DriverInstance rows. Both factories register at startup so existing rows keep working: 'GalaxyMxGateway' (default since PR 7.1, in-process .NET 10 driver over the mxaccessgw gRPC gateway) and 'Galaxy' (legacy out-of-process Galaxy.Host EXE + named pipe). Legacy registration retires in PR 7.2 once the parity matrix in docs/v2/Galaxy.ParityMatrix.md is fully green on the parity rig.",
|
||||
"DefaultBackend": "GalaxyMxGateway"
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user