task-galaxy-e2e branch — non-FOCAS work-in-progress snapshot
Catch-all commit for pending work on the task-galaxy-e2e branch that
wasn't part of the FOCAS migration. Grouping by topic so future per-topic
commits can be cherry-picked if needed.
TwinCAT
- src/.../Driver.TwinCAT/AdsTwinCATClient.cs + TwinCATDriverFactoryExtensions.cs:
factory-registration extensions + ADS client refinements.
- src/.../Driver.TwinCAT.Cli/Commands/BrowseCommand.cs: new browse command
for the TwinCAT test-client CLI.
- tests/.../Driver.TwinCAT.IntegrationTests/TwinCAT3SmokeTests.cs + TwinCatProject/:
fixture scaffold with a minimal POU + README pointing at the TCBSD/ESXi
VM for e2e.
- docs/Driver.TwinCAT.Cli.md + docs/drivers/TwinCAT-Test-Fixture.md:
documentation for the above.
- docs/v3/twincat-backlog.md: forward-looking backlog seed.
Admin UI + fleet status
- src/.../Admin/Components/Pages/Clusters/DriversTab.razor + Hosts.razor:
UI refresh for fleet-status rendering.
- src/.../Admin/Hubs/FleetStatusHub.cs + FleetStatusPoller.cs +
Admin/Program.cs: SignalR hub + poller plumbing for live fleet data.
- tests/.../Admin.Tests/FleetStatusPollerTests.cs: poller coverage.
Server + redundancy runtime (Phase 6.3 follow-ups)
- src/.../Server/Hosting/RedundancyPublisherHostedService.cs: HostedService
that owns the RedundancyStatePublisher lifecycle + wires peer reachability.
- src/.../Server/Redundancy/ServerRedundancyNodeWriter.cs: OPC UA
variable-node writer binding ServiceLevel + ServerUriArray to the
publisher's events.
- src/.../Server/Program.cs + Server.csproj: hosted-service registration.
- tests/.../Server.Tests/ServerRedundancyNodeWriterTests.cs +
Server.Tests.csproj: coverage for the above.
Configuration
- src/.../Configuration/Validation/DraftValidator.cs +
tests/.../Configuration.Tests/DraftValidatorTests.cs: draft-validation
refinements.
E2E scripts (shared infrastructure)
- scripts/e2e/README.md + _common.ps1 + test-all.ps1: shared helpers + the
all-drivers test-all runner.
- scripts/e2e/test-opcuaclient.ps1: OPC UA Client e2e runner.
Docs
- docs/v2/implementation/phase-6-{1,2,3,4}*.md + exit-gate-phase-{3,7}.md:
phase-gate + implementation doc updates.
- docs/v2/plan.md: top-level plan refresh.
- docs/v2/redundancy-interop-playbook.md: client interop playbook for the
Phase 6.3 redundancy-runtime work.
Two orphan FOCAS docs remain on disk but deliberately unstaged —
docs/v2/focas-deployment.md and docs/v2/implementation/focas-simulator-plan.md
describe the now-retired Tier-C topology and should either be rewritten
or deleted in a follow-up.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,119 @@
|
||||
using Microsoft.Extensions.Hosting;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using ZB.MOM.WW.OtOpcUa.Server.OpcUa;
|
||||
using ZB.MOM.WW.OtOpcUa.Server.Redundancy;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Server.Hosting;
|
||||
|
||||
/// <summary>
/// Phase 6.3 Stream C (task #147) glue — drives <see cref="RedundancyStatePublisher"/> on
/// a periodic tick and pushes the resulting ServiceLevel / ServerUriArray /
/// RedundancySupport values onto the OPC UA Server node via
/// <see cref="ServerRedundancyNodeWriter"/>.
/// </summary>
/// <remarks>
/// <para>
/// The OPC UA <c>ServerObject</c> exists only after <c>StandardServer.OnServerStarted</c>
/// has run, which is inside <see cref="OpcUaApplicationHost.StartAsync"/>. This hosted
/// service polls until <c>host.Server?.CurrentInstance?.ServerObject</c> is non-null before
/// binding the writer — the server boot sequence doesn't expose a "ready" event. The wait
/// is bounded by <see cref="ServerReadyTimeout"/>.
/// </para>
/// <para>
/// Tick cadence is 1 s by default. The publisher is edge-triggered internally so a
/// no-change tick is cheap; the writer is also idempotent so we can safely apply the
/// same values every tick without generating spurious OPC UA notifications.
/// </para>
/// </remarks>
public sealed class RedundancyPublisherHostedService(
    OpcUaApplicationHost host,
    RedundancyStatePublisher publisher,
    RedundancyCoordinator coordinator,
    ILogger<RedundancyPublisherHostedService> logger,
    ILoggerFactory loggerFactory) : BackgroundService
{
    /// <summary>Delay between two consecutive publisher ticks. Defaults to 1 second.</summary>
    public TimeSpan TickInterval { get; init; } = TimeSpan.FromSeconds(1);

    /// <summary>Delay between two probes for the OPC UA <c>ServerObject</c>. Defaults to 250 ms.</summary>
    public TimeSpan ServerReadyPollInterval { get; init; } = TimeSpan.FromMilliseconds(250);

    /// <summary>
    /// Upper bound on how long the service waits for the OPC UA <c>ServerObject</c> before
    /// giving up and leaving the redundancy wiring inactive. Defaults to 60 seconds.
    /// </summary>
    public TimeSpan ServerReadyTimeout { get; init; } = TimeSpan.FromSeconds(60);

    /// <summary>
    /// Loads the topology, waits for the server object, then ticks the publisher every
    /// <see cref="TickInterval"/> until <paramref name="stoppingToken"/> is cancelled.
    /// </summary>
    /// <param name="stoppingToken">Signalled by the host when the service must stop.</param>
    protected override async Task ExecuteAsync(CancellationToken stoppingToken)
    {
        // 0. Load topology from the shared config DB. RefreshAsync (not InitializeAsync)
        //    so an invariant violation degrades to ServiceLevelBand.InvalidTopology rather
        //    than crashing the hosted service — operator visibility beats fail-fast here.
        await coordinator.RefreshAsync(stoppingToken).ConfigureAwait(false);

        // 1. Wait for OPC UA server's ServerObject to materialize. A null result means we
        //    were cancelled or timed out (the timeout is logged by the helper).
        if (await WaitForServerReadyAsync(stoppingToken).ConfigureAwait(false) is not { } writer)
            return;

        try
        {
            // 2. Subscribe writer to publisher events — edge-triggered ServiceLevel +
            //    ServerUriArray updates from the publisher fan out onto the Server node.
            //    Subscribing inside the try guarantees the finally detaches the handlers
            //    even when the initial apply below throws.
            publisher.OnStateChanged += OnServiceLevelChanged;
            publisher.OnServerUriArrayChanged += OnServerUriArrayChanged;

            // 3. Initial RedundancySupport from the coordinator's current topology. If the
            //    topology isn't loaded yet, the tick loop below retries on every tick.
            ApplyRedundancySupportIfKnown(writer);

            logger.LogInformation(
                "RedundancyPublisherHostedService running — tick every {Tick}ms",
                TickInterval.TotalMilliseconds);

            while (!stoppingToken.IsCancellationRequested)
            {
                try
                {
                    publisher.ComputeAndPublish();
                    ApplyRedundancySupportIfKnown(writer); // cheap + idempotent
                }
                catch (Exception ex) when (ex is not OperationCanceledException)
                {
                    // A failed tick must not end the loop; the next tick retries.
                    logger.LogWarning(ex, "RedundancyStatePublisher tick failed");
                }

                try { await Task.Delay(TickInterval, stoppingToken).ConfigureAwait(false); }
                catch (OperationCanceledException) { break; }
            }
        }
        finally
        {
            // Removing a handler that was never attached is a no-op, so this is safe on
            // every exit path.
            publisher.OnStateChanged -= OnServiceLevelChanged;
            publisher.OnServerUriArrayChanged -= OnServerUriArrayChanged;
        }

        void OnServiceLevelChanged(ServiceLevelSnapshot snap) => writer.ApplyServiceLevel(snap.Value);
        void OnServerUriArrayChanged(IReadOnlyList<string> uris) => writer.ApplyServerUriArray(uris);
    }

    /// <summary>
    /// Polls every <see cref="ServerReadyPollInterval"/> until the OPC UA <c>ServerObject</c>
    /// exists, then builds the node writer bound to it.
    /// </summary>
    /// <param name="ct">Cancels the wait.</param>
    /// <returns>
    /// The writer, or <c>null</c> when cancelled or when <see cref="ServerReadyTimeout"/>
    /// elapsed first (the timeout case is logged as an error).
    /// </returns>
    private async Task<ServerRedundancyNodeWriter?> WaitForServerReadyAsync(CancellationToken ct)
    {
        // Bounded retry so a genuine failure to start doesn't pin the hosted service forever.
        // The 60s default is generous — production boot is ~2s on this box; cert PKI +
        // certificate-creation cases have been observed to take up to 15s cold.
        // A Stopwatch is monotonic, so a wall-clock step during boot cannot shorten or
        // stretch the wait the way a DateTime.UtcNow deadline could.
        var waited = System.Diagnostics.Stopwatch.StartNew();
        while (!ct.IsCancellationRequested && waited.Elapsed < ServerReadyTimeout)
        {
            var serverInternal = host.Server?.CurrentInstance;
            if (serverInternal?.ServerObject is not null)
            {
                var writerLogger = loggerFactory.CreateLogger<ServerRedundancyNodeWriter>();
                return new ServerRedundancyNodeWriter(serverInternal, writerLogger);
            }

            try { await Task.Delay(ServerReadyPollInterval, ct).ConfigureAwait(false); }
            catch (OperationCanceledException) { return null; }
        }

        if (!ct.IsCancellationRequested)
        {
            logger.LogError(
                "OPC UA ServerObject did not materialize within {TimeoutSeconds}s — Phase 6.3 Stream C wiring is inactive",
                ServerReadyTimeout.TotalSeconds);
        }

        return null;
    }

    /// <summary>
    /// Pushes the coordinator's current redundancy mode onto the Server node; does nothing
    /// while no topology is loaded.
    /// </summary>
    /// <param name="writer">Writer bound to the running server's nodes.</param>
    private void ApplyRedundancySupportIfKnown(ServerRedundancyNodeWriter writer)
    {
        var topology = coordinator.Current;
        if (topology is null) return;
        writer.ApplyRedundancySupport(topology.Mode);
    }
}
|
||||
@@ -15,9 +15,12 @@ using ZB.MOM.WW.OtOpcUa.Driver.FOCAS;
|
||||
using ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Proxy;
|
||||
using ZB.MOM.WW.OtOpcUa.Driver.Modbus;
|
||||
using ZB.MOM.WW.OtOpcUa.Driver.S7;
|
||||
using ZB.MOM.WW.OtOpcUa.Driver.TwinCAT;
|
||||
using ZB.MOM.WW.OtOpcUa.Server;
|
||||
using ZB.MOM.WW.OtOpcUa.Server.Hosting;
|
||||
using ZB.MOM.WW.OtOpcUa.Server.OpcUa;
|
||||
using ZB.MOM.WW.OtOpcUa.Server.Phase7;
|
||||
using ZB.MOM.WW.OtOpcUa.Server.Redundancy;
|
||||
using ZB.MOM.WW.OtOpcUa.Server.Security;
|
||||
|
||||
var builder = Host.CreateApplicationBuilder(args);
|
||||
@@ -109,6 +112,7 @@ builder.Services.AddSingleton<DriverFactoryRegistry>(_ =>
|
||||
AbCipDriverFactoryExtensions.Register(registry);
|
||||
AbLegacyDriverFactoryExtensions.Register(registry);
|
||||
S7DriverFactoryExtensions.Register(registry);
|
||||
TwinCATDriverFactoryExtensions.Register(registry);
|
||||
return registry;
|
||||
});
|
||||
builder.Services.AddSingleton<DriverInstanceBootstrapper>();
|
||||
@@ -137,8 +141,29 @@ builder.Services.AddHostedService<OpcUaServerService>();
|
||||
// so per-heartbeat change-tracking stays isolated; publisher opens one scope per tick.
|
||||
// optionsLifetime is Singleton because the singleton IDbContextFactory registered below
// consumes the same DbContextOptions<OtOpcUaConfigDbContext>. AddDbContextFactory only
// TryAdd's the options, so leaving this first registration at its Scoped default would
// make the singleton factory depend on a scoped service — which DI scope validation
// (on by default in Development) rejects.
builder.Services.AddDbContext<OtOpcUaConfigDbContext>(
    opt => opt.UseSqlServer(options.ConfigDbConnectionString),
    contextLifetime: ServiceLifetime.Scoped,
    optionsLifetime: ServiceLifetime.Singleton);
// Additional (non-pooled) context factory so Phase 6.3 RedundancyCoordinator (singleton)
// can create its own DbContext for topology loading without fighting the scoped
// HostStatusPublisher.
builder.Services.AddDbContextFactory<OtOpcUaConfigDbContext>(opt =>
    opt.UseSqlServer(options.ConfigDbConnectionString));
|
||||
builder.Services.AddHostedService<HostStatusPublisher>();
|
||||
|
||||
// Phase 6.3 Stream C (task #147) — ServiceLevel + ServerUriArray + RedundancySupport node
|
||||
// wiring. Coordinator holds topology; publisher computes ServiceLevel byte + ServerUriArray;
|
||||
// hosted service ticks publisher + pushes values onto the Server object via the node writer.
|
||||
builder.Services.AddSingleton(sp => new RedundancyCoordinator(
|
||||
sp.GetRequiredService<IDbContextFactory<OtOpcUaConfigDbContext>>(),
|
||||
sp.GetRequiredService<ILogger<RedundancyCoordinator>>(),
|
||||
options.NodeId, options.ClusterId));
|
||||
builder.Services.AddSingleton<ApplyLeaseRegistry>();
|
||||
builder.Services.AddSingleton<RecoveryStateManager>();
|
||||
builder.Services.AddSingleton<PeerReachabilityTracker>();
|
||||
builder.Services.AddSingleton(sp => new RedundancyStatePublisher(
|
||||
sp.GetRequiredService<RedundancyCoordinator>(),
|
||||
sp.GetRequiredService<ApplyLeaseRegistry>(),
|
||||
sp.GetRequiredService<RecoveryStateManager>(),
|
||||
sp.GetRequiredService<PeerReachabilityTracker>()));
|
||||
builder.Services.AddHostedService<RedundancyPublisherHostedService>();
|
||||
|
||||
// Phase 7 follow-up #246 — historian sink + engine composer. NullAlarmHistorianSink
|
||||
// is the default until the Galaxy.Host SqliteStoreAndForwardSink writer adapter
|
||||
// lands (task #248). The composer reads Script/VirtualTag/ScriptedAlarm rows on
|
||||
|
||||
@@ -0,0 +1,139 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Opc.Ua;
|
||||
using Opc.Ua.Server;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration.Enums;
|
||||
using ConfigRedundancyMode = ZB.MOM.WW.OtOpcUa.Configuration.Enums.RedundancyMode;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Server.Redundancy;
|
||||
|
||||
/// <summary>
/// Phase 6.3 Stream C (task #147) — the seam that carries the
/// <see cref="RedundancyStatePublisher"/>'s computed values onto the standard OPC UA
/// Server object nodes:
/// <list type="bullet">
///   <item><c>Server.ServiceLevel</c> (<see cref="VariableIds.Server_ServiceLevel"/>)
///   — Byte (0..255), Part 5 §6.3.34. Clients poll to pick the healthiest peer.</item>
///   <item><c>Server.ServerRedundancy.RedundancySupport</c>
///   (<see cref="VariableIds.Server_ServerRedundancy_RedundancySupport"/>)
///   — advertises Warm / Hot / Cold / None per Part 4 §6.6.2.</item>
///   <item><c>Server.ServerRedundancy.ServerUriArray</c>
///   (<see cref="VariableIds.NonTransparentRedundancyType_ServerUriArray"/>
///   when the redundancy node is upgraded to non-transparent)
///   — ApplicationUri of every node in the pair, self first.</item>
/// </list>
/// The writer is constructed once the server's <c>ServerObject</c> is available (see
/// <c>RedundancyPublisherHostedService</c>); callers invoke
/// <see cref="ApplyServiceLevel"/> / <see cref="ApplyServerUriArray"/> /
/// <see cref="ApplyRedundancySupport"/> on publisher events and ticks. Each setter updates
/// the underlying <see cref="BaseVariableState.Value"/> then calls
/// <see cref="NodeState.ClearChangeMasks"/> to flush the change to subscribers. Re-applying
/// the last successfully written value is a no-op.
/// </summary>
/// <remarks>
/// The writer is defensive: if the expected node shape isn't present on this particular
/// SDK build (e.g. <c>ServerUriArray</c> only exists on the
/// <c>NonTransparentRedundancyType</c> subtype and the ServerObject's default
/// <c>ServerRedundancy</c> property is the base type) the writer logs a warning once and
/// skips that specific update rather than throwing — matches the SDK's own tolerance
/// for optional address-space shape.
/// </remarks>
public sealed class ServerRedundancyNodeWriter
{
    private readonly IServerInternal _server;
    private readonly ILogger<ServerRedundancyNodeWriter> _logger;

    // Guards the last-applied caches, the warn-once flag and the node writes themselves.
    private readonly object _gate = new();

    private bool _warnedMissingServerUriArray;
    private byte? _lastServiceLevel;
    private RedundancySupport? _lastRedundancySupport;
    private IReadOnlyList<string>? _lastServerUriArray;

    /// <summary>Creates a writer bound to <paramref name="server"/>'s Server object nodes.</summary>
    /// <param name="server">Running server whose <c>ServerObject</c> nodes are written.</param>
    /// <param name="logger">Receives the one-time "ServerUriArray missing" warning.</param>
    /// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
    public ServerRedundancyNodeWriter(IServerInternal server, ILogger<ServerRedundancyNodeWriter> logger)
    {
        ArgumentNullException.ThrowIfNull(server);
        ArgumentNullException.ThrowIfNull(logger);
        _server = server;
        _logger = logger;
    }

    /// <summary>Push a new Byte value onto <c>Server.ServiceLevel</c> + notify subscribers.</summary>
    /// <param name="value">ServiceLevel byte to publish.</param>
    public void ApplyServiceLevel(byte value)
    {
        var serverObject = _server.ServerObject;
        if (serverObject?.ServiceLevel is not { } node) return;

        lock (_gate)
        {
            if (_lastServiceLevel == value) return;
            node.Value = value;
            node.Timestamp = DateTime.UtcNow;
            node.ClearChangeMasks(_server.DefaultSystemContext, includeChildren: false);

            // Record the value only after the write + flush succeeded, so a failure here
            // is retried on the next call instead of being masked by the cache.
            _lastServiceLevel = value;
        }
    }

    /// <summary>
    /// Map the Configuration-side <see cref="ConfigRedundancyMode"/> to OPC UA's
    /// <see cref="RedundancySupport"/> enum + apply to
    /// <c>Server.ServerRedundancy.RedundancySupport</c>. The value is effectively static
    /// per deployment, so callers may re-apply it freely (the hosted service does so on
    /// every tick); only an actual change touches the node.
    /// </summary>
    /// <param name="mode">Configured redundancy mode of this cluster.</param>
    public void ApplyRedundancySupport(ConfigRedundancyMode mode)
    {
        var serverObject = _server.ServerObject;
        if (serverObject?.ServerRedundancy?.RedundancySupport is not { } node) return;

        // RedundancyMode only declares None / Warm / Hot in v2.0 (non-transparent only per
        // decision #85). OPC UA's RedundancySupport has more states — clamp to the three we
        // support and let config-DB CHECK constraints prevent surprises.
        var support = mode switch
        {
            ConfigRedundancyMode.Warm => RedundancySupport.Warm,
            ConfigRedundancyMode.Hot => RedundancySupport.Hot,
            _ => RedundancySupport.None,
        };

        lock (_gate)
        {
            if (_lastRedundancySupport == support) return;
            node.Value = support;
            node.Timestamp = DateTime.UtcNow;
            node.ClearChangeMasks(_server.DefaultSystemContext, includeChildren: false);

            // Cache only after a successful write so a failed attempt is retried.
            _lastRedundancySupport = support;
        }
    }

    /// <summary>
    /// Push the self-first peer-URI list onto
    /// <c>Server.ServerRedundancy.ServerUriArray</c>. Only applies when the SDK created
    /// <c>ServerRedundancy</c> as <see cref="NonTransparentRedundancyState"/>; on the
    /// base <see cref="ServerRedundancyState"/> the child is absent and we log-and-skip.
    /// </summary>
    /// <param name="serverUris">ApplicationUris to publish, self first.</param>
    /// <exception cref="ArgumentNullException"><paramref name="serverUris"/> is <c>null</c>.</exception>
    public void ApplyServerUriArray(IReadOnlyList<string> serverUris)
    {
        ArgumentNullException.ThrowIfNull(serverUris);

        var serverObject = _server.ServerObject;
        if (serverObject?.ServerRedundancy is not NonTransparentRedundancyState ntr
            || ntr.ServerUriArray is not { } node)
        {
            WarnMissingServerUriArrayOnce();
            return;
        }

        lock (_gate)
        {
            if (_lastServerUriArray is not null && _lastServerUriArray.SequenceEqual(serverUris, StringComparer.Ordinal))
                return;

            // The node and the cache each get their own copy so neither aliases the
            // caller's list or each other.
            node.Value = [.. serverUris];
            node.Timestamp = DateTime.UtcNow;
            node.ClearChangeMasks(_server.DefaultSystemContext, includeChildren: false);

            // Cache only after a successful write so a failed attempt is retried.
            _lastServerUriArray = [.. serverUris];
        }
    }

    /// <summary>
    /// Logs the "ServerUriArray unavailable" warning the first time it is needed. The flag
    /// is flipped under <c>_gate</c> so concurrent callers cannot both log.
    /// </summary>
    private void WarnMissingServerUriArrayOnce()
    {
        lock (_gate)
        {
            if (_warnedMissingServerUriArray) return;
            _warnedMissingServerUriArray = true;
        }

        _logger.LogWarning(
            "Server.ServerRedundancy is not NonTransparentRedundancyState — ServerUriArray " +
            "cannot be published on this server instance. Clients will not see peer URIs " +
            "on the Part 4 §6.6.2 redundancy node until the redundancy-object type is upgraded.");
    }
}
|
||||
@@ -40,6 +40,7 @@
|
||||
<ProjectReference Include="..\ZB.MOM.WW.OtOpcUa.Driver.AbCip\ZB.MOM.WW.OtOpcUa.Driver.AbCip.csproj"/>
|
||||
<ProjectReference Include="..\ZB.MOM.WW.OtOpcUa.Driver.S7\ZB.MOM.WW.OtOpcUa.Driver.S7.csproj"/>
|
||||
<ProjectReference Include="..\ZB.MOM.WW.OtOpcUa.Driver.AbLegacy\ZB.MOM.WW.OtOpcUa.Driver.AbLegacy.csproj"/>
|
||||
<ProjectReference Include="..\ZB.MOM.WW.OtOpcUa.Driver.TwinCAT\ZB.MOM.WW.OtOpcUa.Driver.TwinCAT.csproj"/>
|
||||
<ProjectReference Include="..\ZB.MOM.WW.OtOpcUa.Analyzers\ZB.MOM.WW.OtOpcUa.Analyzers.csproj"
|
||||
OutputItemType="Analyzer" ReferenceOutputAssembly="false"/>
|
||||
</ItemGroup>
|
||||
|
||||
Reference in New Issue
Block a user