Phase 1 Streams B–E scaffold + Phase 2 Streams A–C scaffold — 8 new projects with ~70 new tests, all green alongside the 494 v1 IntegrationTests baseline (parity preserved: no v1 tests broken; legacy OtOpcUa.Host untouched). Phase 1 finish: Configuration project (16 entities + 10 enums + DbContext + DesignTimeDbContextFactory + InitialSchema/StoredProcedures/AuthorizationGrants migrations — 8 procs including sp_PublishGeneration with MERGE on ExternalIdReservation per decision #124, sp_RollbackToGeneration cloning rows into a new published generation, sp_ValidateDraft with cross-cluster-namespace + EquipmentUuid-immutability + ZTag/SAPID reservation pre-flight, sp_ComputeGenerationDiff with CHECKSUM-based row signature — plus OtOpcUaNode/OtOpcUaAdmin SQL roles with EXECUTE grants scoped to per-principal-class proc sets and DENY UPDATE/DELETE/INSERT/SELECT on dbo schema); managed DraftValidator covering UNS segment regex, path length, EquipmentUuid immutability across generations, same-cluster namespace binding (decision #122), reservation pre-flight, EquipmentId derivation (decision #125), driver↔namespace compatibility — returning every failing rule in one pass; LiteDB local cache with round-trip + ring pruning + corruption-fast-fail; GenerationApplier with per-entity Added/Removed/Modified diff and dependency-ordered callbacks (namespace → driver → device → equipment → poll-group → tag, Removed before Added); Core project with GenericDriverNodeManager (scaffold for the Phase 2 Galaxy port) and DriverHost lifecycle registry; Server project using Microsoft.Extensions.Hosting BackgroundService replacing TopShelf, with NodeBootstrap that falls back to LiteDB cache when the central DB is unreachable (decision #79); Admin project scaffolded as Blazor Server with Bootstrap 5 sidebar layout, cookie auth, three admin roles (ConfigViewer/ConfigEditor/FleetAdmin), Cluster + Generation services fronting the stored procs. Phase 2 scaffold: Driver.Galaxy.Shared (netstandard2.0) with full MessagePack IPC contract surface — Hello version negotiation, Open/CloseSession, Heartbeat, DiscoverHierarchy + GalaxyObjectInfo/GalaxyAttributeInfo, Read/WriteValues, Subscribe/Unsubscribe/OnDataChange, AlarmSubscribe/Event/Ack, HistoryRead, HostConnectivityStatus, Recycle — plus length-prefixed framing (decision #28) with a 16 MiB cap and thread-safe FrameWriter/FrameReader; Driver.Galaxy.Host (net48) implementing the Tier C cross-cutting protections from driver-stability.md — strict PipeAcl (allow configured server SID only, explicit deny on LocalSystem + Administrators), PipeServer with caller-SID verification via pipe.RunAsClient + WindowsIdentity.GetCurrent and per-process shared-secret Hello, Galaxy-specific MemoryWatchdog (warn at max(1.5×baseline, +200 MB), soft-recycle at max(2×baseline, +200 MB), hard ceiling 1.5 GB, slope ≥5 MB/min over 30-min rolling window), RecyclePolicy (1 soft recycle per hour cap + 03:00 local daily scheduled), PostMortemMmf (1000-entry ring buffer in %ProgramData%\OtOpcUa\driver-postmortem\galaxy.mmf, survives hard crash, readable cross-process), MxAccessHandle : SafeHandle (ReleaseHandle loops Marshal.ReleaseComObject until refcount=0 then calls optional unregister callback), StaPump with responsiveness probe (BlockingCollection dispatcher for Phase 1 — real Win32 GetMessage/DispatchMessage pump slots in with the same semantics when the Galaxy code lift happens), IsExternalInit shim for init setters on .NET 4.8; Driver.Galaxy.Proxy (net10) implementing IDriver + ITagDiscovery forwarding over the IPC channel with MX data-type and security-classification mapping, plus Supervisor pieces — Backoff (5s → 15s → 60s capped, reset-on-stable-run), CircuitBreaker (3 crashes per 5 min opens; 1h → 4h → manual cooldown escalation; sticky alert doesn't auto-clear), HeartbeatMonitor (2s cadence, 3 consecutive misses = host dead per driver-stability.md). Infrastructure: docker SQL Server remapped to host port 14330 to coexist with the native MSSQL14 Galaxy ZB DB instance on 1433; NuGetAuditSuppress applied per-project for two System.Security.Cryptography.Xml advisories that only reach via EF Core Design with PrivateAssets=all (fix ships in 11.0.0-preview); .slnx gains 14 project registrations. Deferred with explicit TODOs in docs/v2/implementation/phase-2-partial-exit-evidence.md: Phase 1 Stream E Admin UI pages (Generations listing + draft-diff-publish, Equipment CRUD with OPC 40010 fields, UNS Areas/Lines tabs, ACLs + permission simulator, Generic JSON config editor, SignalR real-time, Release-Reservation + Merge-Equipment workflows, LDAP login page, AppServer smoke test per decision #142), Phase 2 Stream D (Galaxy MXAccess code lift out of legacy OtOpcUa.Host, dual-service installer, appsettings → DriverConfig migration script, legacy Host deletion — blocked by parity), Phase 2 Stream E (v1 IntegrationTests against v2 topology, Client.CLI walkthrough diff, four 2026-04-13 stability findings regression tests, adversarial review — requires live MXAccess runtime).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Joseph Doherty
2026-04-17 21:35:25 -04:00
parent fc0ce36308
commit 01fd90c178
128 changed files with 12352 additions and 4 deletions

View File

@@ -0,0 +1,160 @@
using System;
using System.IO.Pipes;
using System.Security.Principal;
using System.Threading;
using System.Threading.Tasks;
using MessagePack;
using Serilog;
using ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Shared;
using ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Shared.Contracts;
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Host.Ipc;
/// <summary>
/// Accepts one client connection at a time on a named pipe with the strict ACL from
/// <see cref="PipeAcl"/>. Verifies the peer SID and the per-process shared secret before any
/// RPC frame is accepted. Per <c>driver-stability.md §"IPC Security"</c>.
/// </summary>
public sealed class PipeServer : IDisposable
{
private readonly string _pipeName;
private readonly SecurityIdentifier _allowedSid;
private readonly string _sharedSecret;
private readonly ILogger _logger;
private readonly CancellationTokenSource _cts = new();
private NamedPipeServerStream? _current;
public PipeServer(string pipeName, SecurityIdentifier allowedSid, string sharedSecret, ILogger logger)
{
_pipeName = pipeName ?? throw new ArgumentNullException(nameof(pipeName));
_allowedSid = allowedSid ?? throw new ArgumentNullException(nameof(allowedSid));
_sharedSecret = sharedSecret ?? throw new ArgumentNullException(nameof(sharedSecret));
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
/// <summary>
/// Accepts one connection, performs Hello handshake, then dispatches frames to
/// <paramref name="handler"/> until EOF or cancel. Returns when the client disconnects.
/// </summary>
public async Task RunOneConnectionAsync(IFrameHandler handler, CancellationToken ct)
{
using var linked = CancellationTokenSource.CreateLinkedTokenSource(_cts.Token, ct);
var acl = PipeAcl.Create(_allowedSid);
// .NET Framework 4.8 uses the legacy constructor overload that takes a PipeSecurity directly.
_current = new NamedPipeServerStream(
_pipeName,
PipeDirection.InOut,
maxNumberOfServerInstances: 1,
PipeTransmissionMode.Byte,
PipeOptions.Asynchronous,
inBufferSize: 64 * 1024,
outBufferSize: 64 * 1024,
pipeSecurity: acl);
try
{
await _current.WaitForConnectionAsync(linked.Token).ConfigureAwait(false);
if (!VerifyCaller(_current, out var reason))
{
_logger.Warning("IPC caller rejected: {Reason}", reason);
_current.Disconnect();
return;
}
using var reader = new FrameReader(_current, leaveOpen: true);
using var writer = new FrameWriter(_current, leaveOpen: true);
// First frame must be a Hello with the correct shared secret.
var first = await reader.ReadFrameAsync(linked.Token).ConfigureAwait(false);
if (first is null || first.Value.Kind != MessageKind.Hello)
{
_logger.Warning("IPC first frame was not Hello; dropping");
return;
}
var hello = MessagePackSerializer.Deserialize<Hello>(first.Value.Body);
if (!string.Equals(hello.SharedSecret, _sharedSecret, StringComparison.Ordinal))
{
await writer.WriteAsync(MessageKind.HelloAck,
new HelloAck { Accepted = false, RejectReason = "shared-secret-mismatch" },
linked.Token).ConfigureAwait(false);
_logger.Warning("IPC Hello rejected: shared-secret-mismatch");
return;
}
if (hello.ProtocolMajor != Hello.CurrentMajor)
{
await writer.WriteAsync(MessageKind.HelloAck,
new HelloAck { Accepted = false, RejectReason = $"major-version-mismatch-peer={hello.ProtocolMajor}-server={Hello.CurrentMajor}" },
linked.Token).ConfigureAwait(false);
_logger.Warning("IPC Hello rejected: major mismatch peer={Peer} server={Server}",
hello.ProtocolMajor, Hello.CurrentMajor);
return;
}
await writer.WriteAsync(MessageKind.HelloAck,
new HelloAck { Accepted = true, HostName = Environment.MachineName },
linked.Token).ConfigureAwait(false);
while (!linked.Token.IsCancellationRequested)
{
var frame = await reader.ReadFrameAsync(linked.Token).ConfigureAwait(false);
if (frame is null) break;
await handler.HandleAsync(frame.Value.Kind, frame.Value.Body, writer, linked.Token).ConfigureAwait(false);
}
}
finally
{
_current.Dispose();
_current = null;
}
}
/// <summary>
/// Runs the server continuously, handling one connection at a time. When a connection ends
/// (clean or error), accepts the next.
/// </summary>
public async Task RunAsync(IFrameHandler handler, CancellationToken ct)
{
while (!ct.IsCancellationRequested)
{
try { await RunOneConnectionAsync(handler, ct).ConfigureAwait(false); }
catch (OperationCanceledException) { break; }
catch (Exception ex) { _logger.Error(ex, "IPC connection loop error — accepting next"); }
}
}
private bool VerifyCaller(NamedPipeServerStream pipe, out string reason)
{
try
{
pipe.RunAsClient(() =>
{
using var wi = WindowsIdentity.GetCurrent();
if (wi.User is null)
throw new InvalidOperationException("GetCurrent().User is null — cannot verify caller");
if (wi.User != _allowedSid)
throw new UnauthorizedAccessException(
$"caller SID {wi.User.Value} does not match allowed {_allowedSid.Value}");
});
reason = string.Empty;
return true;
}
catch (Exception ex) { reason = ex.Message; return false; }
}
public void Dispose()
{
_cts.Cancel();
_current?.Dispose();
_cts.Dispose();
}
}
public interface IFrameHandler
{
Task HandleAsync(MessageKind kind, byte[] body, FrameWriter writer, CancellationToken ct);
}