Phase 1 Streams B–E scaffold + Phase 2 Streams A–C scaffold — 8 new projects with ~70 new tests, all green alongside the 494 v1 IntegrationTests baseline (parity preserved: no v1 tests broken; legacy OtOpcUa.Host untouched). Phase 1 finish: Configuration project (16 entities + 10 enums + DbContext + DesignTimeDbContextFactory + InitialSchema/StoredProcedures/AuthorizationGrants migrations — 8 procs including sp_PublishGeneration with MERGE on ExternalIdReservation per decision #124, sp_RollbackToGeneration cloning rows into a new published generation, sp_ValidateDraft with cross-cluster-namespace + EquipmentUuid-immutability + ZTag/SAPID reservation pre-flight, sp_ComputeGenerationDiff with CHECKSUM-based row signature — plus OtOpcUaNode/OtOpcUaAdmin SQL roles with EXECUTE grants scoped to per-principal-class proc sets and DENY UPDATE/DELETE/INSERT/SELECT on dbo schema); managed DraftValidator covering UNS segment regex, path length, EquipmentUuid immutability across generations, same-cluster namespace binding (decision #122), reservation pre-flight, EquipmentId derivation (decision #125), driver↔namespace compatibility — returning every failing rule in one pass; LiteDB local cache with round-trip + ring pruning + corruption-fast-fail; GenerationApplier with per-entity Added/Removed/Modified diff and dependency-ordered callbacks (namespace → driver → device → equipment → poll-group → tag, Removed before Added); Core project with GenericDriverNodeManager (scaffold for the Phase 2 Galaxy port) and DriverHost lifecycle registry; Server project using Microsoft.Extensions.Hosting BackgroundService replacing TopShelf, with NodeBootstrap that falls back to LiteDB cache when the central DB is unreachable (decision #79); Admin project scaffolded as Blazor Server with Bootstrap 5 sidebar layout, cookie auth, three admin roles (ConfigViewer/ConfigEditor/FleetAdmin), Cluster + Generation services fronting the stored procs. Phase 2 scaffold: Driver.Galaxy.Shared (netstandard2.0) with full MessagePack IPC contract surface — Hello version negotiation, Open/CloseSession, Heartbeat, DiscoverHierarchy + GalaxyObjectInfo/GalaxyAttributeInfo, Read/WriteValues, Subscribe/Unsubscribe/OnDataChange, AlarmSubscribe/Event/Ack, HistoryRead, HostConnectivityStatus, Recycle — plus length-prefixed framing (decision #28) with a 16 MiB cap and thread-safe FrameWriter/FrameReader; Driver.Galaxy.Host (net48) implementing the Tier C cross-cutting protections from driver-stability.md — strict PipeAcl (allow configured server SID only, explicit deny on LocalSystem + Administrators), PipeServer with caller-SID verification via pipe.RunAsClient + WindowsIdentity.GetCurrent and per-process shared-secret Hello, Galaxy-specific MemoryWatchdog (warn at max(1.5×baseline, +200 MB), soft-recycle at max(2×baseline, +200 MB), hard ceiling 1.5 GB, slope ≥5 MB/min over 30-min rolling window), RecyclePolicy (1 soft recycle per hour cap + 03:00 local daily scheduled), PostMortemMmf (1000-entry ring buffer in %ProgramData%\OtOpcUa\driver-postmortem\galaxy.mmf, survives hard crash, readable cross-process), MxAccessHandle : SafeHandle (ReleaseHandle loops Marshal.ReleaseComObject until refcount=0 then calls optional unregister callback), StaPump with responsiveness probe (BlockingCollection dispatcher for Phase 1 — real Win32 GetMessage/DispatchMessage pump slots in with the same semantics when the Galaxy code lift happens), IsExternalInit shim for init setters on .NET 4.8; Driver.Galaxy.Proxy (net10) implementing IDriver + ITagDiscovery forwarding over the IPC channel with MX data-type and security-classification mapping, plus Supervisor pieces — Backoff (5s → 15s → 60s capped, reset-on-stable-run), CircuitBreaker (3 crashes per 5 min opens; 1h → 4h → manual cooldown escalation; sticky alert doesn't auto-clear), HeartbeatMonitor (2s cadence, 3 consecutive misses = host dead per driver-stability.md). Infrastructure: docker SQL Server remapped to host port 14330 to coexist with the native MSSQL14 Galaxy ZB DB instance on 1433; NuGetAuditSuppress applied per-project for two System.Security.Cryptography.Xml advisories that only reach via EF Core Design with PrivateAssets=all (fix ships in 11.0.0-preview); .slnx gains 14 project registrations. Deferred with explicit TODOs in docs/v2/implementation/phase-2-partial-exit-evidence.md: Phase 1 Stream E Admin UI pages (Generations listing + draft-diff-publish, Equipment CRUD with OPC 40010 fields, UNS Areas/Lines tabs, ACLs + permission simulator, Generic JSON config editor, SignalR real-time, Release-Reservation + Merge-Equipment workflows, LDAP login page, AppServer smoke test per decision #142), Phase 2 Stream D (Galaxy MXAccess code lift out of legacy OtOpcUa.Host, dual-service installer, appsettings → DriverConfig migration script, legacy Host deletion — blocked by parity), Phase 2 Stream E (v1 IntegrationTests against v2 topology, Client.CLI walkthrough diff, four 2026-04-13 stability findings regression tests, adversarial review — requires live MXAccess runtime).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,58 @@
|
||||
using System;
|
||||
using System.Runtime.ConstrainedExecution;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Host.Sta;
|
||||
|
||||
/// <summary>
|
||||
/// SafeHandle-style lifetime wrapper for an <c>LMXProxyServer</c> COM connection. Per Task B.3
|
||||
/// + decision #65: <see cref="ReleaseHandle"/> must call <c>Marshal.ReleaseComObject</c> until
|
||||
/// refcount = 0, then <c>UnregisterProxy</c>. The finalizer runs as a
|
||||
/// <see cref="CriticalFinalizerObject"/> to honor AppDomain-unload ordering.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// This scaffold accepts any RCW (tagged as <see cref="object"/>) so we can unit-test the
|
||||
/// release logic with a mock. The concrete wiring to <c>ArchestrA.MxAccess.LMXProxyServer</c>
|
||||
/// lands when the actual Galaxy code moves over (the part deferred to the parity gate).
|
||||
/// </remarks>
|
||||
public sealed class MxAccessHandle : SafeHandle
|
||||
{
|
||||
private object? _comObject;
|
||||
private readonly Action<object>? _unregister;
|
||||
|
||||
public MxAccessHandle(object comObject, Action<object>? unregister = null)
|
||||
: base(IntPtr.Zero, ownsHandle: true)
|
||||
{
|
||||
_comObject = comObject ?? throw new ArgumentNullException(nameof(comObject));
|
||||
_unregister = unregister;
|
||||
|
||||
// The pointer value itself doesn't matter — we're wrapping an RCW, not a native handle.
|
||||
SetHandle(new IntPtr(1));
|
||||
}
|
||||
|
||||
public override bool IsInvalid => handle == IntPtr.Zero;
|
||||
|
||||
public object? RawComObject => _comObject;
|
||||
|
||||
[ReliabilityContract(Consistency.WillNotCorruptState, Cer.Success)]
|
||||
protected override bool ReleaseHandle()
|
||||
{
|
||||
if (_comObject is null) return true;
|
||||
|
||||
try { _unregister?.Invoke(_comObject); }
|
||||
catch { /* swallow — we're in finalizer/cleanup; log elsewhere */ }
|
||||
|
||||
try
|
||||
{
|
||||
if (Marshal.IsComObject(_comObject))
|
||||
{
|
||||
while (Marshal.ReleaseComObject(_comObject) > 0) { /* loop until fully released */ }
|
||||
}
|
||||
}
|
||||
catch { /* swallow */ }
|
||||
|
||||
_comObject = null;
|
||||
SetHandle(IntPtr.Zero);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
91
src/ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Host/Sta/StaPump.cs
Normal file
91
src/ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Host/Sta/StaPump.cs
Normal file
@@ -0,0 +1,91 @@
|
||||
using System;
|
||||
using System.Collections.Concurrent;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Host.Sta;
|
||||
|
||||
/// <summary>
|
||||
/// Dedicated STA thread that owns all <c>LMXProxyServer</c> COM instances. Work items are
|
||||
/// posted from any thread and dispatched on the STA. Per <c>driver-stability.md</c> Galaxy
|
||||
/// deep dive §"STA thread + Win32 message pump".
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// Phase 2 scaffold: uses a <see cref="BlockingCollection{T}"/> dispatcher instead of the real
|
||||
/// Win32 <c>GetMessage/DispatchMessage</c> pump. Real pump arrives when the v1 <c>StaComThread</c>
|
||||
/// is lifted — that's part of the deferred Galaxy code move. The apartment state and work
|
||||
/// dispatch semantics are identical so production code can be swapped in without changes.
|
||||
/// </remarks>
|
||||
public sealed class StaPump : IDisposable
|
||||
{
|
||||
private readonly Thread _thread;
|
||||
private readonly BlockingCollection<Action> _workQueue = new(new ConcurrentQueue<Action>());
|
||||
private readonly TaskCompletionSource<bool> _started = new(TaskCreationOptions.RunContinuationsAsynchronously);
|
||||
private volatile bool _disposed;
|
||||
|
||||
public int ThreadId => _thread.ManagedThreadId;
|
||||
public DateTime LastDispatchedUtc { get; private set; } = DateTime.MinValue;
|
||||
public int QueueDepth => _workQueue.Count;
|
||||
|
||||
public StaPump(string name = "Galaxy.Sta")
|
||||
{
|
||||
_thread = new Thread(PumpLoop) { Name = name, IsBackground = true };
|
||||
_thread.SetApartmentState(ApartmentState.STA);
|
||||
_thread.Start();
|
||||
}
|
||||
|
||||
public Task WaitForStartedAsync() => _started.Task;
|
||||
|
||||
/// <summary>Posts a work item; resolves once it's executed on the STA thread.</summary>
|
||||
public Task<T> InvokeAsync<T>(Func<T> work)
|
||||
{
|
||||
if (_disposed) throw new ObjectDisposedException(nameof(StaPump));
|
||||
|
||||
var tcs = new TaskCompletionSource<T>(TaskCreationOptions.RunContinuationsAsynchronously);
|
||||
_workQueue.Add(() =>
|
||||
{
|
||||
try { tcs.SetResult(work()); }
|
||||
catch (Exception ex) { tcs.SetException(ex); }
|
||||
});
|
||||
return tcs.Task;
|
||||
}
|
||||
|
||||
public Task InvokeAsync(Action work) => InvokeAsync(() => { work(); return 0; });
|
||||
|
||||
/// <summary>
|
||||
/// Health probe — returns true if a no-op work item round-trips within <paramref name="timeout"/>.
|
||||
/// Used by the supervisor; timeout means the pump is wedged and a recycle is warranted.
|
||||
/// </summary>
|
||||
public async Task<bool> IsResponsiveAsync(TimeSpan timeout)
|
||||
{
|
||||
var task = InvokeAsync(() => { });
|
||||
var completed = await Task.WhenAny(task, Task.Delay(timeout)).ConfigureAwait(false);
|
||||
return completed == task;
|
||||
}
|
||||
|
||||
private void PumpLoop()
|
||||
{
|
||||
_started.TrySetResult(true);
|
||||
try
|
||||
{
|
||||
while (!_disposed)
|
||||
{
|
||||
if (_workQueue.TryTake(out var work, Timeout.Infinite))
|
||||
{
|
||||
work();
|
||||
LastDispatchedUtc = DateTime.UtcNow;
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (InvalidOperationException) { /* CompleteAdding called during dispose */ }
|
||||
}
|
||||
|
||||
public void Dispose()
|
||||
{
|
||||
if (_disposed) return;
|
||||
_disposed = true;
|
||||
_workQueue.CompleteAdding();
|
||||
_thread.Join(TimeSpan.FromSeconds(5));
|
||||
_workQueue.Dispose();
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user