using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Host.Backend.MxAccess;
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Host.Backend.Stability;
///
/// Per-platform + per-AppEngine runtime probe. Subscribes to <TagName>.ScanState
/// for each $WinPlatform and $AppEngine gobject, tracks Unknown → Running → Stopped
/// transitions, and fires so
/// can forward per-host events through the existing IPC OnHostStatusChanged event.
/// Pure-logic state machine with an injected clock so it's deterministically testable —
/// port of v1 GalaxyRuntimeProbeManager without the OPC UA node-manager coupling.
///
///
/// State machine rules (documented in v1's runtimestatus.md and preserved here):
///
/// - ScanState is on-change-only — a stably-Running host may go hours without a
/// callback. Running → Stopped is driven by an explicit ScanState=false callback,
/// never by starvation.
/// - Unknown → Running is a startup transition and does NOT fire StateChanged (would
/// paint every host as "just recovered" at startup, which is noise).
/// - Stopped → Running and Running → Stopped fire StateChanged. Unknown → Stopped
/// fires StateChanged because that's a first-known-bad signal operators need.
/// - All public methods are thread-safe. Callbacks fire outside the internal lock to
/// avoid lock inversion with caller-owned state.
///
///
public sealed class GalaxyRuntimeProbeManager : IDisposable
{
public const int CategoryWinPlatform = 1;
public const int CategoryAppEngine = 3;
public const string ProbeAttribute = ".ScanState";
private readonly Func _clock;
private readonly Func, Task> _subscribe;
private readonly Func _unsubscribe;
private readonly object _lock = new();
// probe tag → per-host state
private readonly Dictionary _byProbe = new(StringComparer.OrdinalIgnoreCase);
// tag name → probe tag (for reverse lookup on the desired-set diff)
private readonly Dictionary _probeByTagName = new(StringComparer.OrdinalIgnoreCase);
private bool _disposed;
///
/// Fires on every state transition that operators should react to. See class remarks
/// for the rules on which transitions fire.
///
public event EventHandler? StateChanged;
public GalaxyRuntimeProbeManager(
Func, Task> subscribe,
Func unsubscribe)
: this(subscribe, unsubscribe, () => DateTime.UtcNow) { }
internal GalaxyRuntimeProbeManager(
Func, Task> subscribe,
Func unsubscribe,
Func clock)
{
_subscribe = subscribe ?? throw new ArgumentNullException(nameof(subscribe));
_unsubscribe = unsubscribe ?? throw new ArgumentNullException(nameof(unsubscribe));
_clock = clock ?? throw new ArgumentNullException(nameof(clock));
}
/// Number of probes currently advised. Test/dashboard hook.
public int ActiveProbeCount
{
get { lock (_lock) return _byProbe.Count; }
}
///
/// Snapshot every currently-tracked host's state. One entry per probe.
///
public IReadOnlyList SnapshotStates()
{
lock (_lock)
{
return _byProbe.Select(kv => new HostProbeSnapshot(
TagName: kv.Value.TagName,
State: kv.Value.State,
LastChangedUtc: kv.Value.LastStateChangeUtc)).ToList();
}
}
///
/// Query the current runtime state for . Returns
/// when the host is not tracked.
///
public HostRuntimeState GetState(string tagName)
{
lock (_lock)
{
if (_probeByTagName.TryGetValue(tagName, out var probe)
&& _byProbe.TryGetValue(probe, out var state))
return state.State;
return HostRuntimeState.Unknown;
}
}
///
/// Diff the desired host set (filtered $WinPlatform / $AppEngine from the latest Discover)
/// against the currently-tracked set and advise / unadvise as needed. Idempotent:
/// calling twice with the same set does nothing.
///
public async Task SyncAsync(IEnumerable desiredHosts)
{
if (_disposed) return;
var desired = desiredHosts
.Where(h => !string.IsNullOrWhiteSpace(h.TagName))
.ToDictionary(h => h.TagName, StringComparer.OrdinalIgnoreCase);
List toAdvise;
List toUnadvise;
lock (_lock)
{
toAdvise = desired.Keys
.Where(tag => !_probeByTagName.ContainsKey(tag))
.ToList();
toUnadvise = _probeByTagName.Keys
.Where(tag => !desired.ContainsKey(tag))
.Select(tag => _probeByTagName[tag])
.ToList();
foreach (var tag in toAdvise)
{
var probe = tag + ProbeAttribute;
_probeByTagName[tag] = probe;
_byProbe[probe] = new HostProbeState
{
TagName = tag,
State = HostRuntimeState.Unknown,
LastStateChangeUtc = _clock(),
};
}
foreach (var probe in toUnadvise)
{
_byProbe.Remove(probe);
}
foreach (var removedTag in _probeByTagName.Keys.Where(t => !desired.ContainsKey(t)).ToList())
{
_probeByTagName.Remove(removedTag);
}
}
foreach (var tag in toAdvise)
{
var probe = tag + ProbeAttribute;
try
{
await _subscribe(probe, OnProbeCallback);
}
catch
{
// Rollback on subscribe failure so a later Tick can't transition a never-advised
// probe into a false Stopped state. Callers can re-Sync later to retry.
lock (_lock)
{
_byProbe.Remove(probe);
_probeByTagName.Remove(tag);
}
}
}
foreach (var probe in toUnadvise)
{
try { await _unsubscribe(probe); } catch { /* best-effort cleanup */ }
}
}
///
/// Public entry point for tests and internal callbacks. Production flow: MxAccessClient's
/// SubscribeAsync delivers VTQ updates through the callback wired in ,
/// which calls this method under the lock to update state and fires
/// outside the lock for any transition that matters.
///
public void OnProbeCallback(string probeTag, Vtq vtq)
{
if (_disposed) return;
HostStateTransition? transition = null;
lock (_lock)
{
if (!_byProbe.TryGetValue(probeTag, out var state)) return;
var isRunning = vtq.Quality >= 192 && vtq.Value is bool b && b;
var now = _clock();
var previous = state.State;
state.LastCallbackUtc = now;
if (isRunning)
{
state.GoodUpdateCount++;
if (previous != HostRuntimeState.Running)
{
state.State = HostRuntimeState.Running;
state.LastStateChangeUtc = now;
if (previous == HostRuntimeState.Stopped)
{
transition = new HostStateTransition(state.TagName, previous, HostRuntimeState.Running, now);
}
}
}
else
{
state.FailureCount++;
if (previous != HostRuntimeState.Stopped)
{
state.State = HostRuntimeState.Stopped;
state.LastStateChangeUtc = now;
transition = new HostStateTransition(state.TagName, previous, HostRuntimeState.Stopped, now);
}
}
}
if (transition is { } t)
{
StateChanged?.Invoke(this, t);
}
}
public void Dispose()
{
if (_disposed) return;
_disposed = true;
lock (_lock)
{
_byProbe.Clear();
_probeByTagName.Clear();
}
}
private sealed class HostProbeState
{
public string TagName { get; set; } = "";
public HostRuntimeState State { get; set; }
public DateTime LastStateChangeUtc { get; set; }
public DateTime? LastCallbackUtc { get; set; }
public long GoodUpdateCount { get; set; }
public long FailureCount { get; set; }
}
}
public enum HostRuntimeState
{
Unknown,
Running,
Stopped,
}
public sealed record HostStateTransition(
string TagName,
HostRuntimeState OldState,
HostRuntimeState NewState,
DateTime AtUtc);
public sealed record HostProbeSnapshot(
string TagName,
HostRuntimeState State,
DateTime LastChangedUtc);
public readonly record struct HostProbeTarget(string TagName, int CategoryId)
{
public bool IsRuntimeHost =>
CategoryId == GalaxyRuntimeProbeManager.CategoryWinPlatform
|| CategoryId == GalaxyRuntimeProbeManager.CategoryAppEngine;
}