Files
scadalink-design/lmxproxy/src/ZB.MOM.WW.LmxProxy.Host/MxAccess/MxAccessClient.Connection.cs
Joseph Doherty b74e139a85 fix(lmxproxy): reset probe timer after reconnect to prevent false stale triggers
Without this, the staleness check could fire immediately after reconnect
before the first OnDataChange callback arrives, causing a reconnect loop.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-24 15:06:42 -04:00

333 lines
12 KiB
C#

using System;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using System.Threading;
using System.Threading.Tasks;
using ArchestrA.MxAccess;
using Serilog;
using ZB.MOM.WW.LmxProxy.Host.Domain;
namespace ZB.MOM.WW.LmxProxy.Host.MxAccess
{
public sealed partial class MxAccessClient
{
/// <summary>
/// Connects to MxAccess on the dedicated STA thread, then replays stored
/// subscriptions and starts the probe subscription.
/// </summary>
/// <param name="ct">
/// Checked once before the connection attempt starts. Once the attempt is
/// under way it is not observed, so a started connect always runs to
/// completion (or failure).
/// </param>
/// <exception cref="ObjectDisposedException">The client has been disposed.</exception>
/// <exception cref="OperationCanceledException">
/// <paramref name="ct"/> was already cancelled when the attempt was about to start.
/// </exception>
/// <remarks>
/// Returns immediately when already connected. On any failure the error is
/// logged, COM objects are cleaned up, the state is set to
/// <c>ConnectionState.Error</c> and the exception is rethrown.
/// </remarks>
public async Task ConnectAsync(CancellationToken ct = default)
{
    if (_disposed) throw new ObjectDisposedException(nameof(MxAccessClient));
    if (IsConnected) return;

    // Honor cancellation before any state is touched, so a stopped monitor
    // loop (or a shutting-down caller) does not kick off a fresh connection.
    ct.ThrowIfCancellationRequested();

    SetState(ConnectionState.Connecting);
    try
    {
        await _staThread.RunAsync(() => ConnectInternal());
        lock (_lock)
        {
            _connectedSince = DateTime.UtcNow;
        }
        SetState(ConnectionState.Connected);
        Log.Information("Connected to MxAccess (handle={Handle})", _connectionHandle);

        // Recreate any stored subscriptions from a previous connection
        await RecreateStoredSubscriptionsAsync();

        // Start persistent probe subscription
        await StartProbeSubscriptionAsync();
    }
    catch (Exception ex)
    {
        Log.Error(ex, "Failed to connect to MxAccess");
        await CleanupComObjectsAsync();
        SetState(ConnectionState.Error, ex.Message);
        throw;
    }
}
/// <summary>
/// Tears down the MxAccess connection on the dedicated STA thread.
/// Does nothing when the client is not currently connected.
/// </summary>
/// <param name="ct">Accepted for API symmetry; not observed by this method.</param>
/// <remarks>
/// Failures are logged and reflected in the connection state
/// (<c>ConnectionState.Error</c>); they are not rethrown to the caller.
/// </remarks>
public async Task DisconnectAsync(CancellationToken ct = default)
{
    if (IsConnected)
    {
        SetState(ConnectionState.Disconnecting);
        try
        {
            await _staThread.RunAsync(() => { DisconnectInternal(); });
            SetState(ConnectionState.Disconnected);
            Log.Information("Disconnected from MxAccess");
        }
        catch (Exception failure)
        {
            Log.Error(failure, "Error during disconnect");
            SetState(ConnectionState.Error, failure.Message);
        }
    }
}
/// <summary>
/// Starts the auto-reconnect monitor loop on a thread-pool task.
/// Call this after the initial ConnectAsync succeeds.
/// </summary>
/// <remarks>
/// Does nothing when auto-reconnect is disabled. If a loop was started
/// earlier, it is cancelled first so that at most one monitor loop is
/// driven by the current cancellation source.
/// </remarks>
public void StartMonitorLoop()
{
    if (!_autoReconnect) return;

    // Cancel a previously started loop instead of silently orphaning it.
    try
    {
        _reconnectCts?.Cancel();
    }
    catch (ObjectDisposedException)
    {
        // The previous source was already disposed; nothing left to cancel.
    }

    var cts = new CancellationTokenSource();
    _reconnectCts = cts;

    // Capture the token now: reading the field inside the lambda would
    // observe whatever source happens to be stored when the task runs.
    var token = cts.Token;
    Task.Run(() => MonitorConnectionAsync(token));
}
/// <summary>
/// Requests cancellation of the auto-reconnect monitor loop.
/// Has no effect when the loop was never started.
/// </summary>
public void StopMonitorLoop()
{
    var cts = _reconnectCts;
    if (cts == null) return;
    cts.Cancel();
}
/// <summary>
/// Gets the UTC timestamp recorded when the current connection was established.
/// The value is read under the client lock.
/// </summary>
public DateTime ConnectedSince
{
    get
    {
        DateTime snapshot;
        lock (_lock)
        {
            snapshot = _connectedSince;
        }
        return snapshot;
    }
}
/// <summary>
/// Gets how many successful reconnects the monitor loop has performed since startup.
/// </summary>
public int ReconnectCount
{
    get { return _reconnectCount; }
}
// ── Internal synchronous methods ──────────
/// <summary>
/// Creates the MxAccess COM proxy, wires its event handlers and registers
/// this client under its client name. Invoked via the STA thread by
/// ConnectAsync.
/// </summary>
/// <exception cref="InvalidOperationException">
/// Registration returned a handle that is not greater than zero.
/// </exception>
private void ConnectInternal()
{
    lock (_lock)
    {
        // Create COM object
        _lmxProxy = new LMXProxyServer();

        // Wire event handlers
        _lmxProxy.OnDataChange += OnDataChange;
        _lmxProxy.OnWriteComplete += OnWriteComplete;

        // Register with MxAccess using unique client name
        _connectionHandle = _lmxProxy.Register(_clientName);
        if (_connectionHandle <= 0)
        {
            throw new InvalidOperationException("Failed to register with MxAccess - invalid handle returned");
        }

        // Only report success once the returned handle has been validated.
        Log.Information("Registered with MxAccess as '{ClientName}'", _clientName);
    }
}
/// <summary>
/// Unadvises and removes every tracked item, detaches the event handlers,
/// unregisters from MxAccess and releases the COM proxy. Invoked via the
/// STA thread by DisconnectAsync.
/// </summary>
/// <remarks>
/// Returns without doing anything when there is no proxy or no valid
/// connection handle. All teardown steps are best-effort: failures are
/// logged and the local state (proxy reference, handle, handle/address
/// maps, pending writes) is always reset in the <c>finally</c> block.
/// Stored subscriptions are intentionally kept for reconnect replay.
/// </remarks>
private void DisconnectInternal()
{
    lock (_lock)
    {
        if (_lmxProxy == null || _connectionHandle <= 0) return;
        try
        {
            // Unadvise all active subscriptions before unregistering.
            // Iterate over a copy so the tracking map itself is not enumerated.
            foreach (var kvp in new Dictionary<string, int>(_addressToHandle))
            {
                try
                {
                    _lmxProxy.UnAdvise(_connectionHandle, kvp.Value);
                    _lmxProxy.RemoveItem(_connectionHandle, kvp.Value);
                }
                catch (Exception ex)
                {
                    Log.Debug(ex, "Error removing subscription for {Address} during disconnect", kvp.Key);
                }
            }

            // Remove event handlers
            _lmxProxy.OnDataChange -= OnDataChange;
            _lmxProxy.OnWriteComplete -= OnWriteComplete;

            // Unregister
            _lmxProxy.Unregister(_connectionHandle);
        }
        catch (Exception ex)
        {
            Log.Warning(ex, "Error during MxAccess unregister");
        }
        finally
        {
            // Release the COM object. Still best-effort, but a failure is now
            // logged instead of being swallowed silently.
            try
            {
                Marshal.ReleaseComObject(_lmxProxy);
            }
            catch (Exception ex)
            {
                Log.Debug(ex, "Error releasing MxAccess COM object during disconnect");
            }

            _lmxProxy = null;
            _connectionHandle = 0;

            // Clear handle tracking (but keep _storedSubscriptions for reconnect)
            _handleToAddress.Clear();
            _addressToHandle.Clear();
            _pendingWrites.Clear();
        }
    }
}
/// <summary>
/// Subscribes to the configured probe test tag so that OnDataChange
/// callbacks update <see cref="_lastProbeValueTime"/>. Called after
/// connect (and reconnect). The subscription is stored for reconnect
/// replay like any other subscription.
/// </summary>
/// <remarks>
/// Does nothing when no probe tag is configured. The probe timestamp is
/// reset before subscribing so the new connection gets a full stale
/// threshold to deliver its first value. A callback already stored for the
/// probe address is left in place rather than replaced by the no-op.
/// </remarks>
private async Task StartProbeSubscriptionAsync()
{
    if (_probeTestTagAddress == null) return;

    _lastProbeValueTime = DateTime.UtcNow;

    // Set by the STA work item so the log below reflects what actually happened.
    var subscribed = false;

    await _staThread.RunAsync(() =>
    {
        lock (_lock)
        {
            if (!IsConnected || _lmxProxy == null) return;

            // Subscribe (skips if already subscribed from reconnect replay)
            SubscribeInternal(_probeTestTagAddress);

            // Store a no-op callback — the real work happens in OnProbeDataChange
            // which is called from OnDataChange before the stored callback.
            // Do not clobber a callback that is already stored for this address.
            if (!_storedSubscriptions.ContainsKey(_probeTestTagAddress))
            {
                _storedSubscriptions[_probeTestTagAddress] = (_, __) => { };
            }

            subscribed = true;
        }
    });

    if (subscribed)
    {
        Log.Information("Probe subscription started for {Tag} (stale threshold={ThresholdMs}ms)",
            _probeTestTagAddress, _probeStaleThresholdMs);
    }
    else
    {
        Log.Debug("Probe subscription skipped for {Tag}: client is not connected", _probeTestTagAddress);
    }
}
/// <summary>
/// Records that a value for the probe tag has just arrived. Intended to be
/// invoked from <see cref="OnDataChange"/>; the stored timestamp is what the
/// monitor loop compares against the stale threshold.
/// </summary>
/// <param name="address">Address of the probe tag (not used here).</param>
/// <param name="vtq">The received value (not used here).</param>
internal void OnProbeDataChange(string address, Vtq vtq)
{
    var receivedAtUtc = DateTime.UtcNow;
    _lastProbeValueTime = receivedAtUtc;
}
/// <summary>
/// Background loop that keeps the MxAccess connection alive. After each
/// monitor interval it does one of the following:
/// - not connected: attempts a reconnect;
/// - connected, no probe tag configured: nothing;
/// - connected, probe value older than the stale threshold: forces a
///   disconnect and then attempts a reconnect.
/// After every reconnect attempt the probe timestamp is reset so the new
/// connection gets a full interval to deliver its first OnDataChange callback.
/// </summary>
/// <param name="ct">Cancelling this token ends the loop.</param>
private async Task MonitorConnectionAsync(CancellationToken ct)
{
    Log.Information("Connection monitor loop started (interval={IntervalMs}ms, probe={ProbeEnabled}, staleThreshold={StaleMs}ms)",
        _monitorIntervalMs, _probeTestTagAddress != null, _probeStaleThresholdMs);

    while (true)
    {
        if (ct.IsCancellationRequested) break;

        try
        {
            await Task.Delay(_monitorIntervalMs, ct);
        }
        catch (OperationCanceledException)
        {
            break;
        }

        if (IsConnected)
        {
            // Connected without a probe: nothing to verify this round.
            if (_probeTestTagAddress == null) continue;

            // Connected with a probe: only act when the last value is too old.
            var sinceLastProbe = DateTime.UtcNow - _lastProbeValueTime;
            if (!(sinceLastProbe.TotalMilliseconds > _probeStaleThresholdMs)) continue;

            Log.Warning("Probe tag {Tag} stale for {ElapsedMs}ms (threshold={ThresholdMs}ms) — forcing reconnect",
                _probeTestTagAddress, (int)sinceLastProbe.TotalMilliseconds, _probeStaleThresholdMs);

            try
            {
                await DisconnectAsync(ct);
            }
            catch (Exception disconnectError)
            {
                Log.Warning(disconnectError, "Error during forced disconnect before reconnect");
            }
        }

        // Reached when already disconnected, or right after a forced disconnect.
        await AttemptReconnectAsync(ct);

        // Reset probe timer so the next check does not fire a false stale trigger.
        _lastProbeValueTime = DateTime.UtcNow;
    }

    Log.Information("Connection monitor loop exited");
}
/// <summary>
/// Performs a single reconnect attempt: sets the state to
/// <c>ConnectionState.Reconnecting</c>, calls ConnectAsync and, on success,
/// increments the reconnect counter.
/// </summary>
/// <param name="ct">Forwarded to ConnectAsync.</param>
/// <remarks>
/// Never throws for a failed attempt: cancellation is swallowed so the
/// monitor loop can observe it itself, and any other failure is logged so
/// the loop retries at its next interval.
/// </remarks>
private async Task AttemptReconnectAsync(CancellationToken ct)
{
    Log.Information("Attempting reconnect...");
    SetState(ConnectionState.Reconnecting);
    try
    {
        await ConnectAsync(ct);

        // Log the value produced by this increment rather than re-reading the
        // field, which another thread could have changed in between.
        var reconnectNumber = Interlocked.Increment(ref _reconnectCount);
        Log.Information("Reconnected to MxAccess successfully (reconnect #{Count})", reconnectNumber);
    }
    catch (OperationCanceledException)
    {
        // Let the outer loop handle cancellation
    }
    catch (Exception ex)
    {
        Log.Warning(ex, "Reconnect attempt failed, will retry at next interval");
    }
}
/// <summary>
/// Cleans up COM objects on the dedicated STA thread after a failed connection.
/// </summary>
/// <remarks>
/// Every step is best-effort and never throws to the caller. If the failed
/// attempt got as far as a valid registration (handle greater than zero),
/// the client is unregistered before the COM object is released, mirroring
/// the normal disconnect path. The handle, handle/address maps and pending
/// writes are always reset.
/// </remarks>
private async Task CleanupComObjectsAsync()
{
    try
    {
        await _staThread.RunAsync(() =>
        {
            lock (_lock)
            {
                if (_lmxProxy != null)
                {
                    try
                    {
                        _lmxProxy.OnDataChange -= OnDataChange;
                    }
                    catch (Exception ex)
                    {
                        Log.Debug(ex, "Error detaching OnDataChange handler during cleanup");
                    }

                    try
                    {
                        _lmxProxy.OnWriteComplete -= OnWriteComplete;
                    }
                    catch (Exception ex)
                    {
                        Log.Debug(ex, "Error detaching OnWriteComplete handler during cleanup");
                    }

                    // A later connect step may have failed after Register succeeded;
                    // do not leave that registration behind.
                    if (_connectionHandle > 0)
                    {
                        try
                        {
                            _lmxProxy.Unregister(_connectionHandle);
                        }
                        catch (Exception ex)
                        {
                            Log.Debug(ex, "Error unregistering from MxAccess during cleanup");
                        }
                    }

                    try
                    {
                        Marshal.ReleaseComObject(_lmxProxy);
                    }
                    catch (Exception ex)
                    {
                        Log.Debug(ex, "Error releasing MxAccess COM object during cleanup");
                    }

                    _lmxProxy = null;
                }

                _connectionHandle = 0;
                _handleToAddress.Clear();
                _addressToHandle.Clear();
                _pendingWrites.Clear();
            }
        });
    }
    catch (Exception ex)
    {
        Log.Warning(ex, "Error during COM object cleanup");
    }
}
}
}