fix(galaxy): invalidate writer handle caches on session reconnect

Add IGalaxyDataWriter.InvalidateHandleCaches() and call it in
GalaxyDriver.ReopenAsync after RecreateAsync succeeds. Prior to this
fix, GatewayGalaxyDataWriter's _itemHandles and _supervisedHandles
dictionaries survived across reconnects, so the next write skipped
AddItem and AdviseSupervisory and reused handles that were already dead.
This commit is contained in:
Joseph Doherty
2026-06-14 00:39:24 -04:00
parent 42b4a923fd
commit f77488eed9
7 changed files with 144 additions and 0 deletions
@@ -293,6 +293,9 @@ public sealed class GalaxyDriver
if (_ownedMxSession is null) return;
var clientOptions = BuildClientOptions(_options.Gateway);
await _ownedMxSession.RecreateAsync(clientOptions, cancellationToken).ConfigureAwait(false);
// The recreated session invalidates every prior gw item handle; drop the writer's handle/advise
// caches so the next write re-AddItems + re-AdviseSupervisory against the fresh session.
_dataWriter?.InvalidateHandleCaches();
}
/// <summary>
@@ -42,6 +42,42 @@ public sealed class GatewayGalaxyDataWriter : IGalaxyDataWriter
_logger = logger ?? NullLogger.Instance;
}
/// <inheritdoc />
/// <remarks>
/// Empties both the item-handle cache and the supervised-handle cache held by this writer.
/// <c>GalaxyDriver.ReopenAsync</c> invokes this (through <c>IGalaxyDataWriter</c>) once the
/// session has been recreated, so that handles cached before the reconnect are not reused.
/// </remarks>
public void InvalidateHandleCaches()
{
// Drop the tag-reference -> item-handle entries, then the supervised markers that are
// keyed by those item handles.
// NOTE(review): these are two separate Clear() calls, not one atomic operation — confirm
// that no write can run concurrently with a reconnect.
_itemHandles.Clear();
_supervisedHandles.Clear();
}
// ===== Test seams (internal — not part of the public contract) =====
/// <summary>
/// Number of entries currently held in the item-handle cache. Exposed so unit tests can
/// inspect cache state without running a real gRPC round-trip; the value is zero on a
/// fresh instance and immediately after <see cref="InvalidateHandleCaches"/>.
/// </summary>
internal int CachedItemHandleCount
{
    get { return _itemHandles.Count; }
}
/// <summary>
/// Number of entries currently held in the supervisory-advised handle cache. Exposed so
/// unit tests can inspect cache state; the value is zero on a fresh instance and
/// immediately after <see cref="InvalidateHandleCaches"/>.
/// </summary>
internal int CachedSupervisedHandleCount
{
    get { return _supervisedHandles.Count; }
}
/// <summary>
/// Test seam that fills the caches as though a write had already happened, so unit tests
/// can reach the "post-write" state without a real gRPC gateway session (the SDK session
/// types are sealed + internal-ctor and cannot be faked).
/// </summary>
/// <param name="fullRef">Tag full reference used as the key in the item-handle cache.</param>
/// <param name="itemHandle">Item handle stored for <paramref name="fullRef"/>.</param>
/// <param name="supervised">
/// When true, <paramref name="itemHandle"/> is also recorded in the supervised-handle cache.
/// </param>
internal void SeedHandleCachesForTest(string fullRef, int itemHandle, bool supervised)
{
    // Assigning through the indexer overwrites any handle already cached for this reference.
    _itemHandles[fullRef] = itemHandle;

    if (!supervised)
    {
        return;
    }

    // TryAdd leaves an existing entry for this handle untouched.
    _supervisedHandles.TryAdd(itemHandle, 0);
}
/// <summary>Writes values to Galaxy tags through the gateway.</summary>
/// <param name="writes">The write requests.</param>
/// <param name="securityResolver">Function to resolve security classification per tag.</param>
@@ -30,4 +30,8 @@ public interface IGalaxyDataWriter
IReadOnlyList<WriteRequest> writes,
Func<string, SecurityClassification> securityResolver,
CancellationToken cancellationToken);
/// <summary>
/// Drops the cached gateway item handles and the supervisory-advise state.
/// </summary>
/// <remarks>
/// Call after a session reconnect: the prior handles are dead, so the next write must
/// re-AddItem and re-AdviseSupervisory rather than reuse cached entries.
/// </remarks>
void InvalidateHandleCaches();
}
@@ -10,6 +10,10 @@ namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Runtime;
/// </summary>
internal sealed class TracedGalaxyDataWriter(IGalaxyDataWriter inner, string clientName) : IGalaxyDataWriter
{
/// <inheritdoc />
/// <remarks>
/// Forwards directly to the wrapped writer. No telemetry span is recorded because this is
/// a local cache-clear operation, not a gateway round-trip.
/// </remarks>
public void InvalidateHandleCaches()
{
    inner.InvalidateHandleCaches();
}
/// <summary>Writes data to Galaxy while recording telemetry span.</summary>
/// <param name="writes">The list of write requests to process.</param>
/// <param name="securityResolver">Function to resolve security classification for tag references.</param>
@@ -94,6 +94,9 @@ public sealed class GalaxyDriverWriteTests
}
return Task.FromResult<IReadOnlyList<WriteResult>>(results);
}
/// <inheritdoc />
public void InvalidateHandleCaches()
{
    // Intentionally empty: this fake has no handle caches to drop.
}
}
private static GalaxyAttribute Attr(string name, int sec)
@@ -178,6 +178,9 @@ public sealed class GalaxyTelemetryTests
CancellationToken cancellationToken)
=> Task.FromResult<IReadOnlyList<WriteResult>>(
writes.Select(_ => new WriteResult(0u)).ToList());
/// <inheritdoc />
public void InvalidateHandleCaches()
{
    // Intentionally empty: this fake has no handle caches to drop.
}
}
private sealed class FakeHierarchy : IGalaxyHierarchySource
@@ -0,0 +1,91 @@
using Shouldly;
using Xunit;
using ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Config;
using ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Runtime;
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Tests.Runtime;
/// <summary>
/// Tests for <see cref="GatewayGalaxyDataWriter.InvalidateHandleCaches"/>.
/// The SDK session types are sealed with internal ctors and cannot be faked, so these
/// tests populate the caches through
/// <see cref="GatewayGalaxyDataWriter.SeedHandleCachesForTest"/> and observe them through
/// the handle-count seams. The contract under test is purely that
/// <see cref="GatewayGalaxyDataWriter.InvalidateHandleCaches"/> empties both caches
/// so the next write is forced to re-AddItem + re-AdviseSupervisory.
/// </summary>
public sealed class GatewayGalaxyDataWriterTests
{
    /// <summary>Builds a session from options that only set the client name.</summary>
    private static GalaxyMxSession MinimalSession()
        => new(new GalaxyMxAccessOptions(ClientName: "OtOpcUa-Test"));

    /// <summary>Builds a writer over a fresh <see cref="MinimalSession"/> with write user id 0.</summary>
    private static GatewayGalaxyDataWriter CreateWriter()
        => new(MinimalSession(), writeUserId: 0);

    /// <summary>
    /// Seeds both caches directly via the internal test seam, confirms each holds one entry,
    /// calls <see cref="GatewayGalaxyDataWriter.InvalidateHandleCaches"/>, and confirms both
    /// caches are empty afterwards.
    /// The next write (not simulated here — needs a live gw) would therefore be forced
    /// to re-AddItem because the cache is empty.
    /// </summary>
    [Fact]
    public void InvalidateHandleCaches_clears_item_and_supervised_handle_caches()
    {
        var writer = CreateWriter();

        // Pre-seed both caches via the internal test seam so we can assert the
        // "after a write" state without spinning up a real gRPC gateway session.
        writer.SeedHandleCachesForTest("TestMachine_001.TestAttr", itemHandle: 42, supervised: true);
        writer.CachedItemHandleCount.ShouldBe(1);
        writer.CachedSupervisedHandleCount.ShouldBe(1);

        writer.InvalidateHandleCaches();

        writer.CachedItemHandleCount.ShouldBe(0);
        writer.CachedSupervisedHandleCount.ShouldBe(0);
    }

    /// <summary>
    /// A second seed + invalidate cycle proves the method isn't one-shot — a reconnect
    /// followed by writes followed by another reconnect must also start fresh. Both caches
    /// are checked in both cycles so a regression that clears only one of them, or clears
    /// it only the first time, is caught.
    /// </summary>
    [Fact]
    public void InvalidateHandleCaches_is_repeatable_across_multiple_reconnects()
    {
        var writer = CreateWriter();

        // First session cycle: two item handles, one of them supervised.
        writer.SeedHandleCachesForTest("Tag.A", itemHandle: 1, supervised: false);
        writer.SeedHandleCachesForTest("Tag.B", itemHandle: 2, supervised: true);
        writer.CachedItemHandleCount.ShouldBe(2);
        writer.CachedSupervisedHandleCount.ShouldBe(1);

        writer.InvalidateHandleCaches();

        writer.CachedItemHandleCount.ShouldBe(0);
        writer.CachedSupervisedHandleCount.ShouldBe(0);

        // Second session cycle — handles re-populated after the reconnect's replay.
        writer.SeedHandleCachesForTest("Tag.A", itemHandle: 99, supervised: true);
        writer.CachedItemHandleCount.ShouldBe(1);
        writer.CachedSupervisedHandleCount.ShouldBe(1);

        writer.InvalidateHandleCaches();

        writer.CachedItemHandleCount.ShouldBe(0);
        writer.CachedSupervisedHandleCount.ShouldBe(0);
    }

    /// <summary>
    /// <see cref="GatewayGalaxyDataWriter.InvalidateHandleCaches"/> on a fresh (never-used)
    /// writer must be a no-op rather than throwing — the reconnect supervisor may call it
    /// before any write has occurred.
    /// </summary>
    [Fact]
    public void InvalidateHandleCaches_on_empty_caches_is_a_noop()
    {
        var writer = CreateWriter();

        // Caches are empty — must not throw.
        writer.CachedItemHandleCount.ShouldBe(0);
        writer.CachedSupervisedHandleCount.ShouldBe(0);

        writer.InvalidateHandleCaches();

        writer.CachedItemHandleCount.ShouldBe(0);
        writer.CachedSupervisedHandleCount.ShouldBe(0);
    }
}