test(historian-gateway): env-gated live validation vs wonder-sql-vd03 (read/write/alarm round-trips)

Claude-Session: https://claude.ai/code/session_012SDSQ3AcaXqPcBtDESBRii
This commit is contained in:
Joseph Doherty
2026-06-26 19:01:36 -04:00
parent 2a5c717755
commit b32436902a
2 changed files with 370 additions and 0 deletions
@@ -0,0 +1,166 @@
using System.Net.Sockets;
using Microsoft.Extensions.Logging.Abstractions;
using ZB.MOM.WW.OtOpcUa.Runtime.Historian;
namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Gateway.Tests.Live;
/// <summary>
/// Env-gated fixture for the live validation suite that exercises the gateway-backed
/// read / write / alarm paths against a real, running <c>ZB.MOM.WW.HistorianGateway</c>
/// sidecar (typically <c>wonder-sql-vd03</c> on the corporate VPN). Mirrors the
/// HistorianGateway repo's <c>GatewayIntegrationFixture</c> env-gating convention and this
/// repo's <c>OpcPlcFixture</c> reachability-probe pattern: the fixture is <em>cheap</em> at
/// construction (reads env vars + one short TCP probe) and records a <see cref="SkipReason"/>
/// so tests call <c>Assert.Skip(SkipReason)</c> and report as <b>Skipped</b> (not Failed)
/// when the suite is not configured or the gateway is unreachable.
/// </summary>
/// <remarks>
/// <para>
/// <b>Env vars consumed</b> (skip-gate + config source):
/// <list type="bullet">
/// <item><c>HISTGW_GATEWAY_ENDPOINT</c> — absolute gateway URI, e.g. <c>https://wonder-sql-vd03:5222</c>. Required; absent ⇒ all tests skip.</item>
/// <item><c>HISTGW_GATEWAY_APIKEY</c> — the <c>histgw_&lt;id&gt;_&lt;secret&gt;</c> key (must carry <c>historian:read</c> + <c>historian:write</c> scopes). Required; absent ⇒ all tests skip.</item>
/// <item><c>HISTGW_TEST_TAG</c> — an existing Galaxy / historian tag for the read round-trip.</item>
/// <item><c>HISTGW_WRITE_SANDBOX_TAG</c> — a Float sandbox tag the write round-trip may <c>EnsureTags</c> + write (e.g. <c>HistGW.LiveTest.Sandbox</c>).</item>
/// <item><c>HISTGW_ALARM_SOURCE</c> — a source name for the alarm <c>SendEvent</c> → <c>ReadEvents</c> round-trip.</item>
/// <item><c>HISTGW_GATEWAY_ALLOW_UNTRUSTED</c> — <c>true</c> to accept a self-signed dev cert (optional).</item>
/// </list>
/// </para>
/// <para>
/// <b>VPN-gated.</b> <c>wonder-sql-vd03</c> is reachable only on the VPN. When the endpoint
/// is configured but the host does not accept a TCP connection within
/// <see cref="ProbeTimeout"/>, <see cref="SkipReason"/> is set to a message that prompts
/// the operator to connect the VPN — the suite skips rather than hangs.
/// </para>
/// <para>
/// <b>Never connects from the fixture.</b> The gRPC channel is built lazily by the package
/// client, so constructing an adapter performs no network I/O. The fixture's only network
/// touch is the bounded TCP reachability probe.
/// </para>
/// </summary>
public sealed class GatewayLiveFixture
{
private const string EnvEndpoint = "HISTGW_GATEWAY_ENDPOINT";
private const string EnvApiKey = "HISTGW_GATEWAY_APIKEY";
private const string EnvTestTag = "HISTGW_TEST_TAG";
private const string EnvWriteSandboxTag = "HISTGW_WRITE_SANDBOX_TAG";
private const string EnvAlarmSource = "HISTGW_ALARM_SOURCE";
private const string EnvAllowUntrusted = "HISTGW_GATEWAY_ALLOW_UNTRUSTED";
/// <summary>Bounded deadline for the TCP reachability probe (keeps an unreachable VPN from hanging the run).</summary>
private static readonly TimeSpan ProbeTimeout = TimeSpan.FromSeconds(3);
/// <summary>Short per-call deadline so a misconfigured / unreachable gateway fails fast instead of hanging.</summary>
private static readonly TimeSpan CallTimeout = TimeSpan.FromSeconds(20);
private readonly string? _endpoint;
private readonly string? _apiKey;
private readonly bool _allowUntrusted;
/// <summary>
/// Reads the env config and runs one bounded TCP reachability probe. On any gap
/// (missing endpoint/key, malformed URI, or unreachable host) <see cref="SkipReason"/> is set
/// and the suite skips cleanly.
/// </summary>
public GatewayLiveFixture()
{
_endpoint = Trimmed(EnvEndpoint);
_apiKey = Trimmed(EnvApiKey);
TestTag = Trimmed(EnvTestTag);
WriteSandboxTag = Trimmed(EnvWriteSandboxTag);
AlarmSource = Trimmed(EnvAlarmSource);
_allowUntrusted = string.Equals(
Trimmed(EnvAllowUntrusted), "true", StringComparison.OrdinalIgnoreCase);
if (_endpoint is null || _apiKey is null)
{
SkipReason =
$"Skipped: set {EnvEndpoint} (e.g. https://wonder-sql-vd03:5222) and {EnvApiKey} " +
$"(a histgw_<id>_<secret> key with historian:read + historian:write scopes) to run the live validation suite.";
return;
}
if (!Uri.TryCreate(_endpoint, UriKind.Absolute, out var uri))
{
SkipReason = $"Skipped: {EnvEndpoint}='{_endpoint}' is not an absolute URI (expected e.g. https://wonder-sql-vd03:5222).";
return;
}
// Bounded TCP probe — a configured-but-unreachable gateway (VPN down) skips, never hangs.
SkipReason = ProbeReachable(uri.Host, uri.Port)
? null
: $"Skipped: gateway {uri.Host}:{uri.Port} (from {EnvEndpoint}) did not accept a TCP connection within " +
$"{ProbeTimeout.TotalSeconds:0}s. It is reachable only on the corporate VPN — connect the VPN (host wonder-sql-vd03) and re-run.";
}
/// <summary>Non-null when the suite must skip (unconfigured, malformed endpoint, or unreachable host).</summary>
public string? SkipReason { get; }
/// <summary>Convenience flag: true when env config is absent / malformed / unreachable.</summary>
public bool NotConfigured => SkipReason is not null;
/// <summary>The existing Galaxy / historian tag for the read round-trip (<c>HISTGW_TEST_TAG</c>); null when unset.</summary>
public string? TestTag { get; }
/// <summary>The Float sandbox tag for the write round-trip (<c>HISTGW_WRITE_SANDBOX_TAG</c>); null when unset.</summary>
public string? WriteSandboxTag { get; }
/// <summary>The source name for the alarm round-trip (<c>HISTGW_ALARM_SOURCE</c>); null when unset.</summary>
public string? AlarmSource { get; }
/// <summary>
/// Builds the bound <see cref="ServerHistorianOptions"/> from env. Only valid when
/// <see cref="NotConfigured"/> is false (the endpoint + key are non-null by then).
/// </summary>
public ServerHistorianOptions BuildOptions()
{
var useTls = Uri.TryCreate(_endpoint, UriKind.Absolute, out var uri)
&& string.Equals(uri.Scheme, Uri.UriSchemeHttps, StringComparison.OrdinalIgnoreCase);
return new ServerHistorianOptions
{
Enabled = true,
Endpoint = _endpoint!,
ApiKey = _apiKey!,
UseTls = useTls,
AllowUntrustedServerCertificate = _allowUntrusted,
CallTimeout = CallTimeout,
};
}
/// <summary>
/// Creates a fresh real adapter over the package gateway client. Each caller owns and disposes
/// its own adapter (the data source / writers each take exclusive ownership of their client).
/// </summary>
public HistorianGatewayClientAdapter CreateClient() =>
HistorianGatewayClientAdapter.Create(BuildOptions(), NullLoggerFactory.Instance);
/// <summary>Creates a fresh real <see cref="GatewayHistorianDataSource"/> over its own adapter.</summary>
public GatewayHistorianDataSource CreateDataSource() =>
new(CreateClient(), NullLogger<GatewayHistorianDataSource>.Instance);
private static string? Trimmed(string envVar)
{
var value = Environment.GetEnvironmentVariable(envVar);
return string.IsNullOrWhiteSpace(value) ? null : value.Trim();
}
/// <summary>
/// Bounded, never-throwing TCP connectivity probe to <paramref name="host"/>:<paramref name="port"/>.
/// Returns true only on a connection accepted within <see cref="ProbeTimeout"/>.
/// </summary>
private static bool ProbeReachable(string host, int port)
{
try
{
using var client = new TcpClient();
var connect = client.ConnectAsync(host, port);
return connect.Wait(ProbeTimeout) && client.Connected;
}
catch
{
// Timeout, connection refused, DNS failure, … → unreachable (skip, never fail/hang).
return false;
}
}
}
@@ -0,0 +1,204 @@
using Microsoft.Extensions.Logging.Abstractions;
using Shouldly;
using Xunit;
using ZB.MOM.WW.HistorianGateway.Contracts.Grpc;
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
using ZB.MOM.WW.OtOpcUa.Core.Abstractions.Historian;
using ZB.MOM.WW.OtOpcUa.Core.AlarmHistorian;
using ZB.MOM.WW.OtOpcUa.Driver.Historian.Gateway.Recorder;
namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Gateway.Tests.Live;
/// <summary>
/// End-to-end live validation of the gateway-backed historian backend against a real, running
/// <c>ZB.MOM.WW.HistorianGateway</c> sidecar (typically <c>wonder-sql-vd03</c> on the VPN). This is
/// the validation gate the operator runs on the VPN before the Wonderware backend's retirement is
/// trusted — every path exercised here is a <em>real</em> driver component, not a fake:
/// <list type="bullet">
/// <item><see cref="GatewayHistorianDataSource"/> — the read + <c>ReadEvents</c> path.</item>
/// <item><see cref="HistorianGatewayClientAdapter"/> — the <c>EnsureTags</c> seam.</item>
/// <item><see cref="GatewayHistorianValueWriter"/> — the recorder's <c>WriteLiveValues</c> path.</item>
/// <item><see cref="GatewayAlarmHistorianWriter"/> — the alarm <c>SendEvent</c> path.</item>
/// </list>
/// <para>
/// <b>Env-gated + skip-clean.</b> Every test calls <c>Assert.Skip</c> via the fixture when the
/// suite is unconfigured / the gateway is unreachable, and again when its own required tag /
/// source env var is absent — so <c>dotnet test --filter "Category=LiveIntegration"</c> stays
/// green offline (all skip, none fail). See <see cref="GatewayLiveFixture"/> for the env vars.
/// </para>
/// <para>
/// <b>Gateway prerequisites</b> (when run on the VPN): the target gateway must run with
/// <c>RuntimeDb:Enabled=true</c> (the <c>WriteLiveValues</c> SQL path) and
/// <c>RuntimeDb:EventReadsEnabled=true</c> (the SQL <c>ReadEvents</c> path), and the API key
/// must carry the <c>historian:read</c> + <c>historian:write</c> scopes.
/// </para>
/// </summary>
[Trait("Category", "LiveIntegration")]
public sealed class GatewayLiveIntegrationTests(GatewayLiveFixture fixture) : IClassFixture<GatewayLiveFixture>
{
private readonly GatewayLiveFixture _fx = fixture;
/// <summary>
/// Read round-trip — <c>ReadRaw</c> for an existing tag over the last hour through the real
/// <see cref="GatewayHistorianDataSource"/>. Asserts the read completes without throwing and
/// returns a (possibly empty) sample set: a sparse tag legitimately has zero samples in the
/// window, so the meaningful live signal is "the gateway answered, not faulted".
/// </summary>
[Fact]
[Trait("Category", "LiveIntegration")]
public async Task Galaxy_tag_read_round_trip()
{
if (_fx.NotConfigured) Assert.Skip(_fx.SkipReason!);
if (_fx.TestTag is null)
Assert.Skip("Skipped: set HISTGW_TEST_TAG to an existing Galaxy/historian tag to run the read round-trip.");
var ct = TestContext.Current.CancellationToken;
await using var dataSource = _fx.CreateDataSource();
var endUtc = DateTime.UtcNow;
var startUtc = endUtc - TimeSpan.FromHours(1);
var result = await dataSource.ReadRawAsync(_fx.TestTag, startUtc, endUtc, maxValuesPerNode: 1000, ct);
result.ShouldNotBeNull();
result.Samples.Count.ShouldBeGreaterThanOrEqualTo(0, "a live ReadRaw must answer (zero samples is a valid sparse-tag result, not a fault)");
TestContext.Current.SendDiagnosticMessage(
$"read round-trip: ReadRaw('{_fx.TestTag}', last 1h) returned {result.Samples.Count} sample(s).");
}
/// <summary>
/// Write round-trip — <c>EnsureTags</c> (Float) → <c>WriteLiveValues</c> (a known value via the
/// real recorder writer) → <c>ReadRaw</c> the recent window and assert the written sample is
/// present. Requires the gateway running <c>RuntimeDb:Enabled=true</c> and that <c>EnsureTags</c>
/// provisioned the tag (the SQL live-write path only accepts provisioned analog tags). The write
/// value is an exact-in-float integer so the float-precision round-trip compares cleanly.
/// </summary>
[Fact]
[Trait("Category", "LiveIntegration")]
public async Task Write_then_read_on_sandbox_tag()
{
if (_fx.NotConfigured) Assert.Skip(_fx.SkipReason!);
if (_fx.WriteSandboxTag is null)
Assert.Skip("Skipped: set HISTGW_WRITE_SANDBOX_TAG to a writable Float sandbox tag (e.g. HistGW.LiveTest.Sandbox) to run the write round-trip.");
var ct = TestContext.Current.CancellationToken;
var tag = _fx.WriteSandboxTag;
// A value that is exactly representable in float32 (integer < 2^24) so the analog
// store/read round-trip is not muddied by single-precision rounding. The millisecond
// component keeps consecutive runs from colliding on the same value.
const ushort goodQuality = 192; // OPC-DA "Good" floor.
var writeUtc = DateTime.UtcNow;
double written = 1_000_000 + writeUtc.Millisecond;
// EnsureTags (Float) through the real adapter seam — create-or-update, idempotent for an
// already-provisioned sandbox tag.
await using var writeClient = _fx.CreateClient();
var ensure = await writeClient.EnsureTagsAsync(
new[]
{
new HistorianTagDefinition
{
TagName = tag,
DataType = HistorianDataType.Float,
EngineeringUnit = string.Empty,
Description = "OtOpcUa live validation sandbox",
},
},
ct);
ensure.ShouldNotBeNull();
// WriteLiveValues through the real recorder writer (SQL live-write path).
var valueWriter = new GatewayHistorianValueWriter(writeClient, NullLogger<GatewayHistorianValueWriter>.Instance);
var acked = await valueWriter.WriteLiveValuesAsync(
tag, new[] { new HistorizationValue(writeUtc, written, goodQuality) }, ct);
acked.ShouldBeTrue(
"the live write must be acked — needs the gateway running RuntimeDb:Enabled=true and the tag EnsureTags-provisioned.");
// Read the written value back over a recent window. The SQL write can lag the read by a flush
// cadence, so poll briefly rather than asserting on the first read.
await using var dataSource = _fx.CreateDataSource();
DataValueSnapshot? hit = null;
var deadline = DateTime.UtcNow + TimeSpan.FromSeconds(15);
do
{
var read = await dataSource.ReadRawAsync(
tag, writeUtc - TimeSpan.FromMinutes(5), DateTime.UtcNow + TimeSpan.FromMinutes(1), maxValuesPerNode: 10_000, ct);
hit = read.Samples.FirstOrDefault(s => s.Value is double d && Math.Abs(d - written) < 0.5);
if (hit is not null) break;
await Task.Delay(TimeSpan.FromSeconds(1), ct);
}
while (DateTime.UtcNow < deadline);
hit.ShouldNotBeNull(
$"the written sample ({written}) should be readable back from '{tag}' within the recent window (gateway needs RuntimeDb:Enabled=true).");
TestContext.Current.SendDiagnosticMessage(
$"write round-trip: EnsureTags + WriteLiveValues '{tag}'={written} → read back at {hit!.SourceTimestampUtc:O}.");
}
/// <summary>
/// Alarm round-trip — <c>SendEvent</c> for the configured source through the real
/// <see cref="GatewayAlarmHistorianWriter"/>, then <c>ReadEvents</c> the recent window for that
/// source through the real <see cref="GatewayHistorianDataSource"/> and assert the event is
/// present. Requires the gateway running <c>RuntimeDb:EventReadsEnabled=true</c> (the SQL
/// alarm-history read path). Presence is asserted as "at least one event for the source surfaced
/// in the post-send window" (the data source filters by source); the exact AlarmId / message
/// match is surfaced as a diagnostic, since the SQL event store may re-key the row.
/// </summary>
[Fact]
[Trait("Category", "LiveIntegration")]
public async Task Alarm_SendEvent_then_ReadEvents()
{
if (_fx.NotConfigured) Assert.Skip(_fx.SkipReason!);
if (_fx.AlarmSource is null)
Assert.Skip("Skipped: set HISTGW_ALARM_SOURCE to a source name to run the alarm SendEvent → ReadEvents round-trip.");
var ct = TestContext.Current.CancellationToken;
var source = _fx.AlarmSource;
var alarmId = "OtOpcUaLive-" + Guid.NewGuid().ToString("N");
var eventUtc = DateTime.UtcNow;
var alarm = new AlarmHistorianEvent(
AlarmId: alarmId,
EquipmentPath: source, // becomes the wire event's SourceName / SQL Source_Object filter key.
AlarmName: "OtOpcUaLiveValidation",
AlarmTypeName: "LimitAlarm",
Severity: AlarmSeverity.High,
EventKind: "Activated",
Message: "OtOpcUa live validation event",
User: "system",
Comment: null,
TimestampUtc: eventUtc);
// SendEvent through the real alarm writer (never throws — returns a per-event outcome).
using var alarmClient = _fx.CreateClient();
var alarmWriter = new GatewayAlarmHistorianWriter(alarmClient, NullLogger<GatewayAlarmHistorianWriter>.Instance);
var outcomes = await alarmWriter.WriteBatchAsync(new[] { alarm }, ct);
outcomes.ShouldHaveSingleItem().ShouldBe(
HistorianWriteOutcome.Ack,
"the alarm SendEvent must be acked — needs the gateway write scope (historian:write) and SendEvent path.");
// Read the event back over a recent window for the source. The SQL event write can lag, so poll.
await using var dataSource = _fx.CreateDataSource();
IReadOnlyList<HistoricalEvent> events = Array.Empty<HistoricalEvent>();
var deadline = DateTime.UtcNow + TimeSpan.FromSeconds(15);
do
{
var read = await dataSource.ReadEventsAsync(
source, eventUtc - TimeSpan.FromMinutes(5), DateTime.UtcNow + TimeSpan.FromMinutes(1), maxEvents: 0, ct);
events = read.Events;
if (events.Count > 0) break;
await Task.Delay(TimeSpan.FromSeconds(1), ct);
}
while (DateTime.UtcNow < deadline);
events.Count.ShouldBeGreaterThan(0,
$"the SendEvent for source '{source}' should be readable back via ReadEvents (gateway needs RuntimeDb:EventReadsEnabled=true).");
var exactMatch = events.Any(e => string.Equals(e.EventId, alarmId, StringComparison.Ordinal));
TestContext.Current.SendDiagnosticMessage(
$"alarm round-trip: SendEvent source='{source}' id={alarmId} → ReadEvents returned {events.Count} event(s); exact-id match={exactMatch}.");
}
}