feat(observability): F13d Prometheus + OpenTelemetry instrumentation
Some checks failed
v2-ci / build (push) Failing after 38s
v2-ci / unit-tests (tests/Core/ZB.MOM.WW.OtOpcUa.Cluster.Tests) (push) Has been skipped
v2-ci / unit-tests (tests/Server/ZB.MOM.WW.OtOpcUa.ControlPlane.Tests) (push) Has been skipped
v2-ci / unit-tests (tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests) (push) Has been skipped
v2-ci / unit-tests (tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests) (push) Has been skipped
v2-ci / unit-tests (tests/Server/ZB.MOM.WW.OtOpcUa.Security.Tests) (push) Has been skipped
v2-ci / integration (push) Has been skipped

OtOpcUaTelemetry (Commons/Observability) centralizes the project's Meter
+ ActivitySource so all instrumentation points emit through a single
named surface. Counters cover the hot paths:

  otopcua.deploy.applied               (outcome=ack|reject)
  otopcua.deploy.apply.duration        (s, histogram)
  otopcua.driver.lifecycle             (event=spawn|spawn_stub|stop|fault)
  otopcua.virtualtag.eval              (outcome=ok|fail|skip)
  otopcua.scriptedalarm.transition     (state=activated|acknowledged|cleared)
  otopcua.opcua.sink.write             (kind=value|alarm|rebuild)
  otopcua.redundancy.service_level_change (level=byte)

Plus two ActivitySource spans:

  otopcua.deploy.apply                 wraps DriverHostActor.ApplyAndAck
  otopcua.opcua.address_space_rebuild  wraps OpcUaPublishActor.HandleRebuild

Instruments are no-op until a listener attaches, so tests + dev hosts
pay nothing for unread telemetry.

Host Program.cs gains AddOtOpcUaObservability() (binds the OtOpcUa Meter
+ ActivitySource to OpenTelemetry, attaches a Prometheus exporter) and
MapOtOpcUaMetrics() (mounts /metrics scrape endpoint). Driver-side
internals + ASP.NET request metrics deliberately stay off — the scrape
payload is scoped to OtOpcUa signals only.

Tests use MeterListener + ActivityListener to verify
VirtualTagActor.eval, OpcUaPublishActor.AttributeValueUpdate, and
RebuildAddressSpace actually emit on the central instruments. Runtime
suite is 72 / 72 green (+3).

Closes #105. Path A (F13b/c/d) complete; next batch options: #85 UNS
folder hierarchy in SDK, or F8b/F9b production engine bindings.
This commit is contained in:
Joseph Doherty
2026-05-26 10:29:40 -04:00
parent 21eac21409
commit 52997ee164
10 changed files with 352 additions and 3 deletions

View File

@@ -0,0 +1,177 @@
using System.Diagnostics;
using System.Diagnostics.Metrics;
using Akka.Actor;
using Shouldly;
using Xunit;
using ZB.MOM.WW.OtOpcUa.Commons.Engines;
using ZB.MOM.WW.OtOpcUa.Commons.Observability;
using ZB.MOM.WW.OtOpcUa.Commons.OpcUa;
using ZB.MOM.WW.OtOpcUa.Runtime.OpcUa;
using ZB.MOM.WW.OtOpcUa.Runtime.Tests.Harness;
using ZB.MOM.WW.OtOpcUa.Runtime.VirtualTags;
namespace ZB.MOM.WW.OtOpcUa.Runtime.Tests.Observability;
/// <summary>
/// F13d — verifies the instrumentation sites actually emit on the central
/// <see cref="OtOpcUaTelemetry"/> meter + activity source. Each test attaches a one-shot
/// listener, exercises the instrumented path, then asserts the recorded measurement matches.
/// </summary>
public sealed class OtOpcUaTelemetryHookTests : RuntimeActorTestBase
{
[Fact]
public void VirtualTagActor_evaluation_emits_otopcua_virtualtag_eval_counter()
{
using var recorder = new MeterRecorder("otopcua.virtualtag.eval");
var parent = CreateTestProbe();
var evaluator = new ConstEval(42);
var actor = parent.ChildActorOf(VirtualTagActor.Props("vt-tel-1", "expr", evaluator: evaluator));
actor.Tell(new VirtualTagActor.DependencyValueChanged("a", 1, DateTime.UtcNow));
parent.ExpectMsg<VirtualTagActor.EvaluationResult>();
recorder.Total.ShouldBeGreaterThanOrEqualTo(1);
recorder.WithTag("outcome", "ok").ShouldBeGreaterThanOrEqualTo(1);
}
[Fact]
public void OpcUaPublishActor_AttributeValueUpdate_emits_sink_write_counter()
{
using var recorder = new MeterRecorder("otopcua.opcua.sink.write");
var sink = new RecordingSink();
var actor = Sys.ActorOf(OpcUaPublishActor.PropsForTests(
sink: sink,
serviceLevel: NullServiceLevelPublisher.Instance,
subscribeRedundancyTopic: false,
localNode: Commons.Types.NodeId.Parse("test-node")));
actor.Tell(new OpcUaPublishActor.AttributeValueUpdate(
NodeId: "ns=2;s=tag-1",
Value: 42,
Quality: OpcUaQuality.Good,
TimestampUtc: DateTime.UtcNow));
AwaitAssertion(() =>
{
recorder.Total.ShouldBeGreaterThanOrEqualTo(1);
recorder.WithTag("kind", "value").ShouldBeGreaterThanOrEqualTo(1);
});
}
[Fact]
public void RebuildAddressSpace_starts_an_address_space_rebuild_span()
{
using var spanRecorder = new ActivityRecorder("otopcua.opcua.address_space_rebuild");
var sink = new RecordingSink();
var actor = Sys.ActorOf(OpcUaPublishActor.PropsForTests(
sink: sink,
serviceLevel: NullServiceLevelPublisher.Instance,
subscribeRedundancyTopic: false,
localNode: Commons.Types.NodeId.Parse("test-node")));
actor.Tell(new OpcUaPublishActor.RebuildAddressSpace(Commons.Types.CorrelationId.NewId()));
AwaitAssertion(() => spanRecorder.Activities.ShouldContain(a => a.OperationName == "otopcua.opcua.address_space_rebuild"));
}
private void AwaitAssertion(Action assertion)
{
var deadline = DateTime.UtcNow.AddSeconds(2);
Exception? last = null;
while (DateTime.UtcNow < deadline)
{
try { assertion(); return; }
catch (Exception ex) { last = ex; Thread.Sleep(25); }
}
if (last is not null) throw last;
}
/// <summary>Listens to a single instrument by name and tallies the values + tags.</summary>
private sealed class MeterRecorder : IDisposable
{
private readonly string _name;
private readonly MeterListener _listener;
private long _total;
private readonly List<KeyValuePair<string, object?>[]> _tagSets = new();
private readonly object _gate = new();
public MeterRecorder(string instrumentName)
{
_name = instrumentName;
_listener = new MeterListener
{
InstrumentPublished = (instrument, listener) =>
{
if (instrument.Meter.Name == OtOpcUaTelemetry.MeterName && instrument.Name == _name)
listener.EnableMeasurementEvents(instrument);
}
};
_listener.SetMeasurementEventCallback<long>((_, value, tags, _) =>
{
lock (_gate)
{
_total += value;
_tagSets.Add(tags.ToArray());
}
});
_listener.Start();
}
public long Total { get { lock (_gate) return _total; } }
public int WithTag(string key, string value)
{
lock (_gate)
{
return _tagSets.Count(set => set.Any(t => t.Key == key && Equals(t.Value, value)));
}
}
public void Dispose() => _listener.Dispose();
}
/// <summary>Listens to a single ActivitySource by name and stores started Activities.</summary>
private sealed class ActivityRecorder : IDisposable
{
private readonly string _operationName;
private readonly ActivityListener _listener;
private readonly List<Activity> _activities = new();
private readonly object _gate = new();
public ActivityRecorder(string operationName)
{
_operationName = operationName;
_listener = new ActivityListener
{
ShouldListenTo = source => source.Name == OtOpcUaTelemetry.ActivitySourceName,
Sample = (ref ActivityCreationOptions<ActivityContext> _) => ActivitySamplingResult.AllData,
ActivityStarted = activity =>
{
if (activity.OperationName == _operationName)
{
lock (_gate) _activities.Add(activity);
}
}
};
ActivitySource.AddActivityListener(_listener);
}
public IReadOnlyList<Activity> Activities { get { lock (_gate) return _activities.ToArray(); } }
public void Dispose() => _listener.Dispose();
}
private sealed class ConstEval(object? value) : IVirtualTagEvaluator
{
public VirtualTagEvalResult Evaluate(string virtualTagId, string expression, IReadOnlyDictionary<string, object?> dependencies)
=> VirtualTagEvalResult.Ok(value);
}
private sealed class RecordingSink : IOpcUaAddressSpaceSink
{
public int Writes { get; private set; }
public void WriteValue(string nodeId, object? value, OpcUaQuality quality, DateTime sourceTimestampUtc) => Writes++;
public void WriteAlarmState(string alarmNodeId, bool active, bool acknowledged, DateTime occurredUtc) => Writes++;
public void RebuildAddressSpace() { /* recorded via span */ }
}
}