feat(observability): F13d Prometheus + OpenTelemetry instrumentation
v2-ci / build (push) Failing after 38s
v2-ci / unit-tests (tests/Core/ZB.MOM.WW.OtOpcUa.Cluster.Tests) (push) Has been skipped
v2-ci / unit-tests (tests/Server/ZB.MOM.WW.OtOpcUa.ControlPlane.Tests) (push) Has been skipped
v2-ci / unit-tests (tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests) (push) Has been skipped
v2-ci / unit-tests (tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests) (push) Has been skipped
v2-ci / unit-tests (tests/Server/ZB.MOM.WW.OtOpcUa.Security.Tests) (push) Has been skipped
v2-ci / integration (push) Has been skipped
v2-ci / build (push) Failing after 38s
v2-ci / unit-tests (tests/Core/ZB.MOM.WW.OtOpcUa.Cluster.Tests) (push) Has been skipped
v2-ci / unit-tests (tests/Server/ZB.MOM.WW.OtOpcUa.ControlPlane.Tests) (push) Has been skipped
v2-ci / unit-tests (tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests) (push) Has been skipped
v2-ci / unit-tests (tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests) (push) Has been skipped
v2-ci / unit-tests (tests/Server/ZB.MOM.WW.OtOpcUa.Security.Tests) (push) Has been skipped
v2-ci / integration (push) Has been skipped
OtOpcUaTelemetry (Commons/Observability) centralizes the project's Meter + ActivitySource so all instrumentation points emit through a single named surface. Counters cover the hot paths: otopcua.deploy.applied (outcome=ack|reject) otopcua.deploy.apply.duration (s, histogram) otopcua.driver.lifecycle (event=spawn|spawn_stub|stop|fault) otopcua.virtualtag.eval (outcome=ok|fail|skip) otopcua.scriptedalarm.transition (state=activated|acknowledged|cleared) otopcua.opcua.sink.write (kind=value|alarm|rebuild) otopcua.redundancy.service_level_change (level=byte) Plus two ActivitySource spans: otopcua.deploy.apply wraps DriverHostActor.ApplyAndAck otopcua.opcua.address_space_rebuild wraps OpcUaPublishActor.HandleRebuild Instruments are no-op until a listener attaches, so tests + dev hosts pay nothing for unread telemetry. Host Program.cs gains AddOtOpcUaObservability() (binds the OtOpcUa Meter + ActivitySource to OpenTelemetry, attaches a Prometheus exporter) and MapOtOpcUaMetrics() (mounts /metrics scrape endpoint). Driver-side internals + ASP.NET request metrics deliberately stay off — the scrape payload is scoped to OtOpcUa signals only. Tests use MeterListener + ActivityListener to verify VirtualTagActor.eval, OpcUaPublishActor.AttributeValueUpdate, and RebuildAddressSpace actually emit on the central instruments. Runtime suite is 72 / 72 green (+3). Closes #105. Path A (F13b/c/d) complete; next batch options: #85 UNS folder hierarchy in SDK, or F8b/F9b production engine bindings.
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
using Akka.Actor;
|
||||
using Akka.Event;
|
||||
using ZB.MOM.WW.OtOpcUa.Commons.Observability;
|
||||
using ZB.MOM.WW.OtOpcUa.Commons.OpcUa;
|
||||
using ZB.MOM.WW.OtOpcUa.Commons.Types;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
@@ -82,6 +83,9 @@ public sealed class DriverInstanceActor : ReceiveActor, IWithTimers
|
||||
_driver = driver;
|
||||
_driverInstanceId = driver.DriverInstanceId;
|
||||
_reconnectInterval = reconnectInterval;
|
||||
OtOpcUaTelemetry.DriverInstanceLifecycle.Add(1,
|
||||
new KeyValuePair<string, object?>("event", startStubbed ? "spawn_stub" : "spawn"),
|
||||
new KeyValuePair<string, object?>("driver_type", driver.DriverType));
|
||||
if (startStubbed)
|
||||
{
|
||||
Context.GetLogger().Info("[DEV-STUB] driver={Name} type={Type}",
|
||||
@@ -314,5 +318,8 @@ public sealed class DriverInstanceActor : ReceiveActor, IWithTimers
|
||||
DetachSubscription();
|
||||
try { _driver.ShutdownAsync(CancellationToken.None).GetAwaiter().GetResult(); }
|
||||
catch (Exception ex) { _log.Warning(ex, "DriverInstance {Id}: ShutdownAsync threw on PostStop", _driverInstanceId); }
|
||||
OtOpcUaTelemetry.DriverInstanceLifecycle.Add(1,
|
||||
new KeyValuePair<string, object?>("event", "stop"),
|
||||
new KeyValuePair<string, object?>("driver_type", _driver.DriverType));
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user