feat(scadabridge): emit scadabridge.deployments.applied on deployment success

This commit is contained in:
Joseph Doherty
2026-06-01 16:52:09 -04:00
parent c41cb41c7b
commit 877f2e200b
2 changed files with 113 additions and 0 deletions
@@ -7,6 +7,7 @@ using ZB.MOM.WW.ScadaBridge.Commons.Interfaces.Repositories;
using ZB.MOM.WW.ScadaBridge.Commons.Interfaces.Services;
using ZB.MOM.WW.ScadaBridge.Commons.Messages.Deployment;
using ZB.MOM.WW.ScadaBridge.Commons.Messages.Lifecycle;
using ZB.MOM.WW.ScadaBridge.Commons.Observability;
using ZB.MOM.WW.ScadaBridge.Commons.Types;
using ZB.MOM.WW.ScadaBridge.Commons.Types.Enums;
using ZB.MOM.WW.ScadaBridge.Commons.Types.Flattening;
@@ -244,6 +245,16 @@ public class DeploymentService
if (response.Status == DeploymentStatus.Success)
{
// Telemetry: one instance deployment successfully applied to a
// site. Counted once per successful deploy operation (the unit
// of scadabridge.deployments.applied — one DeployInstanceAsync
// deploys exactly one instance to one site). Emitted only on this
// confirmed-Success path, so failures, timeouts/retries (the
// catch block), and the reconciliation path (which recovers a
// PRIOR timed-out apply rather than performing a fresh one) do
// not increment it.
ScadaBridgeTelemetry.RecordDeploymentApplied();
// The site has applied the deployment. The post-success
// persistence below is best-effort: a failure here must be
// logged loudly for operator reconciliation but must not flip
@@ -1,3 +1,4 @@
using System.Diagnostics.Metrics;
using Akka.Actor;
using Akka.TestKit.Xunit2;
using Microsoft.Extensions.Logging.Abstractions;
@@ -10,6 +11,7 @@ using ZB.MOM.WW.ScadaBridge.Commons.Interfaces.Repositories;
using ZB.MOM.WW.ScadaBridge.Commons.Interfaces.Services;
using ZB.MOM.WW.ScadaBridge.Commons.Messages.Deployment;
using ZB.MOM.WW.ScadaBridge.Commons.Messages.Lifecycle;
using ZB.MOM.WW.ScadaBridge.Commons.Observability;
using ZB.MOM.WW.ScadaBridge.Commons.Types;
using ZB.MOM.WW.ScadaBridge.Commons.Types.Enums;
using ZB.MOM.WW.ScadaBridge.Commons.Types.Flattening;
@@ -558,6 +560,106 @@ public class DeploymentServiceTests : TestKit
Arg.Any<object>(), Arg.Any<CancellationToken>());
}
// ── Telemetry follow-on: scadabridge.deployments.applied on deploy success ──
[Fact]
public async Task DeployInstanceAsync_SiteSucceeds_EmitsDeploymentsAppliedCounterOnce()
{
    // Contract under test: a deploy that succeeds bumps the
    // scadabridge.deployments.applied counter by exactly one. A single
    // DeployInstanceAsync call deploys one instance to one site, so the
    // unit is one increment per successful deploy operation.
    const int instanceId = 55;
    const string instanceName = "MetricInst";
    const string revisionHash = "sha256:target";

    // Arrange: an instance that has never been deployed and has no
    // current deployment record in the repository.
    var notYetDeployed = new Instance(instanceName)
    {
        Id = instanceId,
        SiteId = 1,
        State = InstanceState.NotDeployed
    };
    _repo.GetInstanceByIdAsync(instanceId, Arg.Any<CancellationToken>()).Returns(notYetDeployed);
    SetupValidPipeline(instanceId, instanceName, revisionHash);
    _repo.GetCurrentDeploymentStatusAsync(instanceId, Arg.Any<CancellationToken>())
        .Returns((DeploymentRecord?)null);

    var probeCounters = new ReconcileProbeCounters();
    var probeActor = Sys.ActorOf(Props.Create(() =>
        new ReconcileProbeActor(probeCounters, siteHash: revisionHash, failQuery: false)));
    var sut = CreateServiceWithCommActor(probeActor);

    // Sum every long measurement reported by the applied-deployments
    // instrument on the ScadaBridge meter; all other instruments are
    // left unsubscribed.
    long appliedTotal = 0;
    using var meterListener = new MeterListener();
    meterListener.InstrumentPublished = (published, self) =>
    {
        var isAppliedCounter =
            published.Meter.Name == ScadaBridgeTelemetry.MeterName
            && published.Name == "scadabridge.deployments.applied";
        if (!isAppliedCounter)
        {
            return;
        }

        self.EnableMeasurementEvents(published);
    };
    meterListener.SetMeasurementEventCallback<long>((_, value, _, _) =>
    {
        Interlocked.Add(ref appliedTotal, value);
    });
    meterListener.Start();

    // Act: run the deploy, then stop listening before asserting so the
    // total is final. The using declaration remains as the safety net
    // if the deploy throws.
    var outcome = await sut.DeployInstanceAsync(instanceId, "admin");
    meterListener.Dispose();

    // Assert: a fresh first-time deploy was applied -> exactly one increment.
    Assert.True(outcome.IsSuccess);
    Assert.Equal(1, Interlocked.Read(ref appliedTotal));
}
[Fact]
public async Task DeployInstanceAsync_Reconciled_DoesNotEmitDeploymentsAppliedCounter()
{
    // Contract under test: reconciliation recovers a PRIOR timed-out
    // apply instead of performing a fresh one. Counting it would risk
    // double-counting the original apply, so
    // scadabridge.deployments.applied must stay untouched on a
    // reconciled (no re-deploy) success.
    const int instanceId = 56;
    const string instanceName = "MetricReconcileInst";
    const string revisionHash = "sha256:target";

    // Arrange: the repository holds an in-progress deployment record
    // carrying the same revision hash the probe actor is configured
    // with, and no deployed snapshot for the instance.
    var reconcileTarget = new Instance(instanceName)
    {
        Id = instanceId,
        SiteId = 1,
        State = InstanceState.NotDeployed
    };
    _repo.GetInstanceByIdAsync(instanceId, Arg.Any<CancellationToken>()).Returns(reconcileTarget);
    SetupValidPipeline(instanceId, instanceName, revisionHash);

    var inFlightRecord = new DeploymentRecord("dep-prior-56", "admin")
    {
        InstanceId = instanceId,
        Status = DeploymentStatus.InProgress,
        RevisionHash = revisionHash
    };
    _repo.GetCurrentDeploymentStatusAsync(instanceId, Arg.Any<CancellationToken>())
        .Returns(inFlightRecord);
    _repo.GetDeployedSnapshotByInstanceIdAsync(instanceId, Arg.Any<CancellationToken>())
        .Returns((DeployedConfigSnapshot?)null);

    var probeCounters = new ReconcileProbeCounters();
    var probeActor = Sys.ActorOf(Props.Create(() =>
        new ReconcileProbeActor(probeCounters, siteHash: revisionHash, failQuery: false)));
    var sut = CreateServiceWithCommActor(probeActor);

    // Sum every long measurement reported by the applied-deployments
    // instrument on the ScadaBridge meter; all other instruments are
    // left unsubscribed.
    long appliedTotal = 0;
    using var meterListener = new MeterListener();
    meterListener.InstrumentPublished = (published, self) =>
    {
        var isAppliedCounter =
            published.Meter.Name == ScadaBridgeTelemetry.MeterName
            && published.Name == "scadabridge.deployments.applied";
        if (!isAppliedCounter)
        {
            return;
        }

        self.EnableMeasurementEvents(published);
    };
    meterListener.SetMeasurementEventCallback<long>((_, value, _, _) =>
    {
        Interlocked.Add(ref appliedTotal, value);
    });
    meterListener.Start();

    // Act: run the deploy, then stop listening before asserting so the
    // total is final. The using declaration remains as the safety net
    // if the deploy throws.
    var outcome = await sut.DeployInstanceAsync(instanceId, "admin");
    meterListener.Dispose();

    // Assert: reconciled — no fresh deploy was sent, so no increment.
    Assert.True(outcome.IsSuccess);
    Assert.Equal(0, probeCounters.DeployCount);
    Assert.Equal(0, Interlocked.Read(ref appliedTotal));
}
// ── DeploymentManager-011: lifecycle success paths ──
[Fact]