feat(scadabridge): wire scadabridge.store_and_forward.queue.depth gauge to buffered count
This commit is contained in:
@@ -0,0 +1,176 @@
|
||||
using System.Diagnostics.Metrics;
|
||||
using Microsoft.Data.Sqlite;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using ZB.MOM.WW.ScadaBridge.Commons.Observability;
|
||||
using ZB.MOM.WW.ScadaBridge.Commons.Types.Enums;
|
||||
|
||||
namespace ZB.MOM.WW.ScadaBridge.StoreAndForward.Tests;
|
||||
|
||||
/// <summary>
|
||||
/// WP-14 (telemetry follow-on): verifies the cached buffered-message counter that
|
||||
/// backs the <c>scadabridge.store_and_forward.queue.depth</c> observable gauge tracks
|
||||
/// the live (Pending) queue across the existing enqueue / drain / park / requeue paths,
|
||||
/// and that the sync gauge callback reports it.
|
||||
///
|
||||
/// The gauge is read the way the OpenTelemetry collector reads it — via a
|
||||
/// <see cref="MeterListener"/> that forces an observation (the callback is synchronous
|
||||
/// and does no I/O, which is the whole point of caching the count). <see cref="StoreAndForwardService.StartAsync"/>
|
||||
/// seeds the counter from storage and registers the provider against this service
|
||||
/// instance, so the gauge resolves to this test's counter.
|
||||
/// </summary>
|
||||
public class QueueDepthGaugeTests : IAsyncLifetime, IDisposable
|
||||
{
|
||||
private readonly SqliteConnection _keepAlive;
|
||||
private readonly StoreAndForwardStorage _storage;
|
||||
private readonly StoreAndForwardService _service;
|
||||
|
||||
/// <summary>
/// Builds the storage and service under test on top of a uniquely named,
/// shared-cache in-memory SQLite database.
/// </summary>
public QueueDepthGaugeTests()
{
    // The GUID suffix gives every test instance its own database name.
    var databaseName = $"QueueDepthTests_{Guid.NewGuid():N}";
    var connectionString = $"Data Source={databaseName};Mode=Memory;Cache=Shared";

    // Hold one connection open until Dispose(): a shared-cache in-memory SQLite
    // database only persists while at least one connection to it remains open.
    _keepAlive = new SqliteConnection(connectionString);
    _keepAlive.Open();

    _storage = new StoreAndForwardStorage(
        connectionString,
        NullLogger<StoreAndForwardStorage>.Instance);

    _service = new StoreAndForwardService(
        _storage,
        new StoreAndForwardOptions
        {
            DefaultRetryInterval = TimeSpan.Zero,
            DefaultMaxRetries = 3,
            // Ten minutes is far longer than any test run, so the background sweep
            // never fires by itself; the tests trigger sweeps explicitly through
            // RetryPendingMessagesAsync.
            RetryTimerInterval = TimeSpan.FromMinutes(10)
        },
        NullLogger<StoreAndForwardService>.Instance);
}
|
||||
|
||||
/// <summary>
/// Async test setup: initializes the storage, then starts the service under test.
/// </summary>
public async Task InitializeAsync()
{
    // Storage goes first so the service starts against an initialized, still-empty store.
    await _storage.InitializeAsync();

    // Starting the service seeds its cached buffered count (_bufferedCount) from that
    // empty store and registers this instance as the queue-depth provider.
    await _service.StartAsync();
}
|
||||
|
||||
/// <summary>
/// Async test teardown: stops the service under test.
/// </summary>
public async Task DisposeAsync()
{
    await _service.StopAsync();
}
|
||||
|
||||
/// <summary>
/// Disposes the SQLite connection that was opened in the constructor.
/// </summary>
public void Dispose()
{
    _keepAlive.Dispose();
}
|
||||
|
||||
/// <summary>
/// Samples the <c>scadabridge.store_and_forward.queue.depth</c> gauge once, synchronously.
/// A short-lived <see cref="MeterListener"/> subscribes to that single instrument and
/// <see cref="MeterListener.RecordObservableInstruments"/> forces its callback to run
/// before this method returns.
/// </summary>
/// <returns>
/// The last <see cref="long"/> measurement reported for the gauge, or <c>-1</c> when
/// no measurement was reported during the observation.
/// </returns>
private static long ReadQueueDepthGauge()
{
    const string GaugeName = "scadabridge.store_and_forward.queue.depth";

    // -1 is a sentinel: it survives only if the measurement callback never runs.
    long latest = -1;

    using var listener = new MeterListener();
    listener.InstrumentPublished = SubscribeIfQueueDepth;
    listener.SetMeasurementEventCallback<long>(
        (instrument, value, tags, state) => latest = value);

    listener.Start();
    listener.RecordObservableInstruments();

    return latest;

    // Enables measurement events only for the queue-depth gauge on the ScadaBridge meter;
    // every other published instrument is ignored.
    void SubscribeIfQueueDepth(Instrument instrument, MeterListener source)
    {
        if (instrument.Meter.Name != ScadaBridgeTelemetry.MeterName ||
            instrument.Name != GaugeName)
        {
            return;
        }

        source.EnableMeasurementEvents(instrument);
    }
}
|
||||
|
||||
[Fact]
public async Task Gauge_TracksBufferedDepth_AcrossEnqueueDrainAndPark()
{
    const int BatchSize = 3;
    const string EmptyPayload = """{}""";

    // Baseline: the service was started over an empty store, so the gauge reads 0.
    Assert.Equal(0, ReadQueueDepthGauge());

    // Delivery handler controlled by a flag. While the flag is false it throws a
    // transient error, so the immediate delivery attempt fails and the message is buffered.
    var handlerSucceeds = false;
    _service.RegisterDeliveryHandler(
        StoreAndForwardCategory.ExternalSystem,
        _ =>
        {
            if (handlerSucceeds)
            {
                return Task.FromResult(true);
            }

            throw new HttpRequestException("transient");
        });

    // Enqueue path: every failed enqueue buffers one row, so the gauge ends at BatchSize.
    for (var remaining = BatchSize; remaining > 0; remaining--)
    {
        var enqueueResult = await _service.EnqueueAsync(
            StoreAndForwardCategory.ExternalSystem, "api", EmptyPayload);
        Assert.True(enqueueResult.WasBuffered);
    }

    Assert.Equal(BatchSize, ReadQueueDepthGauge());

    // Drain path: let the handler succeed and run one explicit sweep; the gauge returns to 0.
    handlerSucceeds = true;
    await _service.RetryPendingMessagesAsync();
    Assert.Equal(0, ReadQueueDepthGauge());

    // Park path: buffer one more message with maxRetries: 1 so a single failing sweep
    // parks it. A parked message is no longer part of the live queue.
    handlerSucceeds = false;
    var parkResult = await _service.EnqueueAsync(
        StoreAndForwardCategory.ExternalSystem, "api", EmptyPayload, maxRetries: 1);
    Assert.True(parkResult.WasBuffered);
    Assert.Equal(1, ReadQueueDepthGauge());

    await _service.RetryPendingMessagesAsync();

    var parked = await _storage.GetMessageByIdAsync(parkResult.MessageId);
    Assert.Equal(StoreAndForwardMessageStatus.Parked, parked!.Status);
    Assert.Equal(0, ReadQueueDepthGauge());

    // Operator requeue: moving the parked message back to Pending raises the gauge to 1.
    var requeued = await _service.RetryParkedMessageAsync(parkResult.MessageId);
    Assert.True(requeued);
    Assert.Equal(1, ReadQueueDepthGauge());
}
|
||||
|
||||
[Fact]
public async Task Gauge_SeedsFromExistingPendingRows_OnStart()
{
    // Write two Pending rows straight into storage *before* a fresh service starts,
    // simulating a process restart over a non-empty buffer. StartAsync must seed the
    // cached counter from the store so the gauge does not under-report on restart.
    await _storage.EnqueueAsync(
        NewPendingMessage(StoreAndForwardCategory.ExternalSystem, "api"));
    await _storage.EnqueueAsync(
        NewPendingMessage(StoreAndForwardCategory.Notification, "list"));

    // A second service over the same storage. The long timer interval keeps its
    // background sweep from firing during the test.
    var fresh = new StoreAndForwardService(
        _storage,
        new StoreAndForwardOptions { RetryTimerInterval = TimeSpan.FromMinutes(10) },
        NullLogger<StoreAndForwardService>.Instance);

    try
    {
        await fresh.StartAsync();

        // Starting the fresh service registers it as the global queue-depth provider,
        // so the gauge now reports the count it seeded from the two stored rows.
        Assert.Equal(2, ReadQueueDepthGauge());
    }
    finally
    {
        // Always stop the extra service, even when the assertion above fails.
        await fresh.StopAsync();
    }
}

/// <summary>
/// Creates a Pending store-and-forward message with a new GUID id, an empty JSON
/// payload, the current UTC time as creation time and a retry budget of 3.
/// </summary>
/// <param name="category">Category assigned to the message.</param>
/// <param name="target">Target assigned to the message.</param>
/// <returns>A new message that has not yet been written to storage.</returns>
private static StoreAndForwardMessage NewPendingMessage(
    StoreAndForwardCategory category,
    string target)
{
    return new StoreAndForwardMessage
    {
        Id = Guid.NewGuid().ToString("N"),
        Category = category,
        Target = target,
        PayloadJson = "{}",
        Status = StoreAndForwardMessageStatus.Pending,
        CreatedAt = DateTimeOffset.UtcNow,
        MaxRetries = 3
    };
}
|
||||
}
|
||||
Reference in New Issue
Block a user