feat(scripts): realign Test Run with runtime API, add anonymous-object calls and instance binding

The Test Run sandbox and Monaco analysis modelled a script API that had
drifted from the site runtime's ScriptGlobals, so real scripts failed to
compile in Test Run. Realign both to the runtime surface
(Instance/Scripts/ExternalSystem/Attributes/Children/Parent) and drop the
duplicate ScriptHost stub so the two cannot diverge again.

- Script calls (Scripts.CallShared, Instance.CallScript, Route.To().Call)
  accept an anonymous object instead of a hand-built dictionary, via a
  shared ScriptArgs normalizer; existing dictionary calls still compile.
- Test Run can optionally bind to a deployed instance, so Instance/
  Attributes/CallScript route to it cross-site; adds site-side
  RouteToGetAttributes/RouteToSetAttributes handlers.
- Adds Test Run panels to the API method and template script editors.
- Fixes the TestDatabaseQuery seed script, which queried a table that
  never existed.

Also commits unrelated in-progress work already in the tree: the health
monitoring report loop, site streaming changes, and the Admin/Design
data-connection and SMTP page reorganization.
This commit is contained in:
Joseph Doherty
2026-05-16 03:37:56 -04:00
parent d7b05b40e9
commit 295150751f
50 changed files with 2926 additions and 550 deletions

View File

@@ -47,6 +47,7 @@ public class CentralHealthAggregator : BackgroundService, ICentralHealthAggregat
SiteId = report.SiteId,
LatestReport = report,
LastReportReceivedAt = now,
LastHeartbeatAt = now,
LastSequenceNumber = report.SequenceNumber,
IsOnline = true
};
@@ -64,6 +65,7 @@ public class CentralHealthAggregator : BackgroundService, ICentralHealthAggregat
var wasOffline = !existing.IsOnline;
existing.LatestReport = report;
existing.LastReportReceivedAt = now;
existing.LastHeartbeatAt = now;
existing.LastSequenceNumber = report.SequenceNumber;
existing.IsOnline = true;
@@ -86,8 +88,8 @@ public class CentralHealthAggregator : BackgroundService, ICentralHealthAggregat
if (!_siteStates.TryGetValue(siteId, out var state))
return;
if (receivedAt > state.LastReportReceivedAt)
state.LastReportReceivedAt = receivedAt;
if (receivedAt > state.LastHeartbeatAt)
state.LastHeartbeatAt = receivedAt;
if (!state.IsOnline)
{
@@ -141,12 +143,15 @@ public class CentralHealthAggregator : BackgroundService, ICentralHealthAggregat
var state = kvp.Value;
if (!state.IsOnline) continue;
var elapsed = now - state.LastReportReceivedAt;
// Use LastHeartbeatAt — heartbeats arrive every ~5s from any
// healthy site node, so OfflineTimeout only fires when no node
// can reach central, not during single-node failovers.
var elapsed = now - state.LastHeartbeatAt;
if (elapsed > _options.OfflineTimeout)
{
state.IsOnline = false;
_logger.LogWarning(
"Site {SiteId} marked offline — no report for {Elapsed}s (timeout: {Timeout}s)",
"Site {SiteId} marked offline — no signal for {Elapsed}s (timeout: {Timeout}s)",
state.SiteId, elapsed.TotalSeconds, _options.OfflineTimeout.TotalSeconds);
}
}

View File

@@ -0,0 +1,82 @@
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
namespace ScadaLink.HealthMonitoring;
/// <summary>
/// Hosted service that makes the central cluster report on itself, playing
/// the role <see cref="HealthReportSender"/> plays for a site. On each
/// ReportInterval tick the current cluster leader (Primary) collects a
/// report under <see cref="CentralSiteId"/> and passes it to the local
/// <see cref="ICentralHealthAggregator"/>, so central can be shown as one
/// more card on /monitoring/health. A node that is not Primary only updates
/// the collector's active-node flag and produces no report; it begins
/// reporting on the first tick after it observes itself as Primary.
/// </summary>
public class CentralHealthReportLoop : BackgroundService
{
    /// <summary>
    /// Site identifier reserved for the central cluster within the keyspace
    /// shared with real sites in the CentralHealthAggregator.
    /// </summary>
    public const string CentralSiteId = "central";

    private readonly ISiteHealthCollector _collector;
    private readonly ICentralHealthAggregator _aggregator;
    private readonly IClusterNodeProvider _clusterNodeProvider;
    private readonly HealthMonitoringOptions _options;
    private readonly ILogger<CentralHealthReportLoop> _logger;

    // Starts at the Unix time in milliseconds captured when this instance is
    // constructed. The intent is that reports from a newly elected central
    // leader sort after those of any earlier leader for siteId="central".
    // NOTE(review): the seed is taken at construction, not at election.
    private long _sequenceNumber = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds();

    /// <summary>
    /// Creates the loop and captures its collaborators.
    /// </summary>
    /// <param name="collector">Builds the health report for the central node.</param>
    /// <param name="aggregator">Aggregator that receives each generated report.</param>
    /// <param name="clusterNodeProvider">Supplies leadership status and the cluster node list.</param>
    /// <param name="options">Health monitoring options; ReportInterval sets the tick period.</param>
    /// <param name="logger">Logger for lifecycle, per-report, and failure messages.</param>
    public CentralHealthReportLoop(
        ISiteHealthCollector collector,
        ICentralHealthAggregator aggregator,
        IClusterNodeProvider clusterNodeProvider,
        IOptions<HealthMonitoringOptions> options,
        ILogger<CentralHealthReportLoop> logger)
    {
        (_collector, _aggregator, _clusterNodeProvider, _options, _logger) =
            (collector, aggregator, clusterNodeProvider, options.Value, logger);
    }

    /// <summary>
    /// Runs the periodic loop: one reporting attempt per timer tick until
    /// the timer stops ticking or <paramref name="stoppingToken"/> is cancelled.
    /// </summary>
    /// <param name="stoppingToken">Token passed to each wait for the next tick.</param>
    protected override async Task ExecuteAsync(CancellationToken stoppingToken)
    {
        _logger.LogInformation(
            "Central health report loop starting, interval {Interval}s",
            _options.ReportInterval.TotalSeconds);

        using var ticker = new PeriodicTimer(_options.ReportInterval);
        while (await ticker.WaitForNextTickAsync(stoppingToken).ConfigureAwait(false))
        {
            PublishReportIfPrimary();
        }
    }

    // Performs a single tick: refreshes the collector's active-node flag and,
    // when this node is Primary, collects, stamps and submits one report.
    // Any exception is logged and swallowed so the loop keeps ticking.
    private void PublishReportIfPrimary()
    {
        try
        {
            var selfIsLeader = _clusterNodeProvider.SelfIsPrimary;
            _collector.SetActiveNode(selfIsLeader);

            if (selfIsLeader)
            {
                _collector.SetClusterNodes(_clusterNodeProvider.GetClusterNodes());

                // The sequence number is claimed before collection, so a
                // failed collection still consumes a number.
                var nextSequence = Interlocked.Increment(ref _sequenceNumber);
                var stamped = _collector.CollectReport(CentralSiteId) with { SequenceNumber = nextSequence };

                _aggregator.ProcessReport(stamped);
                _logger.LogDebug("Generated central health report #{Seq}", nextSequence);
            }
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to generate central health report");
        }
    }
}

View File

@@ -19,7 +19,13 @@ public class HealthReportSender : BackgroundService
private readonly string _siteId;
private readonly StoreAndForwardStorage? _sfStorage;
private readonly IClusterNodeProvider? _clusterNodeProvider;
private long _sequenceNumber;
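// Seeded with Unix-ms at construction so that reports from a freshly-active
// node sort after reports from any prior active node for the same site.
// Without this seeding, failover would silently drop the new active's first
// reports because their per-process counter starts below the prior active's
// last sequence number.
// NOTE(review): the seed is captured when this instance is constructed, not
// when the node becomes active — confirm a long-running standby cannot hold
// a seed lower than the prior active's last sequence number.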
private long _sequenceNumber = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds();
public HealthReportSender(
ISiteHealthCollector collector,

View File

@@ -9,4 +9,11 @@ namespace ScadaLink.HealthMonitoring;
public interface IClusterNodeProvider
{
/// <summary>
/// Returns the statuses of the cluster nodes as seen by this provider.
/// The central report loop passes the result to the health collector
/// before collecting a report.
/// NOTE(review): membership and ordering of the list are defined by the
/// implementations, which are not visible here — confirm before relying on them.
/// </summary>
IReadOnlyList<NodeStatus> GetClusterNodes();
/// <summary>
/// True when this node is currently the cluster leader (Primary) for the
/// provider's role scope. Used by the central report loop to decide which
/// node should generate the "central" health report.
/// </summary>
bool SelfIsPrimary { get; }
}

View File

@@ -26,13 +26,16 @@ public static class ServiceCollectionExtensions
}
/// <summary>
/// Register central-side health aggregation services.
/// Register central-side health aggregation services. Includes the
/// <see cref="CentralHealthReportLoop"/> that generates a self-report
/// for the central cluster so it appears on /monitoring/health.
/// </summary>
public static IServiceCollection AddCentralHealthAggregation(this IServiceCollection services)
{
    // One CentralHealthAggregator singleton backs three registrations: the
    // concrete type, the ICentralHealthAggregator interface, and a hosted
    // service. The interface and hosted-service factories both resolve the
    // concrete singleton, so all three share the same instance.
    // CentralHealthReportLoop is added as a hosted service afterwards,
    // preserving the original registration order.
    return services
        .AddSingleton<CentralHealthAggregator>()
        .AddSingleton<ICentralHealthAggregator>(provider => provider.GetRequiredService<CentralHealthAggregator>())
        .AddHostedService(provider => provider.GetRequiredService<CentralHealthAggregator>())
        .AddHostedService<CentralHealthReportLoop>();
}

View File

@@ -9,7 +9,21 @@ public class SiteHealthState
{
/// <summary>
/// Identifier of the site this state belongs to; the aggregator copies it
/// from the report's SiteId when the state is first created.
/// </summary>
public required string SiteId { get; init; }
/// <summary>
/// The most recently processed <see cref="SiteHealthReport"/> for the site.
/// Declared with a null-forgiving default, so creators are expected to
/// assign it; the aggregator sets it when creating and updating the state.
/// </summary>
public SiteHealthReport LatestReport { get; set; } = null!;
/// <summary>
/// Time the latest full <see cref="SiteHealthReport"/> was processed.
/// Used by the UI to surface report staleness during failover.
/// </summary>
public DateTimeOffset LastReportReceivedAt { get; set; }
/// <summary>
/// Time the most recent signal of any kind (full report OR ~5s heartbeat)
/// was received. Drives offline detection — heartbeats from the standby
/// keep the site marked online even when the active node is unable to
/// produce a report (mid-failover, brief stalls).
/// </summary>
public DateTimeOffset LastHeartbeatAt { get; set; }
/// <summary>
/// Sequence number of the latest processed report; the aggregator sets it
/// from the report's SequenceNumber alongside <see cref="LatestReport"/>.
/// </summary>
public long LastSequenceNumber { get; set; }
/// <summary>
/// Whether the site is currently considered online. The aggregator sets it
/// to true when a report is processed and to false once the time since
/// <see cref="LastHeartbeatAt"/> exceeds the configured OfflineTimeout.
/// </summary>
public bool IsOnline { get; set; }
}