fix: only active singleton node sends health reports
Both nodes of a site cluster were sending health reports. The standby node (without the DeploymentManager singleton) reported 0 instances and no connections, overwriting the active node's data in the aggregator. Added IsActiveNode flag to ISiteHealthCollector, set by DeploymentManagerActor on PreStart/PostStop. HealthReportSender skips sending when the node is not active. Also ensured EnsureDclConnections is called during startup batch creation so data connections survive container restarts.
This commit is contained in:
17
cli_issues.md
Normal file
17
cli_issues.md
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
# CLI Issues & Missing Features
|
||||||
|
|
||||||
|
Log any bugs, unexpected behavior, or missing features in the ScadaLink CLI here.
|
||||||
|
|
||||||
|
## Format
|
||||||
|
|
||||||
|
```
|
||||||
|
### [Short title]
|
||||||
|
- **Command**: `scadalink <command>`
|
||||||
|
- **Description**: What happened or what's missing
|
||||||
|
- **Expected**: What should happen
|
||||||
|
- **Error output** (if applicable):
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
<!-- Add issues below this line -->
|
||||||
@@ -1247,3 +1247,7 @@ The CLI connects to the Central cluster using Akka.NET's `ClusterClient`. It doe
|
|||||||
The connection is established per-command invocation and torn down cleanly via `CoordinatedShutdown` when the command completes.
|
The connection is established per-command invocation and torn down cleanly via `CoordinatedShutdown` when the command completes.
|
||||||
|
|
||||||
Role enforcement is applied by the ManagementActor on the server side. The current CLI placeholder user carries `Admin`, `Design`, and `Deployment` roles; production use will integrate LDAP authentication via `--username` / `--password`.
|
Role enforcement is applied by the ManagementActor on the server side. The current CLI placeholder user carries `Admin`, `Design`, and `Deployment` roles; production use will integrate LDAP authentication via `--username` / `--password`.
|
||||||
|
|
||||||
|
## Issues & Missing Features
|
||||||
|
|
||||||
|
If you encounter bugs, unexpected behavior, or missing features in the CLI, log them in [`cli_issues.md`](../../cli_issues.md) in the project root. Include a brief description, the command involved, and any relevant error output.
|
||||||
|
|||||||
@@ -49,9 +49,10 @@ public class HealthReportSender : BackgroundService
|
|||||||
{
|
{
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
// TODO: Wire S&F buffer depths when StoreAndForward service is available in DI
|
// Only the active node (running the DeploymentManager singleton) sends health reports.
|
||||||
// e.g., var depths = await _bufferDepthProvider.GetDepthsAsync();
|
// The standby node has no instance/connection data and would overwrite the active node's report.
|
||||||
// _collector.SetStoreAndForwardDepths(depths);
|
if (!_collector.IsActiveNode)
|
||||||
|
continue;
|
||||||
|
|
||||||
var seq = Interlocked.Increment(ref _sequenceNumber);
|
var seq = Interlocked.Increment(ref _sequenceNumber);
|
||||||
var report = _collector.CollectReport(_siteId);
|
var report = _collector.CollectReport(_siteId);
|
||||||
|
|||||||
@@ -17,5 +17,7 @@ public interface ISiteHealthCollector
|
|||||||
void UpdateTagResolution(string connectionName, int totalSubscribed, int successfullyResolved);
|
void UpdateTagResolution(string connectionName, int totalSubscribed, int successfullyResolved);
|
||||||
void SetStoreAndForwardDepths(IReadOnlyDictionary<string, int> depths);
|
void SetStoreAndForwardDepths(IReadOnlyDictionary<string, int> depths);
|
||||||
void SetInstanceCounts(int deployed, int enabled, int disabled);
|
void SetInstanceCounts(int deployed, int enabled, int disabled);
|
||||||
|
void SetActiveNode(bool isActive);
|
||||||
|
bool IsActiveNode { get; }
|
||||||
SiteHealthReport CollectReport(string siteId);
|
SiteHealthReport CollectReport(string siteId);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -17,6 +17,7 @@ public class SiteHealthCollector : ISiteHealthCollector
|
|||||||
private readonly ConcurrentDictionary<string, TagResolutionStatus> _tagResolutionCounts = new();
|
private readonly ConcurrentDictionary<string, TagResolutionStatus> _tagResolutionCounts = new();
|
||||||
private IReadOnlyDictionary<string, int> _sfBufferDepths = new Dictionary<string, int>();
|
private IReadOnlyDictionary<string, int> _sfBufferDepths = new Dictionary<string, int>();
|
||||||
private int _deployedInstanceCount, _enabledInstanceCount, _disabledInstanceCount;
|
private int _deployedInstanceCount, _enabledInstanceCount, _disabledInstanceCount;
|
||||||
|
private volatile bool _isActiveNode;
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Increment the script error counter. Covers unhandled exceptions,
|
/// Increment the script error counter. Covers unhandled exceptions,
|
||||||
@@ -90,6 +91,10 @@ public class SiteHealthCollector : ISiteHealthCollector
|
|||||||
Interlocked.Exchange(ref _disabledInstanceCount, disabled);
|
Interlocked.Exchange(ref _disabledInstanceCount, disabled);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void SetActiveNode(bool isActive) => _isActiveNode = isActive;
|
||||||
|
|
||||||
|
public bool IsActiveNode => _isActiveNode;
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Collect the current health report for the site and reset interval counters.
|
/// Collect the current health report for the site and reset interval counters.
|
||||||
/// Connection statuses and tag resolution counts are NOT reset (they reflect current state).
|
/// Connection statuses and tag resolution counts are NOT reset (they reflect current state).
|
||||||
|
|||||||
@@ -83,6 +83,7 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
|
|||||||
protected override void PreStart()
|
protected override void PreStart()
|
||||||
{
|
{
|
||||||
base.PreStart();
|
base.PreStart();
|
||||||
|
_healthCollector?.SetActiveNode(true);
|
||||||
_logger.LogInformation("DeploymentManagerActor starting — loading deployed configs from SQLite...");
|
_logger.LogInformation("DeploymentManagerActor starting — loading deployed configs from SQLite...");
|
||||||
|
|
||||||
// Load all configs asynchronously and pipe to self
|
// Load all configs asynchronously and pipe to self
|
||||||
@@ -94,6 +95,12 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
|
|||||||
}).PipeTo(Self);
|
}).PipeTo(Self);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected override void PostStop()
|
||||||
|
{
|
||||||
|
_healthCollector?.SetActiveNode(false);
|
||||||
|
base.PostStop();
|
||||||
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// OneForOneStrategy: Resume on exceptions to preserve Instance Actor state,
|
/// OneForOneStrategy: Resume on exceptions to preserve Instance Actor state,
|
||||||
/// Stop only on ActorInitializationException (actor failed to start).
|
/// Stop only on ActorInitializationException (actor failed to start).
|
||||||
@@ -160,6 +167,7 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
|
|||||||
for (var i = startIdx; i < endIdx; i++)
|
for (var i = startIdx; i < endIdx; i++)
|
||||||
{
|
{
|
||||||
var config = state.Configs[i];
|
var config = state.Configs[i];
|
||||||
|
EnsureDclConnections(config.ConfigJson);
|
||||||
CreateInstanceActor(config.InstanceUniqueName, config.ConfigJson);
|
CreateInstanceActor(config.InstanceUniqueName, config.ConfigJson);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user