Reformat/cleanup
All checks were successful
NuGet Package Publish / nuget (push) Successful in 1m10s
All checks were successful
NuGet Package Publish / nuget (push) Successful in 1m10s
This commit is contained in:
@@ -1,8 +1,3 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.Diagnostics.HealthChecks;
|
||||
using ZB.MOM.WW.CBDDC.Core.Storage;
|
||||
using ZB.MOM.WW.CBDDC.Hosting.Configuration;
|
||||
@@ -10,17 +5,17 @@ using ZB.MOM.WW.CBDDC.Hosting.Configuration;
|
||||
namespace ZB.MOM.WW.CBDDC.Hosting.HealthChecks;
|
||||
|
||||
/// <summary>
|
||||
/// Health check for CBDDC persistence layer.
|
||||
/// Verifies that the database connection is healthy.
|
||||
/// Health check for CBDDC persistence layer.
|
||||
/// Verifies that the database connection is healthy.
|
||||
/// </summary>
|
||||
public class CBDDCHealthCheck : IHealthCheck
|
||||
{
|
||||
private readonly IOplogStore _oplogStore;
|
||||
private readonly IPeerOplogConfirmationStore _peerOplogConfirmationStore;
|
||||
private readonly CBDDCHostingOptions _options;
|
||||
private readonly IPeerOplogConfirmationStore _peerOplogConfirmationStore;
|
||||
|
||||
/// <summary>
|
||||
/// Initializes a new instance of the <see cref="CBDDCHealthCheck"/> class.
|
||||
/// Initializes a new instance of the <see cref="CBDDCHealthCheck" /> class.
|
||||
/// </summary>
|
||||
/// <param name="oplogStore">The oplog store used to verify persistence health.</param>
|
||||
/// <param name="peerOplogConfirmationStore">The peer confirmation store used for confirmation lag health checks.</param>
|
||||
@@ -31,16 +26,17 @@ public class CBDDCHealthCheck : IHealthCheck
|
||||
CBDDCHostingOptions options)
|
||||
{
|
||||
_oplogStore = oplogStore ?? throw new ArgumentNullException(nameof(oplogStore));
|
||||
_peerOplogConfirmationStore = peerOplogConfirmationStore ?? throw new ArgumentNullException(nameof(peerOplogConfirmationStore));
|
||||
_peerOplogConfirmationStore = peerOplogConfirmationStore ??
|
||||
throw new ArgumentNullException(nameof(peerOplogConfirmationStore));
|
||||
_options = options ?? throw new ArgumentNullException(nameof(options));
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Performs a health check against the CBDDC persistence layer.
|
||||
/// Performs a health check against the CBDDC persistence layer.
|
||||
/// </summary>
|
||||
/// <param name="context">The health check execution context.</param>
|
||||
/// <param name="cancellationToken">A token used to cancel the health check.</param>
|
||||
/// <returns>A <see cref="HealthCheckResult"/> describing the health status.</returns>
|
||||
/// <returns>A <see cref="HealthCheckResult" /> describing the health status.</returns>
|
||||
public async Task<HealthCheckResult> CheckHealthAsync(
|
||||
HealthCheckContext context,
|
||||
CancellationToken cancellationToken = default)
|
||||
@@ -58,15 +54,18 @@ public class CBDDCHealthCheck : IHealthCheck
|
||||
var peersWithNoConfirmation = new List<string>();
|
||||
var laggingPeers = new List<string>();
|
||||
var criticalLaggingPeers = new List<string>();
|
||||
var lastSuccessfulConfirmationUpdateByPeer = new Dictionary<string, DateTimeOffset?>(StringComparer.Ordinal);
|
||||
var lastSuccessfulConfirmationUpdateByPeer =
|
||||
new Dictionary<string, DateTimeOffset?>(StringComparer.Ordinal);
|
||||
var maxLagMs = 0L;
|
||||
|
||||
var lagThresholdMs = Math.Max(0, _options.Cluster.PeerConfirmationLagThresholdMs);
|
||||
var criticalLagThresholdMs = Math.Max(lagThresholdMs, _options.Cluster.PeerConfirmationCriticalLagThresholdMs);
|
||||
long lagThresholdMs = Math.Max(0, _options.Cluster.PeerConfirmationLagThresholdMs);
|
||||
long criticalLagThresholdMs =
|
||||
Math.Max(lagThresholdMs, _options.Cluster.PeerConfirmationCriticalLagThresholdMs);
|
||||
|
||||
foreach (var peerNodeId in trackedPeers)
|
||||
foreach (string peerNodeId in trackedPeers)
|
||||
{
|
||||
var confirmations = (await _peerOplogConfirmationStore.GetConfirmationsForPeerAsync(peerNodeId, cancellationToken))
|
||||
var confirmations =
|
||||
(await _peerOplogConfirmationStore.GetConfirmationsForPeerAsync(peerNodeId, cancellationToken))
|
||||
.Where(confirmation => confirmation.IsActive)
|
||||
.ToList();
|
||||
|
||||
@@ -83,19 +82,14 @@ public class CBDDCHealthCheck : IHealthCheck
|
||||
.ThenBy(confirmation => confirmation.ConfirmedLogic)
|
||||
.First();
|
||||
|
||||
var lagMs = Math.Max(0, localHead.PhysicalTime - oldestConfirmation.ConfirmedWall);
|
||||
long lagMs = Math.Max(0, localHead.PhysicalTime - oldestConfirmation.ConfirmedWall);
|
||||
maxLagMs = Math.Max(maxLagMs, lagMs);
|
||||
lastSuccessfulConfirmationUpdateByPeer[peerNodeId] = confirmations.Max(confirmation => confirmation.LastConfirmedUtc);
|
||||
lastSuccessfulConfirmationUpdateByPeer[peerNodeId] =
|
||||
confirmations.Max(confirmation => confirmation.LastConfirmedUtc);
|
||||
|
||||
if (lagMs > lagThresholdMs)
|
||||
{
|
||||
laggingPeers.Add(peerNodeId);
|
||||
}
|
||||
if (lagMs > lagThresholdMs) laggingPeers.Add(peerNodeId);
|
||||
|
||||
if (lagMs > criticalLagThresholdMs)
|
||||
{
|
||||
criticalLaggingPeers.Add(peerNodeId);
|
||||
}
|
||||
if (lagMs > criticalLagThresholdMs) criticalLaggingPeers.Add(peerNodeId);
|
||||
}
|
||||
|
||||
var payload = new Dictionary<string, object>
|
||||
@@ -108,18 +102,14 @@ public class CBDDCHealthCheck : IHealthCheck
|
||||
};
|
||||
|
||||
if (criticalLaggingPeers.Count > 0)
|
||||
{
|
||||
return HealthCheckResult.Unhealthy(
|
||||
$"CBDDC is unhealthy. Critical lag detected for {criticalLaggingPeers.Count} tracked peer(s).",
|
||||
data: payload);
|
||||
}
|
||||
|
||||
if (peersWithNoConfirmation.Count > 0 || laggingPeers.Count > 0)
|
||||
{
|
||||
return HealthCheckResult.Degraded(
|
||||
$"CBDDC is degraded. Lagging peers: {laggingPeers.Count}, unconfirmed peers: {peersWithNoConfirmation.Count}.",
|
||||
data: payload);
|
||||
}
|
||||
|
||||
return HealthCheckResult.Healthy(
|
||||
$"CBDDC is healthy. Latest timestamp: {localHead.PhysicalTime}.",
|
||||
@@ -129,7 +119,7 @@ public class CBDDCHealthCheck : IHealthCheck
|
||||
{
|
||||
return HealthCheckResult.Unhealthy(
|
||||
"CBDDC persistence layer is unavailable",
|
||||
exception: ex);
|
||||
ex);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user