993126273a
Implement health checks for SQL Server, Oracle databases (JDE, CMS, GIW), and LDAP servers to enable comprehensive system monitoring via the /health endpoint.
225 lines
7.7 KiB
C#
225 lines
7.7 KiB
C#
using System.Diagnostics;
|
|
using JdeScoping.DataAccess.Interfaces;
|
|
using Microsoft.Extensions.Diagnostics.HealthChecks;
|
|
|
|
namespace JdeScoping.DataAccess.HealthChecks;
|
|
|
|
/// <summary>
/// Health check that probes connectivity to every data source used by the application.
/// </summary>
/// <remarks>
/// <para>
/// The data sources are probed one after another, each with its own 5-second connection timeout:
/// </para>
/// <list type="bullet">
/// <item><description>LotFinder (SQL Server) - critical; a timeout or failure makes the result Unhealthy.</description></item>
/// <item><description>JDE, CMS, GIW (Oracle) - important but not critical; a timeout or failure makes the result Degraded.</description></item>
/// </list>
/// <para>
/// A data source that connects successfully but needs more than 2000 ms is reported as Degraded.
/// </para>
/// <para>
/// An <see cref="OperationCanceledException"/> raised after the caller's token has been cancelled is
/// not recorded as a database failure; it propagates to the caller.
/// </para>
/// </remarks>
public class DatabaseHealthCheck : IHealthCheck
{
    /// <summary>Maximum time a single connection attempt may take before it is recorded as a timeout.</summary>
    private static readonly TimeSpan ConnectionTimeout = TimeSpan.FromSeconds(5);

    /// <summary>Connection time (in milliseconds) above which a successful connection counts as degraded.</summary>
    private const long DegradedThresholdMs = 2000;

    private readonly IDbConnectionFactory _connectionFactory;

    /// <summary>
    /// Initializes a new instance of the <see cref="DatabaseHealthCheck"/> class.
    /// </summary>
    /// <param name="connectionFactory">The database connection factory.</param>
    /// <exception cref="ArgumentNullException"><paramref name="connectionFactory"/> is <c>null</c>.</exception>
    public DatabaseHealthCheck(IDbConnectionFactory connectionFactory)
    {
        _connectionFactory = connectionFactory ?? throw new ArgumentNullException(nameof(connectionFactory));
    }

    /// <summary>
    /// Probes LotFinder, JDE, CMS and GIW in that order and aggregates the outcome.
    /// </summary>
    /// <param name="context">The health check context; not consulted by this implementation.</param>
    /// <param name="cancellationToken">Token used to cancel the whole check.</param>
    /// <returns>
    /// Unhealthy when LotFinder timed out or failed; otherwise Degraded when any data source was slow
    /// or an Oracle data source timed out or failed; otherwise Healthy. Every result carries the
    /// per-source <c>{name}_Status</c>, <c>{name}_ResponseMs</c> and (on problems) <c>{name}_Error</c> entries.
    /// </returns>
    public async Task<HealthCheckResult> CheckHealthAsync(
        HealthCheckContext context,
        CancellationToken cancellationToken = default)
    {
        var data = new Dictionary<string, object>();
        var unhealthyDbs = new List<string>();
        var degradedDbs = new List<string>();

        // LotFinder (SQL Server) is critical: timeouts and failures land in the unhealthy list.
        await ProbeAsync(
            "LotFinder",
            async ct => await _connectionFactory.CreateLotFinderConnectionAsync(ct),
            data,
            unhealthyDbs,
            degradedDbs,
            cancellationToken);

        // The Oracle sources are important but not critical: their timeouts and failures only
        // degrade the result, so the degraded list doubles as their problem list.
        await ProbeAsync(
            "JDE",
            async ct => await _connectionFactory.CreateJdeConnectionAsync(ct),
            data,
            degradedDbs,
            degradedDbs,
            cancellationToken);

        await ProbeAsync(
            "CMS",
            async ct => await _connectionFactory.CreateCmsConnectionAsync(ct),
            data,
            degradedDbs,
            degradedDbs,
            cancellationToken);

        await ProbeAsync(
            "GIW",
            async ct => await _connectionFactory.CreateGiwConnectionAsync(ct),
            data,
            degradedDbs,
            degradedDbs,
            cancellationToken);

        // Unhealthy outranks degraded; the full per-source details are attached in every case.
        if (unhealthyDbs.Count > 0)
        {
            return HealthCheckResult.Unhealthy(
                $"Database(s) unavailable: {string.Join(", ", unhealthyDbs)}",
                data: data);
        }

        if (degradedDbs.Count > 0)
        {
            return HealthCheckResult.Degraded(
                $"Database(s) slow/degraded: {string.Join(", ", degradedDbs)}",
                data: data);
        }

        return HealthCheckResult.Healthy("All databases connected", data: data);
    }

    /// <summary>
    /// Attempts a single connection and records the outcome for one data source.
    /// </summary>
    /// <param name="name">Display name of the data source; also the prefix of its keys in <paramref name="data"/>.</param>
    /// <param name="openAsync">Creates the connection, honoring the supplied (timeout-limited) token.</param>
    /// <param name="data">Health report data that receives the status, timing and error entries.</param>
    /// <param name="problemDbs">List that receives <paramref name="name"/> on a timeout or failure.</param>
    /// <param name="degradedDbs">List that receives <paramref name="name"/> when the connection succeeded but was slow.</param>
    /// <param name="cancellationToken">The caller's token; linked with the per-connection timeout.</param>
    private static async Task ProbeAsync(
        string name,
        Func<CancellationToken, Task<IAsyncDisposable>> openAsync,
        Dictionary<string, object> data,
        List<string> problemDbs,
        List<string> degradedDbs,
        CancellationToken cancellationToken)
    {
        var sw = Stopwatch.StartNew();
        try
        {
            using var cts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
            cts.CancelAfter(ConnectionTimeout);

            await using var conn = await openAsync(cts.Token);

            // Stop timing before the connection is disposed so only the connect phase is measured.
            sw.Stop();

            RecordSuccess(name, sw.ElapsedMilliseconds, data, degradedDbs);
        }
        catch (OperationCanceledException) when (!cancellationToken.IsCancellationRequested)
        {
            // The caller did not cancel, so the cancellation is attributed to the per-connection timeout.
            sw.Stop();
            RecordTimeout(name, sw.ElapsedMilliseconds, data, problemDbs);
        }
        catch (Exception ex) when (ex is not OperationCanceledException)
        {
            // Caller-requested cancellation is deliberately excluded by the filter above: it must
            // propagate rather than be reported as a database failure.
            sw.Stop();

            // NOTE(review): the raw exception message becomes part of the health report data;
            // confirm the endpoint exposing it is not public, or sanitize the message.
            RecordFailure(name, sw.ElapsedMilliseconds, ex.Message, data, problemDbs);
        }
    }

    /// <summary>
    /// Records a successful connection and flags the data source as degraded when it was slow.
    /// </summary>
    /// <param name="name">Data source name used as key prefix.</param>
    /// <param name="elapsedMs">Measured connection time in milliseconds.</param>
    /// <param name="data">Health report data to update.</param>
    /// <param name="degradedDbs">Receives <paramref name="name"/> when <paramref name="elapsedMs"/> exceeds the degraded threshold.</param>
    private static void RecordSuccess(
        string name,
        long elapsedMs,
        Dictionary<string, object> data,
        List<string> degradedDbs)
    {
        data[$"{name}_Status"] = "Connected";
        data[$"{name}_ResponseMs"] = elapsedMs;

        if (elapsedMs > DegradedThresholdMs)
        {
            degradedDbs.Add(name);
        }
    }

    /// <summary>
    /// Records a connection attempt that exceeded the connection timeout.
    /// </summary>
    /// <param name="name">Data source name used as key prefix.</param>
    /// <param name="elapsedMs">Time spent before the attempt was abandoned, in milliseconds.</param>
    /// <param name="data">Health report data to update.</param>
    /// <param name="problemDbs">List that receives <paramref name="name"/>.</param>
    private static void RecordTimeout(
        string name,
        long elapsedMs,
        Dictionary<string, object> data,
        List<string> problemDbs)
    {
        data[$"{name}_Status"] = "Timeout";
        data[$"{name}_ResponseMs"] = elapsedMs;
        data[$"{name}_Error"] = $"Connection timeout after {ConnectionTimeout.TotalSeconds}s";
        problemDbs.Add(name);
    }

    /// <summary>
    /// Records a connection attempt that failed with an exception.
    /// </summary>
    /// <param name="name">Data source name used as key prefix.</param>
    /// <param name="elapsedMs">Time spent before the failure, in milliseconds.</param>
    /// <param name="errorMessage">Error text stored under the <c>{name}_Error</c> key.</param>
    /// <param name="data">Health report data to update.</param>
    /// <param name="problemDbs">List that receives <paramref name="name"/>.</param>
    private static void RecordFailure(
        string name,
        long elapsedMs,
        string errorMessage,
        Dictionary<string, object> data,
        List<string> problemDbs)
    {
        data[$"{name}_Status"] = "Failed";
        data[$"{name}_ResponseMs"] = elapsedMs;
        data[$"{name}_Error"] = errorMessage;
        problemDbs.Add(name);
    }
}
|