Compare commits
4 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 1d729fb0f8 | |||
| 0b99aceacb | |||
| d57b42bcd6 | |||
| 5e87f7e16f |
@@ -42,3 +42,9 @@ config_cache*.db
|
||||
|
||||
# Client CLI/UI runtime scratch (last-connected endpoint cache)
|
||||
session.dat
|
||||
|
||||
# Secrets / local credentials — never commit
|
||||
sql_login.txt
|
||||
|
||||
# OPC UA certificate store (runtime PKI: own/trusted/issued/rejected certs + keys)
|
||||
src/Server/ZB.MOM.WW.OtOpcUa.Host/pki/
|
||||
|
||||
@@ -96,6 +96,9 @@
|
||||
<PackageVersion Include="xunit" Version="2.9.2" />
|
||||
<PackageVersion Include="xunit.runner.visualstudio" Version="3.0.2" />
|
||||
<PackageVersion Include="xunit.v3" Version="1.1.0" />
|
||||
<PackageVersion Include="ZB.MOM.WW.Health" Version="0.1.0" />
|
||||
<PackageVersion Include="ZB.MOM.WW.Health.Akka" Version="0.1.0" />
|
||||
<PackageVersion Include="ZB.MOM.WW.Health.EntityFrameworkCore" Version="0.1.0" />
|
||||
<PackageVersion Include="ZB.MOM.WW.MxGateway.Client" Version="0.1.0" />
|
||||
<PackageVersion Include="ZB.MOM.WW.MxGateway.Contracts" Version="0.1.0" />
|
||||
</ItemGroup>
|
||||
|
||||
@@ -3,5 +3,18 @@
|
||||
<packageSources>
|
||||
<add key="nuget.org" value="https://api.nuget.org/v3/index.json" protocolVersion="3" />
|
||||
<add key="local-mxgw" value="./nuget-packages" />
|
||||
<add key="dohertj2-gitea" value="https://gitea.dohertylan.com/api/packages/dohertj2/nuget/index.json" />
|
||||
</packageSources>
|
||||
<packageSourceMapping>
|
||||
<packageSource key="nuget.org">
|
||||
<package pattern="*" />
|
||||
</packageSource>
|
||||
<packageSource key="local-mxgw">
|
||||
<package pattern="ZB.MOM.WW.MxGateway.*" />
|
||||
</packageSource>
|
||||
<packageSource key="dohertj2-gitea">
|
||||
<package pattern="ZB.MOM.WW.Health" />
|
||||
<package pattern="ZB.MOM.WW.Health.*" />
|
||||
</packageSource>
|
||||
</packageSourceMapping>
|
||||
</configuration>
|
||||
|
||||
@@ -3,6 +3,12 @@
|
||||
> **Status (2026-05-29): alarm-source leg VERIFIED. Historian-write leg still
|
||||
> pending the Windows sidecar + live AVEVA Historian.**
|
||||
>
|
||||
> **Re-confirmed 2026-05-31** against the same gateway (`http://10.100.0.48:5120`):
|
||||
> the Skip-gated live test passed again, pulling a native `Raise` transition
|
||||
> (`Galaxy!TestArea.TestMachine_001.TestAlarm001`, raw sev 500 → OPC UA 750/High,
|
||||
> category `TestArea`, operator comment `Test alarm #1`) through the production
|
||||
> consumer. Independent re-run, not the original capture.
|
||||
>
|
||||
> This is the D.1 deliverable called for by `docs/plans/alarms-worker-wiring-plan.md`
|
||||
> — captured evidence that a live Galaxy alarm reaches lmxopcua through the native
|
||||
> gateway path (not the sub-attribute fallback). It supersedes the "A.2 blocked"
|
||||
|
||||
@@ -1,39 +0,0 @@
|
||||
using Microsoft.Extensions.Diagnostics.HealthChecks;
|
||||
using ZB.MOM.WW.OtOpcUa.Commons.Interfaces;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Host.Health;
|
||||
|
||||
/// <summary>
/// Health probe for the <c>admin</c> cluster role. A node that does not carry the role, or that
/// is the current leader for it, reports Healthy; a node that carries the role without leading
/// it reports Degraded.
/// </summary>
/// <remarks>
/// Used by the <c>/health/active</c> endpoint so external load balancers can route
/// admin-singleton traffic to the current leader (cookie sessions still work on either node —
/// DataProtection keys are shared).
/// </remarks>
public sealed class AdminRoleLeaderHealthCheck : IHealthCheck
{
    private const string AdminRole = "admin";

    private readonly IClusterRoleInfo _roleInfo;

    /// <summary>Creates the check on top of the supplied cluster role lookup.</summary>
    /// <param name="roleInfo">Provides role membership, the role leader, and the local node identity.</param>
    public AdminRoleLeaderHealthCheck(IClusterRoleInfo roleInfo) => _roleInfo = roleInfo;

    /// <summary>Evaluates the local node's standing within the admin role.</summary>
    /// <param name="context">The health check context (not consulted).</param>
    /// <param name="cancellationToken">The cancellation token (not consulted; the check completes synchronously).</param>
    /// <returns>An already-completed task carrying the Healthy or Degraded result.</returns>
    public Task<HealthCheckResult> CheckHealthAsync(HealthCheckContext context, CancellationToken cancellationToken = default)
        => Task.FromResult(Evaluate());

    /// <summary>Computes the result synchronously from the current role information.</summary>
    private HealthCheckResult Evaluate()
    {
        // Nodes outside the admin role have nothing to lead, so they are never marked down.
        if (!_roleInfo.HasRole(AdminRole))
        {
            return HealthCheckResult.Healthy("Node does not carry admin role");
        }

        var leader = _roleInfo.RoleLeader(AdminRole);
        if (leader is not null && leader.Value.Equals(_roleInfo.LocalNode))
        {
            return HealthCheckResult.Healthy($"Admin leader ({_roleInfo.LocalNode})");
        }

        // Either no leader is known yet or another node holds leadership.
        var leaderText = leader?.Value ?? "<unknown>";
        return HealthCheckResult.Degraded($"Admin member but not leader (leader={leaderText})");
    }
}
|
||||
@@ -1,35 +0,0 @@
|
||||
using Akka.Actor;
|
||||
using Akka.Cluster;
|
||||
using Microsoft.Extensions.Diagnostics.HealthChecks;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Host.Health;
|
||||
|
||||
/// <summary>
/// Health probe that reports Healthy once the local node appears in the Akka cluster member
/// list with status <see cref="MemberStatus.Up"/>, and Degraded otherwise.
/// </summary>
public sealed class AkkaClusterHealthCheck : IHealthCheck
{
    private readonly ActorSystem _system;

    /// <summary>Creates the check for the given actor system.</summary>
    /// <param name="system">The Akka actor system whose cluster membership is inspected.</param>
    public AkkaClusterHealthCheck(ActorSystem system) => _system = system;

    /// <summary>Inspects the current cluster state for the local node.</summary>
    /// <param name="context">The health check context (not consulted).</param>
    /// <param name="cancellationToken">Cancellation token (not consulted; the check completes synchronously).</param>
    /// <returns>An already-completed task carrying the Healthy or Degraded result.</returns>
    public Task<HealthCheckResult> CheckHealthAsync(HealthCheckContext context, CancellationToken cancellationToken = default)
    {
        var cluster = Akka.Cluster.Cluster.Get(_system);

        // The local node counts as ready only when it is listed as a member AND has reached Up.
        var localNodeIsUp = cluster.State.Members.Any(
            member => member.Address == cluster.SelfAddress && member.Status == MemberStatus.Up);

        HealthCheckResult outcome;
        if (localNodeIsUp)
        {
            outcome = HealthCheckResult.Healthy($"Self Up; {cluster.State.Members.Count} member(s)");
        }
        else
        {
            outcome = HealthCheckResult.Degraded("Self not yet Up in cluster");
        }

        return Task.FromResult(outcome);
    }
}
|
||||
@@ -1,38 +0,0 @@
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Microsoft.Extensions.Diagnostics.HealthChecks;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Host.Health;
|
||||
|
||||
/// <summary>
/// Health probe for the configuration database. Creates a context from the factory and runs a
/// minimal one-row, no-tracking read against <c>Deployments</c>; success reports Healthy, any
/// failure reports Unhealthy with the exception attached.
/// </summary>
public sealed class DatabaseHealthCheck : IHealthCheck
{
    private readonly IDbContextFactory<OtOpcUaConfigDbContext> _dbFactory;

    /// <summary>
    /// Initializes a new instance of the <see cref="DatabaseHealthCheck"/> class.
    /// </summary>
    /// <param name="dbFactory">The database context factory for the config database.</param>
    public DatabaseHealthCheck(IDbContextFactory<OtOpcUaConfigDbContext> dbFactory)
    {
        _dbFactory = dbFactory;
    }

    /// <summary>
    /// Checks the health of the configuration database.
    /// </summary>
    /// <param name="context">The health check context (not consulted).</param>
    /// <param name="cancellationToken">Cancellation token flowed into context creation and the probe query.</param>
    /// <returns>
    /// Healthy when the probe query completes; Unhealthy (with the causing exception) when it fails.
    /// </returns>
    /// <exception cref="OperationCanceledException">
    /// Propagated when <paramref name="cancellationToken"/> has been cancelled, so the caller can
    /// tell an aborted or timed-out probe apart from an unreachable database.
    /// </exception>
    public async Task<HealthCheckResult> CheckHealthAsync(HealthCheckContext context, CancellationToken cancellationToken = default)
    {
        try
        {
            await using var db = await _dbFactory.CreateDbContextAsync(cancellationToken);

            // Cheapest round-trip that still proves the schema is reachable: one row, no tracking.
            await db.Deployments.AsNoTracking().Take(1).ToListAsync(cancellationToken);

            return HealthCheckResult.Healthy("ConfigDb reachable");
        }
        // Caller-driven cancellation is not a database failure: let it propagate instead of
        // reporting "unreachable". Cancellations not caused by our token still count as failures.
        catch (Exception ex) when (ex is not OperationCanceledException || !cancellationToken.IsCancellationRequested)
        {
            return HealthCheckResult.Unhealthy("ConfigDb unreachable", ex);
        }
    }
}
|
||||
@@ -1,25 +1,40 @@
|
||||
using Microsoft.AspNetCore.Builder;
|
||||
using Microsoft.AspNetCore.Diagnostics.HealthChecks;
|
||||
using Microsoft.AspNetCore.Routing;
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.Diagnostics.HealthChecks;
|
||||
using ZB.MOM.WW.Health;
|
||||
using ZB.MOM.WW.Health.Akka;
|
||||
using ZB.MOM.WW.Health.EntityFrameworkCore;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Host.Health;
|
||||
|
||||
public static class HealthEndpoints
|
||||
{
|
||||
/// <summary>
|
||||
/// Registers the standard ASP.NET Core health-check infrastructure plus the OtOpcUa-specific
|
||||
/// probes. Mirrors ScadaLink's three-tier pattern: <c>ready</c> = boot ok; <c>active</c> =
|
||||
/// fully serving traffic; <c>healthz</c> = bare process liveness.
|
||||
/// Registers the shared ZB.MOM.WW health probes. Tier semantics preserved: configdb + akka on
|
||||
/// ready+active; admin-leader on active only.
|
||||
/// </summary>
|
||||
/// <param name="services">The service collection to register health checks with.</param>
|
||||
public static IServiceCollection AddOtOpcUaHealth(this IServiceCollection services)
|
||||
{
|
||||
services.AddHealthChecks()
|
||||
.AddCheck<DatabaseHealthCheck>("configdb", tags: new[] { "ready", "active" })
|
||||
.AddCheck<AkkaClusterHealthCheck>("akka", tags: new[] { "ready", "active" })
|
||||
.AddCheck<AdminRoleLeaderHealthCheck>("admin-leader", tags: new[] { "active" });
|
||||
.AddTypeActivatedCheck<DatabaseHealthCheck<OtOpcUaConfigDbContext>>(
|
||||
"configdb",
|
||||
failureStatus: null,
|
||||
tags: new[] { ZbHealthTags.Ready, ZbHealthTags.Active },
|
||||
args: new DatabaseHealthCheckOptions<OtOpcUaConfigDbContext>
|
||||
{
|
||||
ProbeQuery = static (db, ct) => db.Deployments.AsNoTracking().Take(1).ToListAsync(ct),
|
||||
})
|
||||
.AddTypeActivatedCheck<AkkaClusterHealthCheck>(
|
||||
"akka",
|
||||
failureStatus: null,
|
||||
tags: new[] { ZbHealthTags.Ready, ZbHealthTags.Active },
|
||||
args: AkkaClusterStatusPolicy.OtOpcUaCompat)
|
||||
.AddTypeActivatedCheck<ActiveNodeHealthCheck>(
|
||||
"admin-leader",
|
||||
failureStatus: null,
|
||||
tags: new[] { ZbHealthTags.Active },
|
||||
args: "admin");
|
||||
return services;
|
||||
}
|
||||
|
||||
@@ -27,21 +42,7 @@ public static class HealthEndpoints
|
||||
/// <param name="app">The endpoint route builder.</param>
|
||||
public static IEndpointRouteBuilder MapOtOpcUaHealth(this IEndpointRouteBuilder app)
|
||||
{
|
||||
// AllowAnonymous on all three — Traefik / k8s liveness probes / load-balancers
|
||||
// hit these without credentials. Without it the AddOtOpcUaAuth fallback policy
|
||||
// 401s every probe and Traefik marks every backend unhealthy.
|
||||
app.MapHealthChecks("/health/ready", new HealthCheckOptions
|
||||
{
|
||||
Predicate = c => c.Tags.Contains("ready"),
|
||||
}).AllowAnonymous();
|
||||
app.MapHealthChecks("/health/active", new HealthCheckOptions
|
||||
{
|
||||
Predicate = c => c.Tags.Contains("active"),
|
||||
}).AllowAnonymous();
|
||||
app.MapHealthChecks("/healthz", new HealthCheckOptions
|
||||
{
|
||||
Predicate = _ => false, // process-liveness only — no probes run.
|
||||
}).AllowAnonymous();
|
||||
app.MapZbHealth();
|
||||
return app;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -27,6 +27,9 @@
|
||||
</PackageReference>
|
||||
<PackageReference Include="OpenTelemetry.Extensions.Hosting"/>
|
||||
<PackageReference Include="OpenTelemetry.Exporter.Prometheus.AspNetCore"/>
|
||||
<PackageReference Include="ZB.MOM.WW.Health" />
|
||||
<PackageReference Include="ZB.MOM.WW.Health.Akka" />
|
||||
<PackageReference Include="ZB.MOM.WW.Health.EntityFrameworkCore" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
|
||||
Reference in New Issue
Block a user