Compare commits
13 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 7b6884031d | |||
| 7ff7a60ae0 | |||
| 8faa2bf23d | |||
| 2099713ed8 | |||
| c05ffc7b39 | |||
| 60017177cb | |||
| 26bae36f8b | |||
| 368390ea9d | |||
| 8f950722c6 | |||
| 1d729fb0f8 | |||
| 0b99aceacb | |||
| d57b42bcd6 | |||
| 5e87f7e16f |
@@ -42,3 +42,9 @@ config_cache*.db
|
||||
|
||||
# Client CLI/UI runtime scratch (last-connected endpoint cache)
|
||||
session.dat
|
||||
|
||||
# Secrets / local credentials — never commit
|
||||
sql_login.txt
|
||||
|
||||
# OPC UA certificate store (runtime PKI: own/trusted/issued/rejected certs + keys)
|
||||
src/Server/ZB.MOM.WW.OtOpcUa.Host/pki/
|
||||
|
||||
@@ -79,11 +79,11 @@
|
||||
<PackageVersion Include="OpenTelemetry.Extensions.Hosting" Version="1.15.3" />
|
||||
<PackageVersion Include="Polly.Core" Version="8.6.6" />
|
||||
<PackageVersion Include="S7netplus" Version="0.20.0" />
|
||||
<PackageVersion Include="Serilog" Version="4.3.0" />
|
||||
<PackageVersion Include="Serilog.AspNetCore" Version="9.0.0" />
|
||||
<PackageVersion Include="Serilog.Extensions.Hosting" Version="9.0.0" />
|
||||
<PackageVersion Include="Serilog" Version="4.3.1" />
|
||||
<PackageVersion Include="Serilog.AspNetCore" Version="10.0.0" />
|
||||
<PackageVersion Include="Serilog.Extensions.Hosting" Version="10.0.0" />
|
||||
<PackageVersion Include="Serilog.Formatting.Compact" Version="3.0.0" />
|
||||
<PackageVersion Include="Serilog.Settings.Configuration" Version="9.0.0" />
|
||||
<PackageVersion Include="Serilog.Settings.Configuration" Version="10.0.0" />
|
||||
<PackageVersion Include="Serilog.Sinks.Console" Version="6.0.0" />
|
||||
<PackageVersion Include="Serilog.Sinks.File" Version="7.0.0" />
|
||||
<PackageVersion Include="Shouldly" Version="4.3.0" />
|
||||
@@ -96,6 +96,11 @@
|
||||
<PackageVersion Include="xunit" Version="2.9.2" />
|
||||
<PackageVersion Include="xunit.runner.visualstudio" Version="3.0.2" />
|
||||
<PackageVersion Include="xunit.v3" Version="1.1.0" />
|
||||
<PackageVersion Include="ZB.MOM.WW.Health" Version="0.1.0" />
|
||||
<PackageVersion Include="ZB.MOM.WW.Health.Akka" Version="0.1.0" />
|
||||
<PackageVersion Include="ZB.MOM.WW.Health.EntityFrameworkCore" Version="0.1.0" />
|
||||
<PackageVersion Include="ZB.MOM.WW.Telemetry" Version="0.1.0" />
|
||||
<PackageVersion Include="ZB.MOM.WW.Telemetry.Serilog" Version="0.1.0" />
|
||||
<PackageVersion Include="ZB.MOM.WW.MxGateway.Client" Version="0.1.0" />
|
||||
<PackageVersion Include="ZB.MOM.WW.MxGateway.Contracts" Version="0.1.0" />
|
||||
</ItemGroup>
|
||||
|
||||
@@ -1,7 +1,23 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<configuration>
|
||||
<packageSources>
|
||||
<clear />
|
||||
<add key="nuget.org" value="https://api.nuget.org/v3/index.json" protocolVersion="3" />
|
||||
<add key="local-mxgw" value="./nuget-packages" />
|
||||
<add key="dohertj2-gitea" value="https://gitea.dohertylan.com/api/packages/dohertj2/nuget/index.json" />
|
||||
</packageSources>
|
||||
<packageSourceMapping>
|
||||
<packageSource key="nuget.org">
|
||||
<package pattern="*" />
|
||||
</packageSource>
|
||||
<packageSource key="local-mxgw">
|
||||
<package pattern="ZB.MOM.WW.MxGateway.*" />
|
||||
</packageSource>
|
||||
<packageSource key="dohertj2-gitea">
|
||||
<package pattern="ZB.MOM.WW.Health" />
|
||||
<package pattern="ZB.MOM.WW.Health.*" />
|
||||
<package pattern="ZB.MOM.WW.Telemetry" />
|
||||
<package pattern="ZB.MOM.WW.Telemetry.*" />
|
||||
</packageSource>
|
||||
</packageSourceMapping>
|
||||
</configuration>
|
||||
|
||||
@@ -3,6 +3,12 @@
|
||||
> **Status (2026-05-29): alarm-source leg VERIFIED. Historian-write leg still
|
||||
> pending the Windows sidecar + live AVEVA Historian.**
|
||||
>
|
||||
> **Re-confirmed 2026-05-31** against the same gateway (`http://10.100.0.48:5120`):
|
||||
> the Skip-gated live test passed again, pulling a native `Raise` transition
|
||||
> (`Galaxy!TestArea.TestMachine_001.TestAlarm001`, raw sev 500 → OPC UA 750/High,
|
||||
> category `TestArea`, operator comment `Test alarm #1`) through the production
|
||||
> consumer. Independent re-run, not the original capture.
|
||||
>
|
||||
> This is the D.1 deliverable called for by `docs/plans/alarms-worker-wiring-plan.md`
|
||||
> — captured evidence that a live Galaxy alarm reaches lmxopcua through the native
|
||||
> gateway path (not the sub-attribute fallback). It supersedes the "A.2 blocked"
|
||||
|
||||
@@ -1,39 +0,0 @@
|
||||
using Microsoft.Extensions.Diagnostics.HealthChecks;
|
||||
using ZB.MOM.WW.OtOpcUa.Commons.Interfaces;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Host.Health;
|
||||
|
||||
/// <summary>
|
||||
/// Reports Healthy on the admin-role leader, Degraded on a non-leader admin member. Used by
|
||||
/// the <c>/health/active</c> endpoint so external load balancers can route admin-singleton
|
||||
/// traffic to the current leader (cookie sessions still work on either node — DataProtection
|
||||
/// keys are shared).
|
||||
/// </summary>
|
||||
public sealed class AdminRoleLeaderHealthCheck : IHealthCheck
|
||||
{
|
||||
private readonly IClusterRoleInfo _roleInfo;
|
||||
|
||||
/// <summary>Initializes a new instance of the AdminRoleLeaderHealthCheck class.</summary>
|
||||
/// <param name="roleInfo">The cluster role information provider.</param>
|
||||
public AdminRoleLeaderHealthCheck(IClusterRoleInfo roleInfo)
|
||||
{
|
||||
_roleInfo = roleInfo;
|
||||
}
|
||||
|
||||
/// <summary>Checks the health status of the admin role leader.</summary>
|
||||
/// <param name="context">The health check context.</param>
|
||||
/// <param name="cancellationToken">The cancellation token.</param>
|
||||
/// <returns>A task representing the health check operation.</returns>
|
||||
public Task<HealthCheckResult> CheckHealthAsync(HealthCheckContext context, CancellationToken cancellationToken = default)
|
||||
{
|
||||
if (!_roleInfo.HasRole("admin"))
|
||||
return Task.FromResult(HealthCheckResult.Healthy("Node does not carry admin role"));
|
||||
|
||||
var leader = _roleInfo.RoleLeader("admin");
|
||||
var isLeader = leader is not null && leader.Value.Equals(_roleInfo.LocalNode);
|
||||
|
||||
return Task.FromResult(isLeader
|
||||
? HealthCheckResult.Healthy($"Admin leader ({_roleInfo.LocalNode})")
|
||||
: HealthCheckResult.Degraded($"Admin member but not leader (leader={leader?.Value ?? "<unknown>"})"));
|
||||
}
|
||||
}
|
||||
@@ -1,35 +0,0 @@
|
||||
using Akka.Actor;
|
||||
using Akka.Cluster;
|
||||
using Microsoft.Extensions.Diagnostics.HealthChecks;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Host.Health;
|
||||
|
||||
public sealed class AkkaClusterHealthCheck : IHealthCheck
|
||||
{
|
||||
private readonly ActorSystem _system;
|
||||
|
||||
/// <summary>
|
||||
/// Initializes a new instance of the AkkaClusterHealthCheck class.
|
||||
/// </summary>
|
||||
/// <param name="system">The Akka actor system to check cluster health for.</param>
|
||||
public AkkaClusterHealthCheck(ActorSystem system)
|
||||
{
|
||||
_system = system;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Checks the health of the Akka cluster asynchronously.
|
||||
/// </summary>
|
||||
/// <param name="context">The health check context.</param>
|
||||
/// <param name="cancellationToken">Cancellation token.</param>
|
||||
public Task<HealthCheckResult> CheckHealthAsync(HealthCheckContext context, CancellationToken cancellationToken = default)
|
||||
{
|
||||
var cluster = Akka.Cluster.Cluster.Get(_system);
|
||||
var selfUp = cluster.State.Members.Any(m =>
|
||||
m.Address == cluster.SelfAddress && m.Status == MemberStatus.Up);
|
||||
|
||||
return Task.FromResult(selfUp
|
||||
? HealthCheckResult.Healthy($"Self Up; {cluster.State.Members.Count} member(s)")
|
||||
: HealthCheckResult.Degraded("Self not yet Up in cluster"));
|
||||
}
|
||||
}
|
||||
@@ -1,38 +0,0 @@
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Microsoft.Extensions.Diagnostics.HealthChecks;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Host.Health;
|
||||
|
||||
public sealed class DatabaseHealthCheck : IHealthCheck
|
||||
{
|
||||
private readonly IDbContextFactory<OtOpcUaConfigDbContext> _dbFactory;
|
||||
|
||||
/// <summary>
|
||||
/// Initializes a new instance of the <see cref="DatabaseHealthCheck"/> class.
|
||||
/// </summary>
|
||||
/// <param name="dbFactory">The database context factory for the config database.</param>
|
||||
public DatabaseHealthCheck(IDbContextFactory<OtOpcUaConfigDbContext> dbFactory)
|
||||
{
|
||||
_dbFactory = dbFactory;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Checks the health of the configuration database.
|
||||
/// </summary>
|
||||
/// <param name="context">The health check context.</param>
|
||||
/// <param name="cancellationToken">Cancellation token.</param>
|
||||
public async Task<HealthCheckResult> CheckHealthAsync(HealthCheckContext context, CancellationToken cancellationToken = default)
|
||||
{
|
||||
try
|
||||
{
|
||||
await using var db = await _dbFactory.CreateDbContextAsync(cancellationToken);
|
||||
await db.Deployments.AsNoTracking().Take(1).ToListAsync(cancellationToken);
|
||||
return HealthCheckResult.Healthy("ConfigDb reachable");
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
return HealthCheckResult.Unhealthy("ConfigDb unreachable", ex);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,25 +1,40 @@
|
||||
using Microsoft.AspNetCore.Builder;
|
||||
using Microsoft.AspNetCore.Diagnostics.HealthChecks;
|
||||
using Microsoft.AspNetCore.Routing;
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.Diagnostics.HealthChecks;
|
||||
using ZB.MOM.WW.Health;
|
||||
using ZB.MOM.WW.Health.Akka;
|
||||
using ZB.MOM.WW.Health.EntityFrameworkCore;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Host.Health;
|
||||
|
||||
public static class HealthEndpoints
|
||||
{
|
||||
/// <summary>
|
||||
/// Registers the standard ASP.NET Core health-check infrastructure plus the OtOpcUa-specific
|
||||
/// probes. Mirrors ScadaLink's three-tier pattern: <c>ready</c> = boot ok; <c>active</c> =
|
||||
/// fully serving traffic; <c>healthz</c> = bare process liveness.
|
||||
/// Registers the shared ZB.MOM.WW health probes. Tier semantics preserved: configdb + akka on
|
||||
/// ready+active; admin-leader on active only.
|
||||
/// </summary>
|
||||
/// <param name="services">The service collection to register health checks with.</param>
|
||||
public static IServiceCollection AddOtOpcUaHealth(this IServiceCollection services)
|
||||
{
|
||||
services.AddHealthChecks()
|
||||
.AddCheck<DatabaseHealthCheck>("configdb", tags: new[] { "ready", "active" })
|
||||
.AddCheck<AkkaClusterHealthCheck>("akka", tags: new[] { "ready", "active" })
|
||||
.AddCheck<AdminRoleLeaderHealthCheck>("admin-leader", tags: new[] { "active" });
|
||||
.AddTypeActivatedCheck<DatabaseHealthCheck<OtOpcUaConfigDbContext>>(
|
||||
"configdb",
|
||||
failureStatus: null,
|
||||
tags: new[] { ZbHealthTags.Ready, ZbHealthTags.Active },
|
||||
args: new DatabaseHealthCheckOptions<OtOpcUaConfigDbContext>
|
||||
{
|
||||
ProbeQuery = static (db, ct) => db.Deployments.AsNoTracking().Take(1).ToListAsync(ct),
|
||||
})
|
||||
.AddTypeActivatedCheck<AkkaClusterHealthCheck>(
|
||||
"akka",
|
||||
failureStatus: null,
|
||||
tags: new[] { ZbHealthTags.Ready, ZbHealthTags.Active },
|
||||
args: AkkaClusterStatusPolicy.OtOpcUaCompat)
|
||||
.AddTypeActivatedCheck<ActiveNodeHealthCheck>(
|
||||
"admin-leader",
|
||||
failureStatus: null,
|
||||
tags: new[] { ZbHealthTags.Active },
|
||||
args: "admin");
|
||||
return services;
|
||||
}
|
||||
|
||||
@@ -27,21 +42,7 @@ public static class HealthEndpoints
|
||||
/// <param name="app">The endpoint route builder.</param>
|
||||
public static IEndpointRouteBuilder MapOtOpcUaHealth(this IEndpointRouteBuilder app)
|
||||
{
|
||||
// AllowAnonymous on all three — Traefik / k8s liveness probes / load-balancers
|
||||
// hit these without credentials. Without it the AddOtOpcUaAuth fallback policy
|
||||
// 401s every probe and Traefik marks every backend unhealthy.
|
||||
app.MapHealthChecks("/health/ready", new HealthCheckOptions
|
||||
{
|
||||
Predicate = c => c.Tags.Contains("ready"),
|
||||
}).AllowAnonymous();
|
||||
app.MapHealthChecks("/health/active", new HealthCheckOptions
|
||||
{
|
||||
Predicate = c => c.Tags.Contains("active"),
|
||||
}).AllowAnonymous();
|
||||
app.MapHealthChecks("/healthz", new HealthCheckOptions
|
||||
{
|
||||
Predicate = _ => false, // process-liveness only — no probes run.
|
||||
}).AllowAnonymous();
|
||||
app.MapZbHealth();
|
||||
return app;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
using OpenTelemetry.Metrics;
|
||||
using OpenTelemetry.Trace;
|
||||
using Microsoft.Extensions.Configuration;
|
||||
using ZB.MOM.WW.OtOpcUa.Commons.Observability;
|
||||
using ZB.MOM.WW.Telemetry;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Host.Observability;
|
||||
|
||||
@@ -15,16 +15,25 @@ public static class ObservabilityExtensions
|
||||
{
|
||||
/// <summary>Adds OtOpcUa observability (metrics and tracing) to the service collection.</summary>
|
||||
/// <param name="services">The service collection to add observability services to.</param>
|
||||
public static IServiceCollection AddOtOpcUaObservability(this IServiceCollection services)
|
||||
/// <param name="configuration">
|
||||
/// Configuration read for the opt-in OTLP exporter. <c>OtOpcUa:Telemetry:Exporter</c>
|
||||
/// (parsed case-insensitively to <see cref="ZbExporter"/>) switches to OTLP when set to
|
||||
/// <c>Otlp</c>; <c>OtOpcUa:Telemetry:OtlpEndpoint</c> sets the OTLP endpoint. With no
|
||||
/// config the exporter stays Prometheus (the default).
|
||||
/// </param>
|
||||
public static IServiceCollection AddOtOpcUaObservability(this IServiceCollection services, IConfiguration configuration)
|
||||
{
|
||||
services.AddOpenTelemetry()
|
||||
.WithMetrics(b => b
|
||||
.AddMeter(OtOpcUaTelemetry.MeterName)
|
||||
.AddPrometheusExporter())
|
||||
.WithTracing(b => b
|
||||
.AddSource(OtOpcUaTelemetry.ActivitySourceName));
|
||||
|
||||
return services;
|
||||
return services.AddZbTelemetry(o =>
|
||||
{
|
||||
o.ServiceName = "otopcua";
|
||||
o.Meters = [OtOpcUaTelemetry.MeterName];
|
||||
o.ActivitySources = [OtOpcUaTelemetry.ActivitySourceName];
|
||||
if (Enum.TryParse<ZbExporter>(configuration["OtOpcUa:Telemetry:Exporter"], ignoreCase: true, out var exporter))
|
||||
o.Exporter = exporter;
|
||||
var otlp = configuration["OtOpcUa:Telemetry:OtlpEndpoint"];
|
||||
if (!string.IsNullOrWhiteSpace(otlp))
|
||||
o.OtlpEndpoint = otlp;
|
||||
});
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
@@ -35,7 +44,7 @@ public static class ObservabilityExtensions
|
||||
/// <param name="app">The endpoint route builder.</param>
|
||||
public static IEndpointRouteBuilder MapOtOpcUaMetrics(this IEndpointRouteBuilder app)
|
||||
{
|
||||
app.MapPrometheusScrapingEndpoint("/metrics");
|
||||
app.MapZbMetrics();
|
||||
return app;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -20,6 +20,7 @@ using ZB.MOM.WW.OtOpcUa.Runtime;
|
||||
using ZB.MOM.WW.OtOpcUa.Security;
|
||||
using ZB.MOM.WW.OtOpcUa.Security.Endpoints;
|
||||
using ZB.MOM.WW.OtOpcUa.Security.Ldap;
|
||||
using ZB.MOM.WW.Telemetry.Serilog;
|
||||
|
||||
// Roles drive the entire conditional wiring below — see ZB.MOM.WW.OtOpcUa.Cluster.RoleParser.
|
||||
var roles = RoleParser.Parse(Environment.GetEnvironmentVariable("OTOPCUA_ROLES"));
|
||||
@@ -45,11 +46,10 @@ var roleSuffix = roles.Length == 0 ? null : string.Join('-', roles.OrderBy(r =>
|
||||
if (roleSuffix is not null)
|
||||
builder.Configuration.AddJsonFile($"appsettings.{roleSuffix}.json", optional: true, reloadOnChange: true);
|
||||
|
||||
// Serilog — rolling daily file sink per CLAUDE.md. Console for local dev.
|
||||
builder.Host.UseSerilog((ctx, lc) => lc
|
||||
.ReadFrom.Configuration(ctx.Configuration)
|
||||
.WriteTo.Console()
|
||||
.WriteTo.File("logs/otopcua-.log", rollingInterval: RollingInterval.Day));
|
||||
// Serilog — shared ZB.MOM.WW.Telemetry bootstrap. Sinks (Console + rolling daily file)
|
||||
// now live in appsettings.json (ReadFrom.Configuration); AddZbSerilog layers in the
|
||||
// shared NodeHostname / TraceContext / Redaction enrichers and trace correlation.
|
||||
builder.AddZbSerilog(o => o.ServiceName = "otopcua");
|
||||
|
||||
// Windows-service registration is handled at install time by scripts/install/Install-Services.ps1
|
||||
// (Task 62) rather than in-process, so the binary stays cross-platform-compilable.
|
||||
@@ -135,7 +135,7 @@ if (hasAdmin)
|
||||
}
|
||||
|
||||
builder.Services.AddOtOpcUaHealth();
|
||||
builder.Services.AddOtOpcUaObservability();
|
||||
builder.Services.AddOtOpcUaObservability(builder.Configuration);
|
||||
|
||||
var app = builder.Build();
|
||||
app.UseSerilogRequestLogging();
|
||||
|
||||
@@ -27,6 +27,11 @@
|
||||
</PackageReference>
|
||||
<PackageReference Include="OpenTelemetry.Extensions.Hosting"/>
|
||||
<PackageReference Include="OpenTelemetry.Exporter.Prometheus.AspNetCore"/>
|
||||
<PackageReference Include="ZB.MOM.WW.Health" />
|
||||
<PackageReference Include="ZB.MOM.WW.Health.Akka" />
|
||||
<PackageReference Include="ZB.MOM.WW.Health.EntityFrameworkCore" />
|
||||
<PackageReference Include="ZB.MOM.WW.Telemetry" />
|
||||
<PackageReference Include="ZB.MOM.WW.Telemetry.Serilog" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
|
||||
@@ -1 +1,9 @@
|
||||
{}
|
||||
{
|
||||
"Serilog": {
|
||||
"Using": [ "Serilog.Sinks.Console", "Serilog.Sinks.File" ],
|
||||
"WriteTo": [
|
||||
{ "Name": "Console" },
|
||||
{ "Name": "File", "Args": { "path": "logs/otopcua-.log", "rollingInterval": "Day" } }
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user