OTel Prometheus exporter wiring — RedundancyMetrics meter now scraped at /metrics. Closes task #201. Picked Prometheus over OTLP per the earlier recommendation (pull-based means no OTel Collector deployment required for the common K8s/containers case; the endpoint ASP.NET-hosts inside the Admin app already, so one less moving part). Adds two NuGet refs to the Admin csproj: OpenTelemetry.Extensions.Hosting 1.15.2 (stable) + OpenTelemetry.Exporter.Prometheus.AspNetCore 1.15.2-beta.1 (the exporter has historically been beta-only; rest of the OTel ecosystem treats it as production-acceptable + it's what the upstream OTel docs themselves recommend for AspNetCore hosts). Program.cs gains a Metrics:Prometheus:Enabled toggle (defaults true; setting to false disables both the MeterProvider registration + the scrape endpoint entirely for locked-down deployments). When enabled, AddOpenTelemetry().WithMetrics() registers a MeterProvider that subscribes to the "ZB.MOM.WW.OtOpcUa.Redundancy" meter (the exact MeterName constant on RedundancyMetrics) + wires AddPrometheusExporter. MapPrometheusScrapingEndpoint() appends a /metrics handler producing the Prometheus text-format output; deliberately NOT authenticated because scrape jobs typically run on a trusted network + operators who need auth wrap the endpoint behind a reverse-proxy basic-auth gate per fleet-ops convention. appsettings.json declares the toggle with Enabled: true so the default deploy gets metrics automatically — turning off is the explicit action. Future meters (resilience tracker + host status + auth probe) just AddMeter("Name") alongside the existing call to start flowing through the same endpoint without more infrastructure. Admin project builds 0 errors; Admin.Tests 92/92 passing (unchanged — the OTel pipeline runs at request time, not test time). Still-pending work that was NOT part of #201's scope: an equivalent setup for the Server project (different MeterNames — the Polly pipeline builder's tracker + host-status publisher) + a metrics cheat-sheet in docs/observability.md documenting each meter's tag set + expected alerting thresholds. Those are natural follow-ups when fleet-ops starts building dashboards.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
using Microsoft.AspNetCore.Authentication;
|
||||
using Microsoft.AspNetCore.Authentication.Cookies;
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using OpenTelemetry.Metrics;
|
||||
using Serilog;
|
||||
using ZB.MOM.WW.OtOpcUa.Admin.Components;
|
||||
using ZB.MOM.WW.OtOpcUa.Admin.Hubs;
|
||||
@@ -70,6 +71,19 @@ builder.Services.AddScoped<ILdapAuthService, LdapAuthService>();
|
||||
// SignalR real-time fleet status + alerts (admin-ui.md §"Real-Time Updates").
|
||||
builder.Services.AddHostedService<FleetStatusPoller>();
|
||||
|
||||
// OpenTelemetry Prometheus exporter — Meter stream from RedundancyMetrics + any future
|
||||
// Admin-side instrumentation lands on the /metrics endpoint Prometheus scrapes. Pull-based
|
||||
// means no OTel Collector deployment required for the common deploy-in-a-K8s case; appsettings
|
||||
// Metrics:Prometheus:Enabled=false disables the endpoint entirely for locked-down deployments.
|
||||
var metricsEnabled = builder.Configuration.GetValue("Metrics:Prometheus:Enabled", true);
|
||||
if (metricsEnabled)
|
||||
{
|
||||
builder.Services.AddOpenTelemetry()
|
||||
.WithMetrics(m => m
|
||||
.AddMeter(RedundancyMetrics.MeterName)
|
||||
.AddPrometheusExporter());
|
||||
}
|
||||
|
||||
var app = builder.Build();
|
||||
|
||||
app.UseSerilogRequestLogging();
|
||||
@@ -87,6 +101,15 @@ app.MapPost("/auth/logout", async (HttpContext ctx) =>
|
||||
app.MapHub<FleetStatusHub>("/hubs/fleet");
|
||||
app.MapHub<AlertHub>("/hubs/alerts");
|
||||
|
||||
if (metricsEnabled)
|
||||
{
|
||||
// Prometheus scrape endpoint — expose instrumentation registered in the OTel MeterProvider
|
||||
// above. Emits text-format metrics at /metrics; auth is intentionally NOT required (Prometheus
|
||||
// scrape jobs typically run on a trusted network). Operators who need auth put the endpoint
|
||||
// behind a reverse-proxy basic-auth gate per fleet-ops convention.
|
||||
app.MapPrometheusScrapingEndpoint();
|
||||
}
|
||||
|
||||
app.MapRazorComponents<App>().AddInteractiveServerRenderMode();
|
||||
|
||||
await app.RunAsync();
|
||||
|
||||
@@ -16,6 +16,8 @@
|
||||
<PackageReference Include="Novell.Directory.Ldap.NETStandard" Version="3.6.0"/>
|
||||
<PackageReference Include="Microsoft.AspNetCore.SignalR.Client" Version="10.0.0"/>
|
||||
<PackageReference Include="Serilog.AspNetCore" Version="9.0.0"/>
|
||||
<PackageReference Include="OpenTelemetry.Extensions.Hosting" Version="1.15.2"/>
|
||||
<PackageReference Include="OpenTelemetry.Exporter.Prometheus.AspNetCore" Version="1.15.2-beta.1"/>
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
|
||||
@@ -23,5 +23,10 @@
|
||||
},
|
||||
"Serilog": {
|
||||
"MinimumLevel": "Information"
|
||||
},
|
||||
"Metrics": {
|
||||
"Prometheus": {
|
||||
"Enabled": true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user