rename: prefix gateway projects/namespaces with ZB.MOM.WW + sln→slnx
Apply the ZB.MOM.WW. prefix to all gateway-side projects, folders,
.csproj/.sln contents, C# namespaces, using directives, generated proto
C# (csharp_namespace + checked-in generated files), InternalsVisibleTo
attributes, project-name string literals (LoadProject, .sln lookups,
worker exe paths, staticwebassets manifest), and the install/script/doc
references that point at any of the above. Migrate the solution from
.sln to .slnx via `dotnet sln migrate` and delete the old file.
External-runtime identifiers are intentionally NOT prefixed so external
configuration keeps working:
- GatewayMetrics.cs MeterName ("MxGateway.Server")
- DashboardAuthenticationDefaults Scheme/Policy ("MxGateway.Dashboard")
- GatewayRequestLoggingMiddleware logger category ("MxGateway.Request")
- StaRuntime thread name ("MxGateway.Worker.STA")
- appsettings.json root section "MxGateway" + env-var prefix
MxGateway__... and secret-name MxGateway:ApiKeyPepper
- C:\ProgramData\MxGateway\ data dir paths
Also fixes two tests that were not rename-related but became visible
while validating the rename:
- WorkerLiveMxAccessSmokeTests.ShutDownAsync: cancellation that the
gateway service correctly maps to RpcException(Cancelled) per gRPC
convention was being misclassified as a stream fault. Added a sibling
catch on RpcException with StatusCode.Cancelled.
- IntegrationTestEnvironment.ResolveRepositoryRoot: extracted IsRepositoryRoot
and made it accept either a .git marker OR a .sln/.slnx next to src/
so the worker-exe walker works in non-git working copies.
clients/proto/proto-inputs.json's protoRoot updated to point at
src/ZB.MOM.WW.MxGateway.Contracts/Protos.
Verified by `dotnet build` and a full `dotnet test` of the .slnx with
MXGATEWAY_RUN_LIVE_{MXACCESS,LDAP,GALAXY}_TESTS=1:
Tests: 472/472 pass
Worker.Tests: 280/280 pass (4 dev-rig [Fact(Skip=...)] skipped)
IntegrationTests: 18/18 pass
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,282 @@
|
||||
using System.Runtime.CompilerServices;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using Microsoft.Extensions.Options;
|
||||
using ZB.MOM.WW.MxGateway.Server.Configuration;
|
||||
using ZB.MOM.WW.MxGateway.Server.Galaxy;
|
||||
using ZB.MOM.WW.MxGateway.Server.Metrics;
|
||||
using ZB.MOM.WW.MxGateway.Server.Security.Authentication;
|
||||
using ZB.MOM.WW.MxGateway.Server.Sessions;
|
||||
using ZB.MOM.WW.MxGateway.Server.Workers;
|
||||
|
||||
namespace ZB.MOM.WW.MxGateway.Server.Dashboard;
|
||||
|
||||
public sealed class DashboardSnapshotService : IDashboardSnapshotService
|
||||
{
|
||||
private const string HealthyStatus = "Healthy";
|
||||
|
||||
private readonly ISessionRegistry _sessionRegistry;
|
||||
private readonly GatewayMetrics _metrics;
|
||||
private readonly IGatewayConfigurationProvider _configurationProvider;
|
||||
private readonly IGalaxyHierarchyCache _galaxyHierarchyCache;
|
||||
private readonly IApiKeyAdminStore _apiKeyAdminStore;
|
||||
private readonly TimeProvider _timeProvider;
|
||||
private readonly DateTimeOffset _gatewayStartedAt;
|
||||
private readonly TimeSpan _snapshotInterval;
|
||||
private readonly TimeSpan _apiKeySummaryRefreshTimeout = TimeSpan.FromSeconds(2);
|
||||
private readonly int _recentFaultLimit;
|
||||
private readonly int _recentSessionLimit;
|
||||
private readonly ILogger<DashboardSnapshotService> _logger;
|
||||
private readonly SemaphoreSlim _apiKeySummaryRefreshGate = new(1, 1);
|
||||
private IReadOnlyList<DashboardApiKeySummary> _apiKeySummaries = Array.Empty<DashboardApiKeySummary>();
|
||||
|
||||
/// <summary>
/// Creates the dashboard snapshot service, recording the current time as the gateway
/// start time and reading the snapshot interval and list limits from the options.
/// </summary>
/// <param name="sessionRegistry">Registry of active gateway sessions.</param>
/// <param name="metrics">Gateway metrics collector.</param>
/// <param name="configurationProvider">Gateway configuration provider.</param>
/// <param name="galaxyHierarchyCache">Galaxy hierarchy cache.</param>
/// <param name="apiKeyAdminStore">Store that is listed to build the API key summaries.</param>
/// <param name="options">Gateway configuration options.</param>
/// <param name="timeProvider">Provider for current time; defaults to <see cref="TimeProvider.System"/>.</param>
/// <param name="logger">Logger for refresh warnings; defaults to a no-op logger.</param>
/// <exception cref="ArgumentNullException">A required dependency is <see langword="null"/>.</exception>
public DashboardSnapshotService(
    ISessionRegistry sessionRegistry,
    GatewayMetrics metrics,
    IGatewayConfigurationProvider configurationProvider,
    IGalaxyHierarchyCache galaxyHierarchyCache,
    IApiKeyAdminStore apiKeyAdminStore,
    IOptions<GatewayOptions> options,
    TimeProvider? timeProvider = null,
    ILogger<DashboardSnapshotService>? logger = null)
{
    // Validate every required dependency up front, in declaration order.
    ArgumentNullException.ThrowIfNull(sessionRegistry);
    ArgumentNullException.ThrowIfNull(metrics);
    ArgumentNullException.ThrowIfNull(configurationProvider);
    ArgumentNullException.ThrowIfNull(galaxyHierarchyCache);
    ArgumentNullException.ThrowIfNull(apiKeyAdminStore);
    ArgumentNullException.ThrowIfNull(options);

    _sessionRegistry = sessionRegistry;
    _metrics = metrics;
    _configurationProvider = configurationProvider;
    _galaxyHierarchyCache = galaxyHierarchyCache;
    _apiKeyAdminStore = apiKeyAdminStore;

    // The start time is taken from the (possibly injected) time provider so uptime
    // is measured against the same clock that stamps each snapshot.
    _timeProvider = timeProvider ?? TimeProvider.System;
    _gatewayStartedAt = _timeProvider.GetUtcNow();

    GatewayOptions gatewayOptions = options.Value;
    _snapshotInterval = TimeSpan.FromMilliseconds(gatewayOptions.Dashboard.SnapshotIntervalMilliseconds);
    _recentFaultLimit = gatewayOptions.Dashboard.RecentFaultLimit;
    _recentSessionLimit = gatewayOptions.Dashboard.RecentSessionLimit;

    _logger = logger ?? NullLogger<DashboardSnapshotService>.Instance;
}
|
||||
|
||||
/// <summary>
/// Builds a dashboard snapshot from the current session registry, metrics,
/// effective configuration, galaxy hierarchy and the most recently published
/// API key summaries.
/// </summary>
/// <returns>
/// A snapshot stamped with the current time. Sessions are ordered newest first by
/// their open time; the gateway status is always reported as healthy.
/// </returns>
public DashboardSnapshot GetSnapshot()
{
    DateTimeOffset now = _timeProvider.GetUtcNow();

    // Newest sessions first; every list below is derived from this one ordering.
    IReadOnlyList<GatewaySession> newestFirst = _sessionRegistry.Snapshot()
        .OrderByDescending(static session => session.OpenedAt)
        .ToArray();
    GatewayMetricsSnapshot metricsSnapshot = _metrics.GetSnapshot();

    int sessionLimit = ResolveLimit(_recentSessionLimit);
    IReadOnlyList<DashboardSessionSummary> recentSessions = newestFirst
        .Take(sessionLimit)
        .Select(session => CreateSessionSummary(session, metricsSnapshot))
        .ToArray();

    // Only sessions that still have a worker client which is not closed are listed as workers.
    IReadOnlyList<DashboardWorkerSummary> liveWorkers =
        (from session in newestFirst
         where session.WorkerClient is { State: not WorkerClientState.Closed }
         select CreateWorkerSummary(session))
        .ToArray();

    string version = typeof(DashboardSnapshotService).Assembly.GetName().Version?.ToString() ?? "unknown";
    IReadOnlyList<DashboardMetricSummary> metricSummaries = CreateMetricSummaries(metricsSnapshot);
    IReadOnlyList<DashboardFaultSummary> faultSummaries = CreateFaultSummaries(newestFirst, now);

    // API key summaries are refreshed elsewhere; this only reads the last published list.
    IReadOnlyList<DashboardApiKeySummary> apiKeys = Volatile.Read(ref _apiKeySummaries);
    var effectiveConfiguration = _configurationProvider.GetEffectiveConfiguration();
    var galaxy = DashboardGalaxyProjector.Project(_galaxyHierarchyCache.Current);

    return new DashboardSnapshot(
        GeneratedAt: now,
        GatewayStartedAt: _gatewayStartedAt,
        GatewayUptime: now - _gatewayStartedAt,
        GatewayStatus: HealthyStatus,
        GatewayVersion: version,
        Sessions: recentSessions,
        Workers: liveWorkers,
        Metrics: metricSummaries,
        Faults: faultSummaries,
        ApiKeys: apiKeys,
        Configuration: effectiveConfiguration,
        Galaxy: galaxy);
}
|
||||
|
||||
/// <summary>
/// Streams dashboard snapshots: one immediately, then one per snapshot interval.
/// Before each snapshot the API key summaries are refreshed on a best-effort basis.
/// </summary>
/// <param name="cancellationToken">Token that ends the stream when cancelled.</param>
/// <returns>
/// An async sequence of snapshots. Cancellation completes the sequence without
/// throwing, regardless of whether it is observed before the first snapshot, while
/// waiting for the next tick, or while the API key summaries are being refreshed.
/// </returns>
public async IAsyncEnumerable<DashboardSnapshot> WatchSnapshotsAsync(
    [EnumeratorCancellation] CancellationToken cancellationToken)
{
    if (cancellationToken.IsCancellationRequested)
    {
        yield break;
    }

    if (!await TryRefreshApiKeySummariesAsync(cancellationToken).ConfigureAwait(false))
    {
        yield break;
    }

    yield return GetSnapshot();

    using PeriodicTimer timer = new(_snapshotInterval, _timeProvider);
    while (true)
    {
        bool hasNext;
        try
        {
            hasNext = await timer.WaitForNextTickAsync(cancellationToken).ConfigureAwait(false);
        }
        catch (OperationCanceledException)
        {
            yield break;
        }

        if (!hasNext)
        {
            yield break;
        }

        if (!await TryRefreshApiKeySummariesAsync(cancellationToken).ConfigureAwait(false))
        {
            yield break;
        }

        yield return GetSnapshot();
    }
}

/// <summary>
/// Runs the API key summary refresh and reports whether the stream should continue.
/// Exists because a <c>yield return</c> cannot be placed inside a try/catch, so the
/// cancellation handling has to live outside the iterator body.
/// </summary>
/// <param name="cancellationToken">Token of the watching caller.</param>
/// <returns>
/// <see langword="true"/> when the refresh attempt finished (successfully or not);
/// <see langword="false"/> when the caller's token was cancelled during the attempt.
/// </returns>
private async Task<bool> TryRefreshApiKeySummariesAsync(CancellationToken cancellationToken)
{
    try
    {
        await RefreshApiKeySummariesAsync(cancellationToken).ConfigureAwait(false);
        return true;
    }
    catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
    {
        // Treat caller cancellation the same way the timer wait does: end the stream quietly.
        return false;
    }
}
|
||||
|
||||
/// <summary>
/// Projects a session into its dashboard summary row, redacting the client-supplied
/// identity fields and the final fault text.
/// </summary>
/// <param name="session">Session to summarize.</param>
/// <param name="metricsSnapshot">Metrics snapshot used to look up the session's event count.</param>
/// <returns>The summary; worker fields are <see langword="null"/> when the session has no worker client.</returns>
private static DashboardSessionSummary CreateSessionSummary(
    GatewaySession session,
    GatewayMetricsSnapshot metricsSnapshot)
{
    // Read the worker client once so all worker fields describe the same client instance.
    IWorkerClient? worker = session.WorkerClient;

    // A session with no recorded events is simply absent from the map; the out value then stays 0.
    _ = metricsSnapshot.EventsBySession.TryGetValue(session.SessionId, out long eventCount);

    return new DashboardSessionSummary(
        SessionId: session.SessionId,
        BackendName: session.BackendName,
        State: session.State,
        ClientIdentity: DashboardRedactor.Redact(session.ClientIdentity),
        ClientSessionName: DashboardRedactor.Redact(session.ClientSessionName),
        ClientCorrelationId: DashboardRedactor.Redact(session.ClientCorrelationId),
        OpenedAt: session.OpenedAt,
        LastClientActivityAt: session.LastClientActivityAt,
        LeaseExpiresAt: session.LeaseExpiresAt,
        WorkerProcessId: worker?.ProcessId,
        WorkerState: worker?.State,
        LastWorkerHeartbeatAt: worker?.LastHeartbeatAt,
        EventsReceived: eventCount,
        LastFault: DashboardRedactor.Redact(session.FinalFault));
}
|
||||
|
||||
/// <summary>
/// Projects the worker attached to a session into its dashboard summary row.
/// </summary>
/// <param name="session">
/// Session whose worker is summarized. The worker client is dereferenced without a
/// null check, so callers must only pass sessions that have one.
/// </param>
/// <returns>The worker summary, with the session's final fault text redacted.</returns>
private static DashboardWorkerSummary CreateWorkerSummary(GatewaySession session)
{
    IWorkerClient worker = session.WorkerClient!;
    string? redactedFault = DashboardRedactor.Redact(session.FinalFault);

    return new DashboardWorkerSummary(
        SessionId: session.SessionId,
        ProcessId: worker.ProcessId,
        State: worker.State,
        LastHeartbeatAt: worker.LastHeartbeatAt,
        LastFault: redactedFault);
}
|
||||
|
||||
/// <summary>
/// Flattens a metrics snapshot into dashboard rows: the fixed gateway-wide values
/// first, then command failures per method, then received events per family.
/// </summary>
/// <param name="snapshot">Metrics snapshot to flatten.</param>
/// <returns>
/// The metric rows. The per-method and per-family rows reuse the names of their
/// totals and carry the key as a third argument; each group is ordered by key,
/// ignoring case.
/// </returns>
private static IReadOnlyList<DashboardMetricSummary> CreateMetricSummaries(GatewayMetricsSnapshot snapshot)
{
    List<DashboardMetricSummary> rows =
    [
        new("mxgateway.sessions.open", snapshot.OpenSessions),
        new("mxgateway.workers.running", snapshot.WorkersRunning),
        new("mxgateway.events.worker_queue.depth", snapshot.WorkerEventQueueDepth),
        new("mxgateway.events.grpc_stream_queue.depth", snapshot.GrpcEventStreamQueueDepth),
        new("mxgateway.sessions.opened", snapshot.SessionsOpened),
        new("mxgateway.sessions.closed", snapshot.SessionsClosed),
        new("mxgateway.commands.started", snapshot.CommandsStarted),
        new("mxgateway.commands.succeeded", snapshot.CommandsSucceeded),
        new("mxgateway.commands.failed", snapshot.CommandsFailed),
        new("mxgateway.events.received", snapshot.EventsReceived),
        new("mxgateway.queues.overflows", snapshot.QueueOverflows),
        new("mxgateway.faults", snapshot.Faults),
        new("mxgateway.workers.killed", snapshot.WorkerKills),
        new("mxgateway.workers.exited", snapshot.WorkerExits),
        new("mxgateway.heartbeats.failed", snapshot.HeartbeatFailures),
        new("mxgateway.grpc.streams.disconnected", snapshot.StreamDisconnects),

        // Per-method breakdown of command failures.
        .. snapshot.CommandFailuresByMethod
            .OrderBy(entry => entry.Key, StringComparer.OrdinalIgnoreCase)
            .Select(entry => new DashboardMetricSummary("mxgateway.commands.failed", entry.Value, entry.Key)),

        // Per-family breakdown of received events.
        .. snapshot.EventsByFamily
            .OrderBy(entry => entry.Key, StringComparer.OrdinalIgnoreCase)
            .Select(entry => new DashboardMetricSummary("mxgateway.events.received", entry.Value, entry.Key)),
    ];

    return rows;
}
|
||||
|
||||
/// <summary>
/// Builds the fault rows for the sessions that report a fault, keeping at most the
/// configured number of rows in the order the sessions were supplied.
/// </summary>
/// <param name="sessions">Sessions to inspect.</param>
/// <param name="generatedAt">
/// Time stamped on every row as its observation time; this is the snapshot time,
/// not the time the fault occurred.
/// </param>
/// <returns>The fault rows, with fault text redacted.</returns>
private IReadOnlyList<DashboardFaultSummary> CreateFaultSummaries(
    IReadOnlyList<GatewaySession> sessions,
    DateTimeOffset generatedAt)
{
    int limit = ResolveLimit(_recentFaultLimit);

    return sessions
        .Where(HasFault)
        .Take(limit)
        .Select(ToSummary)
        .ToArray();

    // A faulted worker takes precedence over the session's own state for both the
    // source label and the reported state.
    DashboardFaultSummary ToSummary(GatewaySession session)
    {
        return new DashboardFaultSummary(
            Source: session.WorkerClient is { State: WorkerClientState.Faulted } ? "Worker" : "Session",
            SessionId: session.SessionId,
            WorkerProcessId: session.WorkerProcessId,
            State: session.WorkerClient is { State: WorkerClientState.Faulted }
                ? WorkerClientState.Faulted.ToString()
                : session.State.ToString(),
            Message: DashboardRedactor.Redact(session.FinalFault) ?? "Faulted",
            ObservedAt: generatedAt);
    }
}
|
||||
|
||||
/// <summary>
/// Reloads the API key summaries from the admin store and publishes them for
/// subsequent snapshots. The refresh is best-effort: a timeout or store failure is
/// logged and the previously published summaries stay in place.
/// </summary>
/// <param name="cancellationToken">Token of the watching caller.</param>
/// <returns>A task that completes when the refresh attempt has finished or was skipped.</returns>
/// <exception cref="OperationCanceledException">
/// <paramref name="cancellationToken"/> was cancelled while acquiring the gate or listing keys.
/// </exception>
private async Task RefreshApiKeySummariesAsync(CancellationToken cancellationToken)
{
    // Zero-timeout wait: if another refresh already holds the gate, skip this one
    // instead of queueing behind it.
    if (!await _apiKeySummaryRefreshGate.WaitAsync(0, cancellationToken).ConfigureAwait(false))
    {
        return;
    }

    try
    {
        // Bound the store call so a slow store cannot stall the snapshot stream.
        using CancellationTokenSource timeout = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
        timeout.CancelAfter(_apiKeySummaryRefreshTimeout);

        IReadOnlyList<DashboardApiKeySummary> summaries = (await _apiKeyAdminStore.ListAsync(timeout.Token)
                .ConfigureAwait(false))
            .Select(key => new DashboardApiKeySummary(
                KeyId: key.KeyId,
                DisplayName: key.DisplayName,
                Scopes: key.Scopes,
                Constraints: key.Constraints,
                CreatedUtc: key.CreatedUtc,
                LastUsedUtc: key.LastUsedUtc,
                RevokedUtc: key.RevokedUtc))
            .ToArray();

        // Publish the fully built list in one write; readers use Volatile.Read.
        Volatile.Write(ref _apiKeySummaries, summaries);
    }
    catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
    {
        // The caller cancelled; let the cancellation propagate.
        throw;
    }
    catch (OperationCanceledException)
    {
        // The caller's token is not cancelled, so this is reported as the refresh timeout.
        _logger.LogWarning(
            "Timed out refreshing dashboard API key summaries after {Timeout}.",
            _apiKeySummaryRefreshTimeout);
    }
    catch (Exception ex)
    {
        // Keep serving the previously published summaries, but record why the refresh
        // failed so the cause can be diagnosed from the log.
        _logger.LogWarning(ex, "Failed to refresh dashboard API key summaries.");
    }
    finally
    {
        _apiKeySummaryRefreshGate.Release();
    }
}
|
||||
|
||||
/// <summary>
/// Determines whether a session should appear in the dashboard fault list.
/// </summary>
/// <param name="session">Session to inspect.</param>
/// <returns>
/// <see langword="true"/> when the session is faulted, its worker client is faulted,
/// or it carries a non-blank final fault text; otherwise <see langword="false"/>.
/// </returns>
private static bool HasFault(GatewaySession session)
{
    if (session.State == ZB.MOM.WW.MxGateway.Contracts.Proto.SessionState.Faulted)
    {
        return true;
    }

    if (session.WorkerClient is { State: WorkerClientState.Faulted })
    {
        return true;
    }

    return !string.IsNullOrWhiteSpace(session.FinalFault);
}
|
||||
|
||||
/// <summary>
/// Clamps a configured list limit so that it is never negative.
/// </summary>
/// <param name="configuredLimit">Limit as read from configuration.</param>
/// <returns>Zero for negative input; otherwise <paramref name="configuredLimit"/> unchanged.</returns>
private static int ResolveLimit(int configuredLimit) => Math.Max(0, configuredLimit);
|
||||
}
|
||||
Reference in New Issue
Block a user