Improve gateway reliability and dashboard docs
This commit is contained in:
@@ -21,6 +21,11 @@ public static class DashboardDisplay
|
||||
return string.IsNullOrWhiteSpace(value) ? "-" : value;
|
||||
}
|
||||
|
||||
public static string Count(long value)
|
||||
{
|
||||
return value.ToString("N0", System.Globalization.CultureInfo.InvariantCulture);
|
||||
}
|
||||
|
||||
public static long MetricValue(DashboardSnapshot snapshot, string name, string? dimension = null)
|
||||
{
|
||||
return snapshot.Metrics.FirstOrDefault(metric =>
|
||||
|
||||
@@ -20,13 +20,13 @@ else
|
||||
|
||||
<section class="metric-grid">
|
||||
<MetricCard Label="Uptime" Value="@DashboardDisplay.Duration(Snapshot.GatewayUptime)" Detail="@Snapshot.GatewayVersion" />
|
||||
<MetricCard Label="Open Sessions" Value="@DashboardDisplay.MetricValue(Snapshot, "mxgateway.sessions.open").ToString(System.Globalization.CultureInfo.InvariantCulture)" />
|
||||
<MetricCard Label="Workers Running" Value="@DashboardDisplay.MetricValue(Snapshot, "mxgateway.workers.running").ToString(System.Globalization.CultureInfo.InvariantCulture)" />
|
||||
<MetricCard Label="Event Queue Depth" Value="@DashboardDisplay.MetricValue(Snapshot, "mxgateway.events.queue.depth").ToString(System.Globalization.CultureInfo.InvariantCulture)" />
|
||||
<MetricCard Label="Commands Failed" Value="@DashboardDisplay.MetricValue(Snapshot, "mxgateway.commands.failed").ToString(System.Globalization.CultureInfo.InvariantCulture)" />
|
||||
<MetricCard Label="Events Received" Value="@DashboardDisplay.MetricValue(Snapshot, "mxgateway.events.received").ToString(System.Globalization.CultureInfo.InvariantCulture)" />
|
||||
<MetricCard Label="Faults" Value="@DashboardDisplay.MetricValue(Snapshot, "mxgateway.faults").ToString(System.Globalization.CultureInfo.InvariantCulture)" />
|
||||
<MetricCard Label="Queue Overflows" Value="@DashboardDisplay.MetricValue(Snapshot, "mxgateway.queues.overflows").ToString(System.Globalization.CultureInfo.InvariantCulture)" />
|
||||
<MetricCard Label="Open Sessions" Value="@DashboardDisplay.Count(DashboardDisplay.MetricValue(Snapshot, "mxgateway.sessions.open"))" />
|
||||
<MetricCard Label="Workers Running" Value="@DashboardDisplay.Count(DashboardDisplay.MetricValue(Snapshot, "mxgateway.workers.running"))" />
|
||||
<MetricCard Label="Event Queue Depth" Value="@DashboardDisplay.Count(DashboardDisplay.MetricValue(Snapshot, "mxgateway.events.worker_queue.depth"))" />
|
||||
<MetricCard Label="Commands Failed" Value="@DashboardDisplay.Count(DashboardDisplay.MetricValue(Snapshot, "mxgateway.commands.failed"))" />
|
||||
<MetricCard Label="Events Received" Value="@DashboardDisplay.Count(DashboardDisplay.MetricValue(Snapshot, "mxgateway.events.received"))" />
|
||||
<MetricCard Label="Faults" Value="@DashboardDisplay.Count(DashboardDisplay.MetricValue(Snapshot, "mxgateway.faults"))" />
|
||||
<MetricCard Label="Queue Overflows" Value="@DashboardDisplay.Count(DashboardDisplay.MetricValue(Snapshot, "mxgateway.queues.overflows"))" />
|
||||
</section>
|
||||
|
||||
<section class="dashboard-section">
|
||||
|
||||
@@ -18,10 +18,11 @@ else
|
||||
</div>
|
||||
|
||||
<section class="metric-grid compact">
|
||||
<MetricCard Label="Events Received" Value="@DashboardDisplay.MetricValue(Snapshot, "mxgateway.events.received").ToString(System.Globalization.CultureInfo.InvariantCulture)" />
|
||||
<MetricCard Label="Event Queue Depth" Value="@DashboardDisplay.MetricValue(Snapshot, "mxgateway.events.queue.depth").ToString(System.Globalization.CultureInfo.InvariantCulture)" />
|
||||
<MetricCard Label="Queue Overflows" Value="@DashboardDisplay.MetricValue(Snapshot, "mxgateway.queues.overflows").ToString(System.Globalization.CultureInfo.InvariantCulture)" />
|
||||
<MetricCard Label="Stream Disconnects" Value="@DashboardDisplay.MetricValue(Snapshot, "mxgateway.grpc.streams.disconnected").ToString(System.Globalization.CultureInfo.InvariantCulture)" />
|
||||
<MetricCard Label="Events Received" Value="@DashboardDisplay.Count(DashboardDisplay.MetricValue(Snapshot, "mxgateway.events.received"))" />
|
||||
<MetricCard Label="Worker Event Queue Depth" Value="@DashboardDisplay.Count(DashboardDisplay.MetricValue(Snapshot, "mxgateway.events.worker_queue.depth"))" />
|
||||
<MetricCard Label="Stream Queue Depth" Value="@DashboardDisplay.Count(DashboardDisplay.MetricValue(Snapshot, "mxgateway.events.grpc_stream_queue.depth"))" />
|
||||
<MetricCard Label="Queue Overflows" Value="@DashboardDisplay.Count(DashboardDisplay.MetricValue(Snapshot, "mxgateway.queues.overflows"))" />
|
||||
<MetricCard Label="Stream Disconnects" Value="@DashboardDisplay.Count(DashboardDisplay.MetricValue(Snapshot, "mxgateway.grpc.streams.disconnected"))" />
|
||||
</section>
|
||||
|
||||
<section class="dashboard-section">
|
||||
@@ -47,7 +48,7 @@ else
|
||||
{
|
||||
<tr>
|
||||
<td>@metric.Dimension</td>
|
||||
<td>@metric.Value</td>
|
||||
<td>@DashboardDisplay.Count(metric.Value)</td>
|
||||
</tr>
|
||||
}
|
||||
</tbody>
|
||||
|
||||
@@ -39,6 +39,7 @@ else
|
||||
<tr><th scope="row">Opened</th><td>@DashboardDisplay.DateTime(CurrentSession.OpenedAt)</td></tr>
|
||||
<tr><th scope="row">Last activity</th><td>@DashboardDisplay.DateTime(CurrentSession.LastClientActivityAt)</td></tr>
|
||||
<tr><th scope="row">Lease expires</th><td>@DashboardDisplay.DateTime(CurrentSession.LeaseExpiresAt)</td></tr>
|
||||
<tr><th scope="row">Events received</th><td>@DashboardDisplay.Count(CurrentSession.EventsReceived)</td></tr>
|
||||
<tr><th scope="row">Last fault</th><td>@DashboardDisplay.Text(CurrentSession.LastFault)</td></tr>
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
@@ -33,6 +33,7 @@ else
|
||||
<th scope="col">Client</th>
|
||||
<th scope="col">Backend</th>
|
||||
<th scope="col">Worker</th>
|
||||
<th scope="col">Events</th>
|
||||
<th scope="col">Opened</th>
|
||||
<th scope="col">Activity</th>
|
||||
<th scope="col">Heartbeat</th>
|
||||
@@ -54,6 +55,7 @@ else
|
||||
<span class="ms-1"><StatusBadge Text="@session.WorkerState.ToString()" /></span>
|
||||
}
|
||||
</td>
|
||||
<td>@DashboardDisplay.Count(session.EventsReceived)</td>
|
||||
<td>@DashboardDisplay.DateTime(session.OpenedAt)</td>
|
||||
<td>@DashboardDisplay.DateTime(session.LastClientActivityAt)</td>
|
||||
<td>@DashboardDisplay.DateTime(session.LastWorkerHeartbeatAt)</td>
|
||||
|
||||
@@ -16,4 +16,5 @@ public sealed record DashboardSessionSummary(
|
||||
int? WorkerProcessId,
|
||||
WorkerClientState? WorkerState,
|
||||
DateTimeOffset? LastWorkerHeartbeatAt,
|
||||
long EventsReceived,
|
||||
string? LastFault);
|
||||
|
||||
@@ -45,15 +45,15 @@ public sealed class DashboardSnapshotService : IDashboardSnapshotService
|
||||
IReadOnlyList<GatewaySession> sessions = _sessionRegistry.Snapshot()
|
||||
.OrderByDescending(session => session.OpenedAt)
|
||||
.ToArray();
|
||||
GatewayMetricsSnapshot metricsSnapshot = _metrics.GetSnapshot();
|
||||
IReadOnlyList<DashboardSessionSummary> sessionSummaries = sessions
|
||||
.Take(ResolveLimit(_recentSessionLimit))
|
||||
.Select(CreateSessionSummary)
|
||||
.Select(session => CreateSessionSummary(session, metricsSnapshot))
|
||||
.ToArray();
|
||||
IReadOnlyList<DashboardWorkerSummary> workerSummaries = sessions
|
||||
.Where(session => session.WorkerClient is not null)
|
||||
.Where(session => session.WorkerClient is { State: not WorkerClientState.Closed })
|
||||
.Select(CreateWorkerSummary)
|
||||
.ToArray();
|
||||
GatewayMetricsSnapshot metricsSnapshot = _metrics.GetSnapshot();
|
||||
|
||||
return new DashboardSnapshot(
|
||||
GeneratedAt: generatedAt,
|
||||
@@ -100,9 +100,12 @@ public sealed class DashboardSnapshotService : IDashboardSnapshotService
|
||||
}
|
||||
}
|
||||
|
||||
private static DashboardSessionSummary CreateSessionSummary(GatewaySession session)
|
||||
private static DashboardSessionSummary CreateSessionSummary(
|
||||
GatewaySession session,
|
||||
GatewayMetricsSnapshot metricsSnapshot)
|
||||
{
|
||||
IWorkerClient? workerClient = session.WorkerClient;
|
||||
metricsSnapshot.EventsBySession.TryGetValue(session.SessionId, out long eventsReceived);
|
||||
|
||||
return new DashboardSessionSummary(
|
||||
SessionId: session.SessionId,
|
||||
@@ -117,6 +120,7 @@ public sealed class DashboardSnapshotService : IDashboardSnapshotService
|
||||
WorkerProcessId: workerClient?.ProcessId,
|
||||
WorkerState: workerClient?.State,
|
||||
LastWorkerHeartbeatAt: workerClient?.LastHeartbeatAt,
|
||||
EventsReceived: eventsReceived,
|
||||
LastFault: DashboardRedactor.Redact(session.FinalFault));
|
||||
}
|
||||
|
||||
@@ -138,7 +142,8 @@ public sealed class DashboardSnapshotService : IDashboardSnapshotService
|
||||
[
|
||||
new("mxgateway.sessions.open", snapshot.OpenSessions),
|
||||
new("mxgateway.workers.running", snapshot.WorkersRunning),
|
||||
new("mxgateway.events.queue.depth", snapshot.EventQueueDepth),
|
||||
new("mxgateway.events.worker_queue.depth", snapshot.WorkerEventQueueDepth),
|
||||
new("mxgateway.events.grpc_stream_queue.depth", snapshot.GrpcEventStreamQueueDepth),
|
||||
new("mxgateway.sessions.opened", snapshot.SessionsOpened),
|
||||
new("mxgateway.sessions.closed", snapshot.SessionsClosed),
|
||||
new("mxgateway.commands.started", snapshot.CommandsStarted),
|
||||
|
||||
Reference in New Issue
Block a user