diff --git a/Component-CLI.md b/Component-CLI.md index 1abacae..eebf2a5 100644 --- a/Component-CLI.md +++ b/Component-CLI.md @@ -169,6 +169,11 @@ scadalink health event-log --site-identifier [--from ] [--to [--page ] [--page-size ] [--format json|table] ``` +### Debug Commands +``` +scadalink debug snapshot --id [--format json|table] +``` + ### Shared Script Commands ``` scadalink shared-script list [--format json|table] diff --git a/Component-Commons.md b/Component-Commons.md index 1f1d35d..0ec53b3 100644 --- a/Component-Commons.md +++ b/Component-Commons.md @@ -103,7 +103,7 @@ Commons must define the shared DTOs and message contracts used for inter-compone - **Health DTOs**: Health check results, site status reports, heartbeat messages. Includes script error rates and alarm evaluation error rates. - **Communication DTOs**: Site identity, connection state, routing metadata. - **Attribute Stream DTOs**: Attribute value change messages (instance name, attribute path, value, quality, timestamp) and alarm state change messages (instance name, alarm name, state, priority, timestamp) for the site-wide Akka stream. -- **Debug View DTOs**: Subscribe/unsubscribe requests, initial snapshot, stream filter criteria. +- **Debug View DTOs**: Subscribe/unsubscribe requests, one-shot snapshot request (`DebugSnapshotRequest`), initial snapshot, stream filter criteria. - **Script Execution DTOs**: Script call requests (with recursion depth), return values, error results. - **System-Wide Artifact DTOs**: Shared script packages, external system definitions, database connection definitions, notification list definitions. diff --git a/Component-Communication.md b/Component-Communication.md index 3a4e33d..f051e7d 100644 --- a/Component-Communication.md +++ b/Component-Communication.md @@ -59,6 +59,14 @@ Both central and site clusters. Each side has communication actors that handle m - Central sends an unsubscribe request when the debug view closes. 
The site removes its stream subscription. - The stream is session-based and temporary. +### 6a. Debug Snapshot (Central → Site) +- **Pattern**: Request/Response (one-shot, no subscription). +- Central sends a `DebugSnapshotRequest` (identified by instance unique name) to the site. +- Site's Deployment Manager routes to the Instance Actor by unique name. +- Instance Actor builds and returns a `DebugViewSnapshot` with all current attribute values and alarm states (same payload as the streaming initial snapshot). +- No subscription is created; no stream is established. +- Uses the 30-second `QueryTimeout`. + ### 7. Health Reporting (Site → Central) - **Pattern**: Periodic push. - Sites periodically send health metrics (connection status, node status, buffer depth, script error rates, alarm evaluation error rates) to central. @@ -68,6 +76,7 @@ Both central and site clusters. Each side has communication actors that handle m - Central queries sites for: - Parked messages (store-and-forward dead letters). - Site event logs. + - Instance debug snapshots (attribute values and alarm states). - Central can also send management commands: - Retry or discard parked messages. diff --git a/Component-ManagementService.md b/Component-ManagementService.md index 2879752..c40ef0e 100644 --- a/Component-ManagementService.md +++ b/Component-ManagementService.md @@ -124,6 +124,7 @@ The ManagementActor registers itself with `ClusterClientReceptionist` at startup - **QuerySiteEventLog**: Query site event log entries from a remote site (routed via communication layer). Supports date range, keyword search, and pagination. - **QueryParkedMessages**: Query parked (dead-letter) messages at a remote site (routed via communication layer). Supports pagination. +- **DebugSnapshot**: Request a one-shot snapshot of attribute values and alarm states for a running instance. Resolves the instance's site from the config DB and routes via the communication layer. Uses 30s `QueryTimeout`. 
## Authorization @@ -131,7 +132,7 @@ Every incoming message carries the authenticated user's identity and roles. The - **Admin** role required for: site management, area management, API key management, role mapping management, scope rule management, system configuration. - **Design** role required for: template authoring (including template member management: attributes, alarms, scripts, compositions), shared scripts, external system definitions, database connection definitions, notification lists, inbound API method definitions. -- **Deployment** role required for: instance management, deployments, debug view, parked message queries, site event log queries. Site scoping is enforced for site-scoped Deployment users. +- **Deployment** role required for: instance management, deployments, debug view, debug snapshot, parked message queries, site event log queries. Site scoping is enforced for site-scoped Deployment users. - **Read-only access** (any authenticated role): health summary, health site, site event log queries, parked message queries. Unauthorized commands receive an `Unauthorized` response message. Failed authorization attempts are not audit logged (consistent with existing behavior). diff --git a/Component-SiteRuntime.md b/Component-SiteRuntime.md index e85ef78..3c3e9ac 100644 --- a/Component-SiteRuntime.md +++ b/Component-SiteRuntime.md @@ -75,6 +75,10 @@ Deployment Manager Singleton (Cluster Singleton) - **Enable**: Creates a new Instance Actor from the stored configuration (same as startup). - **Delete**: Stops the Instance Actor and its children, removes the deployed configuration from local SQLite. Does **not** clear store-and-forward messages. +### Debug Snapshot Routing +- Receives `DebugSnapshotRequest` from the Communication Layer and forwards to the Instance Actor by unique name (same lookup as `SubscribeDebugViewRequest`). 
+- Returns an error response if no Instance Actor exists for the requested unique name (instance not deployed or not enabled).
+
 ---
 
 ## Instance Actor
@@ -110,6 +114,7 @@ Deployment Manager Singleton (Cluster Singleton)
 ### Debug View Support
 - On request from central (via Communication Layer), the Instance Actor provides a **snapshot** of all current attribute values and alarm states.
 - Subsequent changes are delivered via the site-wide Akka stream, filtered by instance unique name.
+- The Instance Actor also handles one-shot `DebugSnapshotRequest` messages: it builds the same snapshot (attribute values and alarm states) and replies directly to the sender. Unlike `SubscribeDebugViewRequest`, no subscriber is registered and no stream is established.
 
 ### Supervision Strategy
 
diff --git a/src/ScadaLink.CLI/Commands/DebugCommands.cs b/src/ScadaLink.CLI/Commands/DebugCommands.cs
new file mode 100644
index 0000000..f5b6287
--- /dev/null
+++ b/src/ScadaLink.CLI/Commands/DebugCommands.cs
@@ -0,0 +1,45 @@
+using System.CommandLine;
+using System.CommandLine.Parsing;
+using ScadaLink.Commons.Messages.Management;
+
+namespace ScadaLink.CLI.Commands;
+
+/// <summary>
+/// Builds the <c>debug</c> CLI command group (runtime debugging).
+/// </summary>
+public static class DebugCommands
+{
+    /// <summary>
+    /// Creates the <c>debug</c> command and attaches its <c>snapshot</c> sub-command.
+    /// </summary>
+    /// <param name="contactPointsOption">Shared option handed on to <c>CommandHelpers.ExecuteCommandAsync</c>.</param>
+    /// <param name="formatOption">Shared option handed on to <c>CommandHelpers.ExecuteCommandAsync</c>.</param>
+    /// <returns>The configured <c>debug</c> command.</returns>
+    public static Command Build(Option contactPointsOption, Option formatOption)
+    {
+        var command = new Command("debug") { Description = "Runtime debugging" };
+
+        command.Add(BuildSnapshot(contactPointsOption, formatOption));
+
+        return command;
+    }
+
+    /// <summary>
+    /// Creates <c>debug snapshot --id</c>, which hands a <see cref="DebugSnapshotCommand"/>
+    /// for the given instance ID to <c>CommandHelpers.ExecuteCommandAsync</c>.
+    /// </summary>
+    private static Command BuildSnapshot(Option contactPointsOption, Option formatOption)
+    {
+        // Option<int>: the parsed value is passed straight to DebugSnapshotCommand(int InstanceId).
+        var idOption = new Option<int>("--id") { Description = "Instance ID", Required = true };
+        var cmd = new Command("snapshot") { Description = "Get a point-in-time snapshot of instance attribute values and alarm states" };
+        cmd.Add(idOption);
+        cmd.SetAction(async (ParseResult result) =>
+        {
+            return await CommandHelpers.ExecuteCommandAsync(
+                result, contactPointsOption, formatOption,
+                new DebugSnapshotCommand(result.GetValue(idOption)));
+        });
+        return cmd;
+    }
+}
diff --git a/src/ScadaLink.CLI/Program.cs b/src/ScadaLink.CLI/Program.cs
index 143f1c0..5ba8b89 100644
--- a/src/ScadaLink.CLI/Program.cs
+++ b/src/ScadaLink.CLI/Program.cs
@@ -26,6 +26,7 @@ rootCommand.Add(NotificationCommands.Build(contactPointsOption, formatOption));
 rootCommand.Add(SecurityCommands.Build(contactPointsOption, formatOption));
 rootCommand.Add(AuditLogCommands.Build(contactPointsOption, formatOption));
 rootCommand.Add(HealthCommands.Build(contactPointsOption, formatOption));
+rootCommand.Add(DebugCommands.Build(contactPointsOption, formatOption));
 rootCommand.Add(SharedScriptCommands.Build(contactPointsOption, formatOption));
 rootCommand.Add(DbConnectionCommands.Build(contactPointsOption, formatOption));
 rootCommand.Add(ApiMethodCommands.Build(contactPointsOption, formatOption));
diff --git a/src/ScadaLink.Commons/Messages/DebugView/DebugSnapshotRequest.cs b/src/ScadaLink.Commons/Messages/DebugView/DebugSnapshotRequest.cs
new file mode 100644
index 0000000..a82c780
--- /dev/null
+++ b/src/ScadaLink.Commons/Messages/DebugView/DebugSnapshotRequest.cs
@@ -0,0 +1,10 @@
+namespace ScadaLink.Commons.Messages.DebugView;
+
+/// <summary>
+/// One-shot request for the current debug snapshot of a single instance.
+/// </summary>
+/// <param name="InstanceUniqueName">Unique name of the instance to snapshot.</param>
+/// <param name="CorrelationId">Identifier chosen by the requester for this request.</param>
+public record DebugSnapshotRequest(
+    string InstanceUniqueName,
+    string CorrelationId);
diff --git a/src/ScadaLink.Commons/Messages/Management/DebugCommands.cs b/src/ScadaLink.Commons/Messages/Management/DebugCommands.cs
new file mode 100644
index 0000000..24292d0
--- /dev/null
+++ b/src/ScadaLink.Commons/Messages/Management/DebugCommands.cs
@@ -0,0 +1,7 @@
+namespace ScadaLink.Commons.Messages.Management;
+
+/// <summary>
+/// Management command requesting a one-shot debug snapshot of an instance.
+/// </summary>
+/// <param name="InstanceId">ID of the instance to snapshot.</param>
+public record DebugSnapshotCommand(int InstanceId);
diff --git a/src/ScadaLink.Communication/Actors/SiteCommunicationActor.cs b/src/ScadaLink.Communication/Actors/SiteCommunicationActor.cs
index 0b3512b..2040268 100644
--- a/src/ScadaLink.Communication/Actors/SiteCommunicationActor.cs
+++ b/src/ScadaLink.Communication/Actors/SiteCommunicationActor.cs
@@ -104,6 +104,9 @@ public class SiteCommunicationActor :
ReceiveActor, IWithTimers
         Receive(msg => _deploymentManagerProxy.Forward(msg));
         Receive(msg => _deploymentManagerProxy.Forward(msg));
 
+        // Pattern 6a: Debug Snapshot (one-shot) — forward to Deployment Manager
+        Receive<DebugSnapshotRequest>(msg => _deploymentManagerProxy.Forward(msg));
+
         // Pattern 7: Remote Queries
         Receive(msg =>
         {
diff --git a/src/ScadaLink.Communication/CommunicationService.cs b/src/ScadaLink.Communication/CommunicationService.cs
index 5140d6e..8a247dd 100644
--- a/src/ScadaLink.Communication/CommunicationService.cs
+++ b/src/ScadaLink.Communication/CommunicationService.cs
@@ -130,7 +130,26 @@ public class CommunicationService
         GetActor().Tell(new SiteEnvelope(siteId, request));
     }
 
-    // ── Pattern 6: Health Reporting (site→central, Tell) ──
+    // ── Pattern 6a: Debug Snapshot (one-shot, request/response) ──
+
+    /// <summary>
+    /// Asks a site for a one-shot debug snapshot of one instance. The request is
+    /// wrapped in a <c>SiteEnvelope</c> and the ask is bounded by
+    /// <c>_options.QueryTimeout</c>; no subscription is created.
+    /// </summary>
+    /// <param name="siteId">Site the envelope is addressed to.</param>
+    /// <param name="request">Snapshot request naming the instance.</param>
+    /// <param name="cancellationToken">Cancels the pending ask.</param>
+    /// <returns>The snapshot returned by the site.</returns>
+    public async Task<DebugViewSnapshot> RequestDebugSnapshotAsync(
+        string siteId, DebugSnapshotRequest request, CancellationToken cancellationToken = default)
+    {
+        var envelope = new SiteEnvelope(siteId, request);
+        return await GetActor().Ask<DebugViewSnapshot>(
+            envelope, _options.QueryTimeout, cancellationToken);
+    }
+
+    // ── Pattern 6b: Health Reporting (site→central, Tell) ──
     // Health reports are received by central, not sent. No method needed here.
 
     // ── Pattern 7: Remote Queries ──
diff --git a/src/ScadaLink.ManagementService/ManagementActor.cs b/src/ScadaLink.ManagementService/ManagementActor.cs
index d6e51f6..538d286 100644
--- a/src/ScadaLink.ManagementService/ManagementActor.cs
+++ b/src/ScadaLink.ManagementService/ManagementActor.cs
@@ -12,6 +12,7 @@ using ScadaLink.Commons.Entities.Notifications;
 using ScadaLink.Commons.Entities.Security;
 using ScadaLink.Commons.Entities.Sites;
 using ScadaLink.Commons.Interfaces.Repositories;
+using ScadaLink.Commons.Messages.DebugView;
 using ScadaLink.Commons.Messages.Management;
 using ScadaLink.Commons.Messages.RemoteQuery;
 using ScadaLink.DeploymentManager;
@@ -109,7 +110,8 @@ public class ManagementActor : ReceiveActor
             CreateInstanceCommand or MgmtDeployInstanceCommand or MgmtEnableInstanceCommand
             or MgmtDisableInstanceCommand or MgmtDeleteInstanceCommand
             or SetConnectionBindingsCommand
-            or MgmtDeployArtifactsCommand => "Deployment",
+            or MgmtDeployArtifactsCommand
+            or DebugSnapshotCommand => "Deployment",
 
             // Read-only queries -- any authenticated user
             _ => null
@@ -234,6 +236,7 @@ public class ManagementActor : ReceiveActor
             // Remote Queries
             QueryEventLogsCommand cmd => await HandleQueryEventLogs(sp, cmd),
             QueryParkedMessagesCommand cmd => await HandleQueryParkedMessages(sp, cmd),
+            DebugSnapshotCommand cmd => await HandleDebugSnapshot(sp, cmd),
 
             _ => throw new NotSupportedException($"Unknown command type: {command.GetType().Name}")
         };
@@ -1105,4 +1108,24 @@ public class ManagementActor : ReceiveActor
             DateTimeOffset.UtcNow);
         return await commService.QueryParkedMessagesAsync(cmd.SiteIdentifier, request);
     }
+
+    /// <summary>
+    /// Resolves the instance and its site, then requests a one-shot debug
+    /// snapshot from that site through the communication service.
+    /// </summary>
+    /// <exception cref="InvalidOperationException">The instance or its site was not found.</exception>
+    private static async Task HandleDebugSnapshot(IServiceProvider sp, DebugSnapshotCommand cmd)
+    {
+        var instanceRepo = sp.GetRequiredService();
+        var instance = await instanceRepo.GetInstanceByIdAsync(cmd.InstanceId)
+            ?? throw new InvalidOperationException($"Instance {cmd.InstanceId} not found.");
+
+        var siteRepo = sp.GetRequiredService();
+        var site = await siteRepo.GetSiteByIdAsync(instance.SiteId)
+            ?? throw new InvalidOperationException($"Site {instance.SiteId} not found.");
+
+        var commService = sp.GetRequiredService();
+        var request = new DebugSnapshotRequest(instance.UniqueName, Guid.NewGuid().ToString("N"));
+        return await commService.RequestDebugSnapshotAsync(site.SiteIdentifier, request);
+    }
 }
diff --git a/src/ScadaLink.SiteRuntime/Actors/DeploymentManagerActor.cs b/src/ScadaLink.SiteRuntime/Actors/DeploymentManagerActor.cs
index df5dec8..7e1b46b 100644
--- a/src/ScadaLink.SiteRuntime/Actors/DeploymentManagerActor.cs
+++ b/src/ScadaLink.SiteRuntime/Actors/DeploymentManagerActor.cs
@@ -71,6 +71,7 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
         // Debug View — route to Instance Actors
         Receive(RouteDebugViewSubscribe);
         Receive(RouteDebugViewUnsubscribe);
+        Receive<DebugSnapshotRequest>(RouteDebugSnapshot);
 
         // Internal startup messages
         Receive(HandleStartupConfigsLoaded);
@@ -453,6 +454,32 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
         }
     }
 
+    /// <summary>
+    /// Routes a one-shot debug snapshot request to the Instance Actor registered
+    /// under the requested unique name. The request is forwarded, so the Instance
+    /// Actor's reply goes to the original sender.
+    /// </summary>
+    /// <remarks>
+    /// When no Instance Actor is registered for that name the sender receives a
+    /// <see cref="Status.Failure"/> instead of an empty snapshot, so callers can
+    /// tell "instance not running here" apart from "instance has no attributes".
+    /// </remarks>
+    private void RouteDebugSnapshot(DebugSnapshotRequest request)
+    {
+        if (_instanceActors.TryGetValue(request.InstanceUniqueName, out var instanceActor))
+        {
+            instanceActor.Forward(request);
+            return;
+        }
+
+        _logger.LogWarning(
+            "Debug snapshot for unknown instance {Instance}", request.InstanceUniqueName);
+
+        // Fault the caller's Ask rather than fabricating an empty snapshot.
+        Sender.Tell(new Status.Failure(new InvalidOperationException(
+            $"Instance '{request.InstanceUniqueName}' is not deployed or not enabled on this site.")));
+    }
+
     ///
     /// WP-33: Handles system-wide artifact deployment (shared scripts, external systems, etc.).
     /// Persists artifacts to SiteStorageService and recompiles shared scripts.
diff --git a/src/ScadaLink.SiteRuntime/Actors/InstanceActor.cs b/src/ScadaLink.SiteRuntime/Actors/InstanceActor.cs
index fb2e7a4..09fa16c 100644
--- a/src/ScadaLink.SiteRuntime/Actors/InstanceActor.cs
+++ b/src/ScadaLink.SiteRuntime/Actors/InstanceActor.cs
@@ -133,6 +133,9 @@ public class InstanceActor : ReceiveActor
         Receive(HandleSubscribeDebugView);
         Receive(HandleUnsubscribeDebugView);
 
+        // Debug snapshot (one-shot, no subscription)
+        Receive<DebugSnapshotRequest>(HandleDebugSnapshot);
+
         // Handle internal messages
         Receive(HandleOverridesLoaded);
     }
@@ -399,6 +402,43 @@ public class InstanceActor : ReceiveActor
             _instanceUniqueName, request.CorrelationId);
     }
 
+    /// <summary>
+    /// One-shot debug snapshot — replies to the sender with the current attribute
+    /// values and alarm states without registering a subscriber.
+    /// </summary>
+    /// <remarks>
+    /// The clock is read once so every entry and the snapshot envelope carry the
+    /// same timestamp; the snapshot describes a single point in time.
+    /// </remarks>
+    private void HandleDebugSnapshot(DebugSnapshotRequest request)
+    {
+        var capturedAt = DateTimeOffset.UtcNow;
+
+        var attributeValues = _attributes.Select(kvp => new AttributeValueChanged(
+            _instanceUniqueName,
+            kvp.Key,
+            kvp.Key,
+            kvp.Value,
+            // Attributes with no recorded quality are reported as "Good".
+            _attributeQualities.GetValueOrDefault(kvp.Key, "Good"),
+            capturedAt)).ToList();
+
+        var alarmStates = _alarmStates.Select(kvp => new AlarmStateChanged(
+            _instanceUniqueName,
+            kvp.Key,
+            kvp.Value,
+            0, // NOTE(review): always 0 here — presumably the alarm priority; confirm whether the configured value should be reported
+            capturedAt)).ToList();
+
+        var snapshot = new DebugViewSnapshot(
+            _instanceUniqueName,
+            attributeValues,
+            alarmStates,
+            capturedAt);
+
+        Sender.Tell(snapshot);
+    }
+
     ///
     /// Publishes attribute change to stream and notifies child Script/Alarm actors.
     /// WP-22: Tell for attribute notifications (fire-and-forget, never blocks).