Second re-review pass at commit a020350 caught 48 new findings — including
one High-severity regression I introduced in the prior sweep — and fixed
them all in one parallel wave.
High (1)
- Client.Python-018: prior sweep set `license = "Proprietary"` in
pyproject.toml. setuptools >= 77 enforces PEP 639 and rejects the
string (it must be a valid SPDX expression), so `pip wheel .` and
`pip install -e .` both fail before any source compiles. Tests
still pass because pytest bypasses the build backend via
`pythonpath`. Dropped the invalid license string, kept the
`License :: Other/Proprietary License` classifier, and added
`tests/test_packaging.py` so a future regression of the same shape
is caught in CI.
Mediums (6)
- Worker-023: `HeartbeatStuckCeiling` (default 75s = 5x HeartbeatGrace)
on WorkerPipeSessionOptions bounds the in-flight-command watchdog
suppression so a truly stuck COM call still triggers StaHung
instead of permanently defeating the watchdog.
- Client.Rust-018: reverted Rust's `latencyMs` split so the
cross-language bench comparison is apples-to-apples again;
`failureLatencyMs` kept as Rust-only enrichment.
- Client.Java-021: applied Client.Java-002's terminal-state
serialisation pattern to DeployEventStream so close() arriving
after queue-overflow can't erase the overflow exception.
- IntegrationTests-017: teardown-parity test now uses a two-window
stability check after UnAdvise instead of strict equality against
the pre-UnAdvise count (which raced against in-flight events).
- IntegrationTests-019: new RecordingTestOutputHelper wraps every
log sink the WriteSecured live test owns (worker stdout/stderr,
gateway logs, direct WriteLine) so the credential is proven
absent from the full output buffer, not just the diagnostic
message.
- Tests-020: added MxAccessGatewayServiceConstraintTests coverage
for the previously-uncovered Write2Bulk and WriteSecured2Bulk
arms of WriteBulkConstraintPlan.SetPayload.
Lows (41 — highlights)
- Server: Galaxy glob regex cache lookup no longer races with eviction (Server-024);
GalaxyRepositoryGrpcService takes IGalaxyRepository (Server-025);
AlarmsOptions validated at startup (Server-026); Authorization.md
Constraint Enforcement snippet/prose enumerate the bulk write/read
family (Server-027); bulk-read-commands and bulk-write-commands
capability tokens added to OpenSession (Server-029);
NotWiredAlarmRpcDispatcher XML doc cleaned up and the missing
scope-resolver and state-machine tests added (023, 028).
- Worker: AlarmCommandHandler now invokes the same STA-affinity
guard the poll path uses, at every command entry (Worker-024);
RunAsync null-checks the runtime-session factory result
(Worker-025).
- Worker.Tests: shared LiveMxAccessOptInVariableName lives on
GatewayContractInfo (Worker.Tests-025); MxAccessSession.CreateForTesting
rejects production sinks (Worker.Tests-026); FakeRuntimeSession's
CancelCommandReturnValue serialised under lock (Worker.Tests-027);
Probes namespace lifted to MxGateway.Worker.Tests.Probes
(Worker.Tests-029); cancel-envelope sequence numbers monotonised
(Worker.Tests-030); docs/GatewayTesting.md gains a "Dev-rig Probes"
section (Worker.Tests-028).
- Tests: ManualTimeProvider consolidated into one TestSupport/ copy
(Tests-021); SessionManagerBulkTests adds a mid-flight cancellation
test backed by a TaskCompletionSource fake (Tests-022); companion
FakeWorkerProcess.WaitForExitAsync no longer fakes its exit signal
(Tests-023); constraint plan reply-count divergence pinned
(Tests-024).
- IntegrationTests: TryGetSession chain carries [MaybeNullWhen(false)]
end-to-end (IntegrationTests-018); abnormal-exit keyword set
tightened to pipe-disconnected/end-of-stream and the test now
asserts streamTask.IsFaulted (020, 021).
- Client.Dotnet: bench commands added to isLongRunning so the
default 30s wall-clock budget doesn't kill them (015);
BenchStreamEventsAsync observes the inner stream task on every
exit path (016).
- Client.Go: parseValue wraps strconv errors with flag context and
%w (017); bench loops honour ctx.Done() (018); galaxy-watch parses
RFC3339Nano with fractional seconds (019); runStreamEvents installs
signal.NotifyContext like runGalaxyWatch (020); five new CLI-level
table-driven tests cover the bulk/bench subcommands (021).
- Client.Java: toCompletable Javadoc rewritten to match the actual
cancellation contract Client.Java-015 established (022); stream-events
text path uses Long.toUnsignedString for worker_sequence (023);
bench-read-bulk no longer pollutes success-latency histogram with
failure durations (024); --shutdown-timeout CLI option propagates
through to ClientOptions (025); seven new MxGatewayCliTests cover
the bulk and bench commands (026).
- Client.Python: mxgateway_cli ships its own py.typed marker (019);
wheel-build smoke test added under tests/test_packaging.py (020);
README documents the Galaxy CLI parity gap explicitly (021).
- Client.Rust: RustClientDesign.md signatures match session.rs and
document that read_bulk is generic over AsRef<str> (019);
next_correlation_id re-exported at the crate root, with a
property-style doc contract and an explicit disclaimer that the
literal textual format is not part of the contract (020).
- Contracts: BulkWriteResult comment names the actual
IConstraintEnforcer mechanism instead of "tag-allowlist filter"
(014); BulkReadResult gains explicit per-arm payload-population
documentation for the success vs failure cases (015).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -25,6 +25,7 @@ public sealed class GatewayOptionsValidator : IValidateOptions<GatewayOptions>
|
||||
ValidateEvents(options.Events, failures);
|
||||
ValidateDashboard(options.Dashboard, failures);
|
||||
ValidateProtocol(options.Protocol, failures);
|
||||
ValidateAlarms(options.Alarms, failures);
|
||||
|
||||
return failures.Count == 0
|
||||
? ValidateOptionsResult.Success
|
||||
@@ -228,6 +229,33 @@ public sealed class GatewayOptionsValidator : IValidateOptions<GatewayOptions>
|
||||
failures);
|
||||
}
|
||||
|
||||
private static void ValidateAlarms(AlarmsOptions options, List<string> failures)
|
||||
{
|
||||
if (!options.Enabled)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
// When the alarm auto-subscribe hook is enabled, the gateway needs either a
|
||||
// canonical SubscriptionExpression or a DefaultArea to compose one from. Both
|
||||
// empty is the configuration mistake SessionManager.TryAutoSubscribeAlarmsAsync
|
||||
// currently surfaces per-session — pulling it up to startup validation makes
|
||||
// the misconfiguration fail-fast at boot, in line with every other section.
|
||||
if (string.IsNullOrWhiteSpace(options.SubscriptionExpression)
|
||||
&& string.IsNullOrWhiteSpace(options.DefaultArea))
|
||||
{
|
||||
failures.Add(
|
||||
"MxGateway:Alarms requires either a non-blank SubscriptionExpression or a non-blank DefaultArea when Enabled is true.");
|
||||
}
|
||||
|
||||
if (!string.IsNullOrWhiteSpace(options.SubscriptionExpression)
|
||||
&& !options.SubscriptionExpression.StartsWith(@"\\", StringComparison.Ordinal))
|
||||
{
|
||||
failures.Add(
|
||||
@"MxGateway:Alarms:SubscriptionExpression must start with '\\' (canonical \\<host>\Galaxy!<area> shape).");
|
||||
}
|
||||
}
|
||||
|
||||
private static void ValidateProtocol(ProtocolOptions options, List<string> failures)
|
||||
{
|
||||
if (options.WorkerProtocolVersion != GatewayContractInfo.WorkerProtocolVersion)
|
||||
|
||||
@@ -65,15 +65,20 @@ public static class GalaxyGlobMatcher
|
||||
RegexOptions.CultureInvariant | RegexOptions.IgnoreCase | RegexOptions.Compiled,
|
||||
TimeSpan.FromMilliseconds(100));
|
||||
|
||||
if (RegexCache.TryAdd(glob, compiled))
|
||||
// GetOrAdd atomically returns whichever instance is in the cache after the
|
||||
// call — either the locally-compiled regex (we won the race) or the regex
|
||||
// another thread inserted (we lost). It also avoids the TryAdd-then-indexer
|
||||
// pattern where the key could be evicted between the failed TryAdd and the
|
||||
// indexer read, producing a KeyNotFoundException under contention near the
|
||||
// cap (Server-024).
|
||||
Regex result = RegexCache.GetOrAdd(glob, compiled);
|
||||
if (ReferenceEquals(result, compiled))
|
||||
{
|
||||
// We were the inserter — track for FIFO eviction and bound the cache.
|
||||
InsertionOrder.Enqueue(glob);
|
||||
EvictIfOverCapacity();
|
||||
return compiled;
|
||||
}
|
||||
|
||||
// Another thread won the race — use its compiled regex.
|
||||
return RegexCache[glob];
|
||||
return result;
|
||||
}
|
||||
|
||||
private static void EvictIfOverCapacity()
|
||||
|
||||
@@ -26,7 +26,7 @@ public sealed class EventStreamService(
|
||||
StreamEventsRequest request,
|
||||
[EnumeratorCancellation] CancellationToken cancellationToken)
|
||||
{
|
||||
if (!sessionManager.TryGetSession(request.SessionId, out GatewaySession session))
|
||||
if (!sessionManager.TryGetSession(request.SessionId, out GatewaySession? session) || session is null)
|
||||
{
|
||||
throw new SessionManagerException(
|
||||
SessionManagerErrorCode.SessionNotFound,
|
||||
|
||||
@@ -17,7 +17,7 @@ namespace MxGateway.Server.Grpc;
|
||||
/// direct SQL probe since callers use it as a health check.
|
||||
/// </summary>
|
||||
public sealed class GalaxyRepositoryGrpcService(
|
||||
GalaxyDb.GalaxyRepository repository,
|
||||
GalaxyDb.IGalaxyRepository repository,
|
||||
GalaxyDb.IGalaxyHierarchyCache cache,
|
||||
GalaxyDb.IGalaxyDeployNotifier notifier,
|
||||
IGatewayRequestIdentityAccessor identityAccessor,
|
||||
|
||||
@@ -54,6 +54,8 @@ public sealed class MxAccessGatewayService(
|
||||
reply.Capabilities.Add("unary-invoke");
|
||||
reply.Capabilities.Add("server-stream-events");
|
||||
reply.Capabilities.Add("bulk-subscribe-commands");
|
||||
reply.Capabilities.Add("bulk-read-commands");
|
||||
reply.Capabilities.Add("bulk-write-commands");
|
||||
reply.Capabilities.Add("unary-acknowledge-alarm");
|
||||
reply.Capabilities.Add("server-stream-active-alarms");
|
||||
|
||||
@@ -253,7 +255,7 @@ public sealed class MxAccessGatewayService(
|
||||
|
||||
private GatewaySession ResolveSession(string sessionId)
|
||||
{
|
||||
if (!sessionManager.TryGetSession(sessionId, out GatewaySession session))
|
||||
if (!sessionManager.TryGetSession(sessionId, out GatewaySession? session) || session is null)
|
||||
{
|
||||
throw new SessionManagerException(
|
||||
SessionManagerErrorCode.SessionNotFound,
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
using MxGateway.Contracts.Proto;
|
||||
|
||||
namespace MxGateway.Server.Sessions;
|
||||
@@ -20,7 +21,7 @@ public interface ISessionManager
|
||||
/// <returns>True if the session exists; otherwise false.</returns>
|
||||
bool TryGetSession(
|
||||
string sessionId,
|
||||
out GatewaySession session);
|
||||
[MaybeNullWhen(false)] out GatewaySession session);
|
||||
|
||||
/// <summary>Invokes a command on the worker for the specified session.</summary>
|
||||
/// <param name="sessionId">Identifier of the session.</param>
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
|
||||
namespace MxGateway.Server.Sessions;
|
||||
|
||||
/// <summary>
|
||||
@@ -28,7 +30,7 @@ public interface ISessionRegistry
|
||||
/// <param name="sessionId">Identifier of the session.</param>
|
||||
/// <param name="session">The retrieved session, if found.</param>
|
||||
/// <returns>True if found; false otherwise.</returns>
|
||||
bool TryGet(string sessionId, out GatewaySession session);
|
||||
bool TryGet(string sessionId, [MaybeNullWhen(false)] out GatewaySession session);
|
||||
|
||||
/// <summary>
|
||||
/// Attempts to remove a session by ID; returns false if not found.
|
||||
@@ -36,7 +38,7 @@ public interface ISessionRegistry
|
||||
/// <param name="sessionId">Identifier of the session to remove.</param>
|
||||
/// <param name="session">The removed session, if found.</param>
|
||||
/// <returns>True if removed; false if not found.</returns>
|
||||
bool TryRemove(string sessionId, out GatewaySession session);
|
||||
bool TryRemove(string sessionId, [MaybeNullWhen(false)] out GatewaySession session);
|
||||
|
||||
/// <summary>
|
||||
/// Returns a snapshot of all sessions in the registry.
|
||||
|
||||
@@ -8,20 +8,19 @@ using MxGateway.Server.Grpc;
|
||||
namespace MxGateway.Server.Sessions;
|
||||
|
||||
/// <summary>
|
||||
/// PR A.6 / A.7 — default <see cref="IAlarmRpcDispatcher"/> shipped while
|
||||
/// the worker-side AlarmClient event subscription is gated on dev-rig
|
||||
/// validation. Acknowledges with a structured "worker-pending"
|
||||
/// Null fallback <see cref="IAlarmRpcDispatcher"/> used when no dispatcher
|
||||
/// is registered in the DI container (DI omission or standalone tests).
|
||||
/// Acknowledges with a structured "alarm dispatcher not registered"
|
||||
/// diagnostic and yields an empty active-alarm stream.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// <para>
|
||||
/// Replaces the inline diagnostic strings in
|
||||
/// <c>MxAccessGatewayService.AcknowledgeAlarm</c> /
|
||||
/// <c>QueryActiveAlarms</c> from PR A.3 with an injectable seam.
|
||||
/// When the worker dispatcher (PR A.6/A.7 dev-rig follow-up) lands,
|
||||
/// <c>WorkerAlarmRpcDispatcher</c> replaces this implementation in
|
||||
/// the DI container and the same handler shape comes alive without
|
||||
/// further changes to the public RPC surface.
|
||||
/// Production wires <see cref="WorkerAlarmRpcDispatcher"/> as the
|
||||
/// default <see cref="IAlarmRpcDispatcher"/> via
|
||||
/// <c>SessionServiceCollectionExtensions.AddGatewaySessions</c>, so
|
||||
/// clients that hit this fallback are running against an
|
||||
/// intentionally minimal service composition rather than the full
|
||||
/// gateway.
|
||||
/// </para>
|
||||
/// </remarks>
|
||||
public sealed class NotWiredAlarmRpcDispatcher : IAlarmRpcDispatcher
|
||||
@@ -35,8 +34,8 @@ public sealed class NotWiredAlarmRpcDispatcher : IAlarmRpcDispatcher
|
||||
{
|
||||
SessionId = request.SessionId,
|
||||
CorrelationId = request.ClientCorrelationId,
|
||||
ProtocolStatus = MxAccessGrpcMapper.Ok("AcknowledgeAlarm accepted; worker dispatch pending dev-rig wiring."),
|
||||
DiagnosticMessage = "Gateway-side AcknowledgeAlarm accepted; the worker-side AlarmClient consumer (PR A.5) is in place but the dispatcher hookup is gated on validating the AVEVA alarm-provider event subscription on the dev rig.",
|
||||
ProtocolStatus = MxAccessGrpcMapper.Ok("AcknowledgeAlarm accepted; alarm dispatcher is not registered."),
|
||||
DiagnosticMessage = "Alarm dispatcher is not registered.",
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
using System.Security.Cryptography;
|
||||
using Google.Protobuf.WellKnownTypes;
|
||||
using Microsoft.Extensions.Logging;
|
||||
@@ -132,7 +133,7 @@ public sealed class SessionManager : ISessionManager
|
||||
/// <returns>True if session found; otherwise false.</returns>
|
||||
public bool TryGetSession(
|
||||
string sessionId,
|
||||
out GatewaySession session)
|
||||
[MaybeNullWhen(false)] out GatewaySession session)
|
||||
{
|
||||
return _registry.TryGet(sessionId, out session);
|
||||
}
|
||||
@@ -297,7 +298,7 @@ public sealed class SessionManager : ISessionManager
|
||||
|
||||
private GatewaySession GetRequiredSession(string sessionId)
|
||||
{
|
||||
if (!_registry.TryGet(sessionId, out GatewaySession session))
|
||||
if (!_registry.TryGet(sessionId, out GatewaySession? session) || session is null)
|
||||
{
|
||||
throw new SessionManagerException(
|
||||
SessionManagerErrorCode.SessionNotFound,
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
using System.Collections.Concurrent;
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
using MxGateway.Contracts.Proto;
|
||||
|
||||
namespace MxGateway.Server.Sessions;
|
||||
@@ -38,9 +39,9 @@ public sealed class SessionRegistry : ISessionRegistry
|
||||
/// <param name="session">The retrieved session if found.</param>
|
||||
public bool TryGet(
|
||||
string sessionId,
|
||||
out GatewaySession session)
|
||||
[MaybeNullWhen(false)] out GatewaySession session)
|
||||
{
|
||||
return _sessions.TryGetValue(sessionId, out session!);
|
||||
return _sessions.TryGetValue(sessionId, out session);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
@@ -50,9 +51,9 @@ public sealed class SessionRegistry : ISessionRegistry
|
||||
/// <param name="session">The removed session if found.</param>
|
||||
public bool TryRemove(
|
||||
string sessionId,
|
||||
out GatewaySession session)
|
||||
[MaybeNullWhen(false)] out GatewaySession session)
|
||||
{
|
||||
return _sessions.TryRemove(sessionId, out session!);
|
||||
return _sessions.TryRemove(sessionId, out session);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
|
||||
@@ -76,7 +76,7 @@ public sealed class WorkerAlarmRpcDispatcher(
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(request);
|
||||
|
||||
if (!sessionRegistry.TryGet(request.SessionId, out GatewaySession session))
|
||||
if (!sessionRegistry.TryGet(request.SessionId, out GatewaySession? session) || session is null)
|
||||
{
|
||||
return new AcknowledgeAlarmReply
|
||||
{
|
||||
@@ -186,7 +186,7 @@ public sealed class WorkerAlarmRpcDispatcher(
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(request);
|
||||
|
||||
if (!sessionRegistry.TryGet(request.SessionId, out GatewaySession session))
|
||||
if (!sessionRegistry.TryGet(request.SessionId, out GatewaySession? session) || session is null)
|
||||
{
|
||||
// Server-019: align with AcknowledgeAsync's missing-session handling and
|
||||
// surface a SessionNotFound error rather than yielding an empty stream.
|
||||
|
||||
Reference in New Issue
Block a user