fix: resolve code-review findings (locally verified)

Server-054/055/056, Contracts-020/021/022, Tests-036/038/039,
IntegrationTests-030/031/032 (+033 deferred to live rig),
Client.Dotnet-026/028/029 (+027 won't-fix), Client.Go-030..034,
Client.Python-032..036, Client.Rust-033..038.

Key fix: SessionEventDistributor orphaned a subscriber that registered after
the pump completed but before disposal (Server-056) -> register paths now
complete late registrants under _lifecycleLock; regression test added. The
racy dashboard-mirror gRPC test made deterministic (Tests-039).

Verified green locally: gateway Tests targeted classes (GatewaySession,
SessionEventDistributor, GatewayOptionsValidator, ProtobufContractRoundTrip,
GatewaySessionDashboardMirror) + dotnet/go/python/rust client suites.
This commit is contained in:
Joseph Doherty
2026-06-17 05:23:14 -04:00
parent 25d04ec37e
commit 6b5fe6aa82
37 changed files with 1049 additions and 211 deletions
+22 -7
View File
@@ -837,7 +837,14 @@ func runStreamEvents(ctx context.Context, args []string, stdout, stderr io.Write
defer client.Close()
session := mxgateway.NewSessionForID(client, *sessionID)
streamCtx, cancelStream := context.WithCancel(ctx)
// Ctrl+C on a long-running stream-events command cancels the gRPC stream
// cleanly (the gateway sees codes.Canceled rather than a torn TCP
// connection) and the deferred subscription.Close()/client.Close() run.
signalCtx, stopSignals := signal.NotifyContext(ctx, os.Interrupt, syscall.SIGTERM)
defer stopSignals()
streamCtx, cancelStream := context.WithCancel(signalCtx)
defer cancelStream()
subscription, err := session.SubscribeEventsAfter(streamCtx, *after)
if err != nil {
@@ -1035,15 +1042,17 @@ func runSmoke(ctx context.Context, args []string, stdout, stderr io.Writer) erro
}
func closeSmokeSession(ctx context.Context, session *mxgateway.Session, primaryErr error) error {
closeCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
// Compute the close timeout once so a single context (and a single
// deferred cancel) is allocated: default 5s, shortened to the caller's
// remaining deadline when that is sooner.
closeTimeout := 5 * time.Second
if deadline, ok := ctx.Deadline(); ok {
if until := time.Until(deadline); until > 0 && until < 5*time.Second {
cancel()
closeCtx, cancel = context.WithTimeout(context.Background(), until)
defer cancel()
if until := time.Until(deadline); until > 0 && until < closeTimeout {
closeTimeout = until
}
}
closeCtx, cancel := context.WithTimeout(context.Background(), closeTimeout)
defer cancel()
_, closeErr := session.Close(closeCtx)
if primaryErr != nil {
@@ -1490,6 +1499,12 @@ func runGalaxyWatch(ctx context.Context, args []string, stdout, stderr io.Writer
count++
if *limit > 0 && count >= *limit {
cancelStream()
// Drain so the WatchDeployEvents goroutine can exit instead
// of blocking on a send into the buffered events channel
// while the deferred client.Close() tears the stream down
// underneath it (mirrors the signal-cancel branch below).
for range events {
}
return nil
}
case streamErr, ok := <-errs: