Resolve Client.Go-004..010 code-review findings

Client.Go-004: ran gofmt on alarms_test.go and galaxy_test.go; the tree is
now gofmt-clean.

Client.Go-005/009/010: migrated Dial/DialGalaxy off the deprecated
grpc.DialContext/WithBlock to grpc.NewClient via a shared dial helper, with
a DialTimeout-bounded readiness probe to keep fail-fast semantics; shared
callContext deadline arithmetic; updated the stale Dial doc comment. Test
harnesses use passthrough:///bufnet for the NewClient default-scheme change.

Client.Go-006: added GatewayError.Code() and an IsTransient(err) helper so
callers can classify transient gRPC failures.

Client.Go-007: newCorrelationID no longer returns an empty id when
crypto/rand fails — it falls back to a non-empty time+counter id.

Client.Go-008: added coverage_test.go for transport-credential resolution,
callContext deadline arithmetic, and native value/array edge kinds.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Joseph Doherty
2026-05-18 22:42:33 -04:00
parent 89043cb2b6
commit 555fe4c0ba
10 changed files with 574 additions and 82 deletions
+68 -15
View File
@@ -19,6 +19,7 @@ import (
pb "gitea.dohertylan.com/dohertj2/mxaccessgw/clients/go/internal/generated"
"google.golang.org/grpc"
"google.golang.org/grpc/connectivity"
"google.golang.org/grpc/credentials"
"google.golang.org/grpc/credentials/insecure"
"google.golang.org/protobuf/types/known/durationpb"
@@ -36,22 +37,36 @@ type Client struct {
opts Options
}
// Dial opens a gRPC connection to the gateway and configures auth metadata,
// transport security, and blocking dial cancellation from ctx.
// Dial opens a gRPC connection to the gateway and configures auth metadata
// and transport security.
//
// The connection is created lazily with grpc.NewClient: the channel is not
// established until the first RPC (or the readiness probe below) needs it, so
// a gateway that is briefly unavailable at Dial time no longer turns into a
// hard error — the connection recovers when the gateway comes up. To preserve
// fail-fast behavior, Dial then runs an explicit readiness probe bounded by
// DialTimeout (default 10s, or ctx's deadline when sooner): it triggers the
// initial connect and waits for the channel to reach Ready, returning a
// *GatewayError if the gateway cannot be reached in that window. Cancelling
// ctx aborts the probe.
func Dial(ctx context.Context, opts Options) (*Client, error) {
conn, err := dial(ctx, opts)
if err != nil {
return nil, err
}
return NewClient(conn, opts), nil
}
// dial builds the shared gRPC connection used by both Client and GalaxyClient:
// it resolves transport credentials, assembles dial options, creates a lazy
// connection with grpc.NewClient, and runs the DialTimeout-bounded readiness
// probe so callers still fail fast when the gateway is unreachable.
func dial(ctx context.Context, opts Options) (*grpc.ClientConn, error) {
if opts.Endpoint == "" {
return nil, errors.New("mxgateway: endpoint is required")
}
dialCtx := ctx
cancel := func() {}
if opts.DialTimeout > 0 {
dialCtx, cancel = context.WithTimeout(ctx, opts.DialTimeout)
} else if _, ok := ctx.Deadline(); !ok {
dialCtx, cancel = context.WithTimeout(ctx, defaultDialTimeout)
}
defer cancel()
transportCredentials, err := resolveTransportCredentials(opts)
if err != nil {
return nil, err
@@ -61,16 +76,46 @@ func Dial(ctx context.Context, opts Options) (*Client, error) {
grpc.WithTransportCredentials(transportCredentials),
grpc.WithUnaryInterceptor(unaryAuthInterceptor(opts.APIKey)),
grpc.WithStreamInterceptor(streamAuthInterceptor(opts.APIKey)),
grpc.WithBlock(),
}
dialOptions = append(dialOptions, opts.DialOptions...)
conn, err := grpc.DialContext(dialCtx, opts.Endpoint, dialOptions...)
conn, err := grpc.NewClient(opts.Endpoint, dialOptions...)
if err != nil {
return nil, &GatewayError{Op: "dial", Err: err}
}
return NewClient(conn, opts), nil
if err := waitForReady(ctx, conn, opts.DialTimeout); err != nil {
_ = conn.Close()
return nil, &GatewayError{Op: "dial", Err: err}
}
return conn, nil
}
// waitForReady triggers the initial connect on conn and blocks until the
// channel reaches connectivity.Ready, the timeout elapses, or ctx is
// cancelled. The wait is bounded by dialTimeout when positive, otherwise by
// ctx's existing deadline, otherwise by defaultDialTimeout.
func waitForReady(ctx context.Context, conn *grpc.ClientConn, dialTimeout time.Duration) error {
probeCtx := ctx
cancel := func() {}
if dialTimeout > 0 {
probeCtx, cancel = context.WithTimeout(ctx, dialTimeout)
} else if _, ok := ctx.Deadline(); !ok {
probeCtx, cancel = context.WithTimeout(ctx, defaultDialTimeout)
}
defer cancel()
conn.Connect()
for {
state := conn.GetState()
if state == connectivity.Ready {
return nil
}
if !conn.WaitForStateChange(probeCtx, state) {
return probeCtx.Err()
}
}
}
// NewClient wraps an existing gRPC connection. The caller owns closing conn
@@ -188,7 +233,15 @@ func (c *Client) Close() error {
}
func (c *Client) callContext(ctx context.Context) (context.Context, context.CancelFunc) {
timeout := c.opts.CallTimeout
return callContext(ctx, c.opts.CallTimeout)
}
// callContext derives a per-RPC context from ctx, applying callTimeout: zero
// uses defaultCallTimeout, a negative value disables the bound entirely, and a
// caller-supplied deadline that is already sooner than the derived timeout is
// kept as-is rather than being lengthened.
func callContext(ctx context.Context, callTimeout time.Duration) (context.Context, context.CancelFunc) {
timeout := callTimeout
if timeout == 0 {
timeout = defaultCallTimeout
}