e2e: build client CLIs once and drain events so dotnet/java pass
The cross-language client e2e matrix failed for dotnet and Java. Both failures were in the harness, not the client code. 1. Per-call toolchain cold-start. The matrix issues ~250 CLI calls per client, and it invoked `dotnet run` / `gradle :mxgateway-cli:run` every time, rebuilding and cold-starting the toolchain per call. The harness now builds each CLI once up front (`dotnet build`, `gradle :mxgateway-cli:installDist`) and invokes the compiled artifact directly. This alone fixes dotnet. 2. Worker event-channel overflow. The per-tag advise loop advises every discovered tag with no StreamEvents consumer attached, so change events accumulate in the worker event channel (MxGateway:Events:QueueCapacity) until the FailFast backpressure policy faults the worker. dotnet's faster loop finished before the channel filled; the Java CLI's process-per-call JVM cold-start made its loop too slow to do so. After every -DrainEveryTags advised tags (default 15), the loop now connects a short-lived StreamEvents drain; the gateway's per-stream producer empties the channel the instant a subscriber attaches, so a small bounded read suffices. The full 5-client matrix (dotnet, go, rust, python, java) now passes with -VerifyWrite against a live gateway. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -31,6 +31,14 @@ param(
|
||||
[string]$Database = "ZB",
|
||||
[int]$EventLimit = 5,
|
||||
[int]$BulkTagCount = 6,
|
||||
# The per-tag advise loop advises every discovered tag with no StreamEvents
|
||||
# consumer attached, so MXAccess change events accumulate in the worker
|
||||
# event channel (MxGateway:Events:QueueCapacity). Left undrained, the channel
|
||||
# overflows under FailFast backpressure and faults the worker — slow,
|
||||
# process-per-call clients (the Java CLI) hit this before the loop ends.
|
||||
# Every DrainEveryTags advised tags the loop connects a short-lived
|
||||
# StreamEvents drain so the gateway pumps that channel empty. 0 disables it.
|
||||
[int]$DrainEveryTags = 15,
|
||||
[switch]$SkipStream,
|
||||
[switch]$SkipBulk,
|
||||
# Skip the bulk read+write coverage that runs alongside the existing
|
||||
@@ -97,6 +105,10 @@ if ($BulkTagCount -lt 1) {
|
||||
throw "BulkTagCount must be greater than zero."
|
||||
}
|
||||
|
||||
if ($DrainEveryTags -lt 0) {
|
||||
throw "DrainEveryTags cannot be negative."
|
||||
}
|
||||
|
||||
if ($WriteEchoMaxEvents -lt 1) {
|
||||
throw "WriteEchoMaxEvents must be greater than zero."
|
||||
}
|
||||
@@ -454,6 +466,49 @@ function Assert-BulkResults {
|
||||
}
|
||||
}
|
||||
|
||||
# Builds the dotnet and Java client CLIs once up front and records the path to
# each compiled artifact in script scope ($script:dotnetCliExe and
# $script:javaCliBat). The e2e matrix issues ~250 CLI calls per client;
# invoking `dotnet run` / `gradle :mxgateway-cli:run` per call rebuilds and
# cold-starts the toolchain every time, stretching the per-tag advise loop long
# enough for the worker event channel to overflow under the FailFast
# backpressure policy. Running the compiled artifact keeps per-call latency
# sub-second, matching the Go/Rust/Python paths.
#
# Only clients listed in $Clients are prepared. When $DryRun is set the
# artifact paths are still recorded, but nothing is built or verified.
# Throws when Gradle cannot be found on PATH (Java only) or when a build
# finishes without producing the expected artifact file.
function Initialize-ClientBuilds {
    if ($Clients -contains "dotnet") {
        $cliProject = Join-Path $repoRoot "clients/dotnet/MxGateway.Client.Cli/MxGateway.Client.Cli.csproj"
        $script:dotnetCliExe = Join-Path $repoRoot "clients/dotnet/MxGateway.Client.Cli/bin/Debug/net10.0/MxGateway.Client.Cli.exe"

        if (-not $DryRun) {
            Write-Host "Building the .NET client CLI once: $cliProject"
            $dotnetBuild = @{
                FilePath         = "dotnet"
                Arguments        = @("build", $cliProject, "-c", "Debug", "--nologo", "-v", "quiet")
                WorkingDirectory = $repoRoot
            }
            Invoke-NativeCommand @dotnetBuild | Out-Null

            # -LiteralPath: the path must not be interpreted as a wildcard
            # pattern (a repo root containing '[' or ']' would otherwise be
            # reported as missing). -PathType Leaf: a directory at that
            # location is not a usable executable.
            if (-not (Test-Path -LiteralPath $script:dotnetCliExe -PathType Leaf)) {
                throw "The .NET client CLI build did not produce '$script:dotnetCliExe'."
            }
        }
    }

    if ($Clients -contains "java") {
        $script:javaCliBat = Join-Path $repoRoot "clients/java/mxgateway-cli/build/install/mxgateway-cli/bin/mxgateway-cli.bat"

        if (-not $DryRun) {
            # Take the first Gradle launcher found on PATH, preferring the
            # Windows batch/cmd wrappers.
            $gradleCommand = Get-Command "gradle.bat", "gradle.cmd", "gradle.exe", "gradle" -ErrorAction SilentlyContinue |
                Select-Object -First 1
            if ($null -eq $gradleCommand) {
                throw "The 'gradle' command was not found on PATH; the Java client e2e flow requires Gradle."
            }

            Write-Host "Installing the Java client CLI once via :mxgateway-cli:installDist"
            # The launcher may be a batch file, so it is started through
            # cmd.exe rather than directly.
            $gradleInstall = @{
                FilePath         = "cmd.exe"
                Arguments        = @("/c", $gradleCommand.Source, "--quiet", ":mxgateway-cli:installDist")
                WorkingDirectory = (Join-Path $repoRoot "clients/java")
            }
            Invoke-NativeCommand @gradleInstall | Out-Null

            # Same literal, file-only existence check as the .NET artifact.
            if (-not (Test-Path -LiteralPath $script:javaCliBat -PathType Leaf)) {
                throw "The Java client CLI install did not produce '$script:javaCliBat'."
            }
        }
    }
}
|
||||
|
||||
function Get-ClientCommand {
|
||||
param(
|
||||
[string]$Client,
|
||||
@@ -476,7 +531,6 @@ function Get-ClientCommand {
|
||||
switch ($Client) {
|
||||
"dotnet" {
|
||||
$arguments = @(
|
||||
"run", "--project", "clients/dotnet/MxGateway.Client.Cli", "--",
|
||||
$Operation,
|
||||
"--endpoint", $httpEndpoint,
|
||||
"--api-key-env", $ApiKeyEnvName,
|
||||
@@ -509,7 +563,7 @@ function Get-ClientCommand {
|
||||
} elseif ($Operation -eq "close-session") {
|
||||
$arguments += @("--session-id", $Values.sessionId)
|
||||
}
|
||||
return [pscustomobject]@{ file = "dotnet"; args = $arguments; cwd = $repoRoot; env = @{} }
|
||||
return [pscustomobject]@{ file = $script:dotnetCliExe; args = $arguments; cwd = $repoRoot; env = @{} }
|
||||
}
|
||||
"go" {
|
||||
$arguments = @(
|
||||
@@ -657,18 +711,15 @@ function Get-ClientCommand {
|
||||
} elseif ($Operation -eq "close-session") {
|
||||
$cliArgs += @("--session-id", $Values.sessionId)
|
||||
}
|
||||
$arguments = @("--quiet", ":mxgateway-cli:run", "--args=$($cliArgs -join ' ')")
|
||||
# Gradle ships as gradle.bat on Windows; .NET's Process.Start
|
||||
# (UseShellExecute=false) cannot launch a batch file directly, so
|
||||
# resolve the launcher and run it through cmd.exe.
|
||||
$gradleCommand = Get-Command "gradle.bat", "gradle.cmd", "gradle.exe", "gradle" `
|
||||
-ErrorAction SilentlyContinue | Select-Object -First 1
|
||||
if ($null -eq $gradleCommand) {
|
||||
throw "The 'gradle' command was not found on PATH; the Java client e2e flow requires Gradle."
|
||||
}
|
||||
# The Java CLI is installed once up front (gradle
|
||||
# :mxgateway-cli:installDist) so each call runs the generated
|
||||
# launcher script directly instead of paying Gradle configuration
|
||||
# plus a JVM cold-start per invocation. .NET's Process.Start
|
||||
# (UseShellExecute=false) cannot launch a .bat directly, so the
|
||||
# launcher runs through cmd.exe.
|
||||
return [pscustomobject]@{
|
||||
file = "cmd.exe"
|
||||
args = @("/c", $gradleCommand.Source) + $arguments
|
||||
args = @("/c", $script:javaCliBat) + $cliArgs
|
||||
cwd = (Join-Path $repoRoot "clients/java")
|
||||
env = @{}
|
||||
}
|
||||
@@ -796,6 +847,30 @@ function Invoke-ClientOperationExpectingFailure {
|
||||
-AllowFailure
|
||||
}
|
||||
|
||||
# Attaches a short-lived StreamEvents consumer to the given session so the
# gateway empties the worker event channel. The per-tag advise loop advises
# every discovered tag with no consumer attached; without periodic draining
# the worker event channel (MxGateway:Events:QueueCapacity) overflows under
# FailFast backpressure and faults the worker.
#
# A small bounded read is enough: the gateway's per-stream producer
# (EventStreamService.ProduceEventsAsync) races ahead of the CLI and pulls the
# entire worker event channel into its own buffer the instant a subscriber
# attaches, so the channel is emptied long before the CLI finishes reading
# these events.
#
# Parameters:
#   -Client     Language client whose CLI performs the stream-events call.
#   -SessionId  Session whose pending events should be drained.
#
# The call goes through the expecting-failure path so the drain's exit code is
# ignored, and its output is discarded: the purpose is the side effect
# (emptying the channel), not the events themselves.
function Invoke-EventDrain {
    param(
        [string]$Client,
        [string]$SessionId
    )

    $drainCall = @{
        Client    = $Client
        Operation = "stream-events"
        Values    = @{
            sessionId = $SessionId
            maxEvents = 200
        }
    }

    # Nothing is returned to the caller; whatever the CLI printed is dropped.
    $null = Invoke-ClientOperationExpectingFailure @drainCall
}
|
||||
|
||||
# Runs the full e2e flow for a single language client and returns the result
|
||||
# record. Discovered tags are passed in so the (slow) SQL discovery runs once.
|
||||
function Invoke-ClientFlow {
|
||||
@@ -1000,6 +1075,7 @@ function Invoke-ClientFlow {
|
||||
}
|
||||
}
|
||||
|
||||
$advisedSinceDrain = 0
|
||||
foreach ($tag in $Tags) {
|
||||
$addJson = Invoke-ClientOperation -Client $Client -Operation "add-item" -Values @{
|
||||
sessionId = $sessionId
|
||||
@@ -1020,6 +1096,15 @@ function Invoke-ClientFlow {
|
||||
itemHandle = $itemHandle
|
||||
protectedWriteRequired = $tag.attributeName -eq "ProtectedValue"
|
||||
}
|
||||
|
||||
# Drain the worker event channel every DrainEveryTags advised tags
|
||||
# so this unbounded advise loop cannot overflow it and fault the
|
||||
# worker before the loop completes.
|
||||
$advisedSinceDrain++
|
||||
if ($DrainEveryTags -gt 0 -and $advisedSinceDrain -ge $DrainEveryTags) {
|
||||
Invoke-EventDrain -Client $Client -SessionId $sessionId
|
||||
$advisedSinceDrain = 0
|
||||
}
|
||||
}
|
||||
|
||||
# --- Event streaming ----------------------------------------------
|
||||
@@ -1131,6 +1216,7 @@ function Get-ChildArgumentList {
|
||||
"-Database", $Database,
|
||||
"-EventLimit", "$EventLimit",
|
||||
"-BulkTagCount", "$BulkTagCount",
|
||||
"-DrainEveryTags", "$DrainEveryTags",
|
||||
"-WriteAttribute", $WriteAttribute,
|
||||
"-WriteType", $WriteType,
|
||||
"-WriteValueBase", "$WriteValueBase",
|
||||
@@ -1219,6 +1305,7 @@ if ($Parallel -and $Clients.Count -gt 1) {
|
||||
attributes = $Attributes
|
||||
eventLimit = $EventLimit
|
||||
bulkTagCount = $BulkTagCount
|
||||
drainEveryTags = $DrainEveryTags
|
||||
skipStream = [bool]$SkipStream
|
||||
skipBulk = [bool]$SkipBulk
|
||||
verifyWrite = [bool]$VerifyWrite
|
||||
@@ -1247,6 +1334,8 @@ if ($Parallel -and $Clients.Count -gt 1) {
|
||||
}
|
||||
|
||||
# --- Serial mode -----------------------------------------------------------
|
||||
Initialize-ClientBuilds
|
||||
|
||||
$discoveryJson = & $discoveryScript `
|
||||
-MachineStart $MachineStart `
|
||||
-MachineEnd $MachineEnd `
|
||||
@@ -1277,6 +1366,7 @@ $run = [ordered]@{
|
||||
attributes = $Attributes
|
||||
eventLimit = $EventLimit
|
||||
bulkTagCount = $BulkTagCount
|
||||
drainEveryTags = $DrainEveryTags
|
||||
skipStream = [bool]$SkipStream
|
||||
skipBulk = [bool]$SkipBulk
|
||||
verifyWrite = [bool]$VerifyWrite
|
||||
|
||||
Reference in New Issue
Block a user