fix: resolve Client.Java + Worker.Tests findings (pending windev verification)
Client.Java-040..048, Worker.Tests-034/035/036. Edits applied on the Mac, which has no JRE and cannot build the x86+MXAccess worker tests; findings are marked In Progress pending gradle + x86 build verification on windev. Do not mark Resolved until verified there.
This commit is contained in:
+87
-31
@@ -39,7 +39,10 @@ import java.util.Optional;
|
||||
import java.util.concurrent.ArrayBlockingQueue;
|
||||
import java.util.concurrent.BlockingQueue;
|
||||
import java.util.concurrent.Callable;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
import java.util.concurrent.atomic.AtomicReference;
|
||||
import java.util.function.Consumer;
|
||||
import mxaccess_gateway.v1.MxaccessGateway.AcknowledgeAlarmReply;
|
||||
import mxaccess_gateway.v1.MxaccessGateway.AcknowledgeAlarmRequest;
|
||||
import mxaccess_gateway.v1.MxaccessGateway.ActiveAlarmSnapshot;
|
||||
@@ -105,8 +108,14 @@ public final class MxGatewayCli implements Callable<Integer> {
|
||||
}
|
||||
|
||||
/**
|
||||
* Test-friendly entry point that runs the CLI against the supplied
|
||||
* {@link PrintWriter} pair instead of the system streams.
|
||||
* Entry point that runs the CLI against the supplied {@link PrintWriter}
|
||||
* pair instead of the system streams. This overload wires the production
|
||||
* {@link GrpcMxGatewayCliClientFactory} (a real gRPC channel), so it is
|
||||
* suitable for embedding the CLI but not for unit tests that need to stub
|
||||
* the gateway. Tests should use the package-private
|
||||
* {@link #execute(MxGatewayCliClientFactory, PrintWriter, PrintWriter, String...)}
|
||||
* / {@link #commandLine(MxGatewayCliClientFactory)} overloads, which accept
|
||||
* an injectable client factory.
|
||||
*
|
||||
* @param out writer that receives standard output
|
||||
* @param err writer that receives standard error
|
||||
@@ -1536,50 +1545,74 @@ public final class MxGatewayCli implements Callable<Integer> {
|
||||
StreamAlarmsRequest request = StreamAlarmsRequest.newBuilder()
|
||||
.setAlarmFilterPrefix(filterPrefix)
|
||||
.build();
|
||||
// Client.Java-033 — fail-fast on overflow. A bare
|
||||
// queue.offer(value) silently drops messages past capacity,
|
||||
// which violates the JavaStyleGuide "do not drop events"
|
||||
// contract and lets the CLI exit 0 on a truncated feed.
|
||||
// Mirrors MxEventStream's overflow branch: detect a failed
|
||||
// offer, cancel the subscription, drain the buffer, then
|
||||
// queue an explicit overflow exception followed by the END
|
||||
// sentinel so the drain loop surfaces a non-zero exit.
|
||||
// Client.Java-033/040/042 — fail-fast on overflow and on
|
||||
// transport errors. A bare queue.offer(value) silently drops
|
||||
// messages past capacity (violating the JavaStyleGuide "do not
|
||||
// drop events" contract and letting the CLI exit 0 on a
|
||||
// truncated feed), and a bare queue.offer(error) on a full
|
||||
// queue would drop the terminal item and deadlock the drain on
|
||||
// queue.take().
|
||||
//
|
||||
// Terminal transitions (overflow, transport error, clean
|
||||
// completion) are now serialised through a single AtomicBoolean
|
||||
// guard plus a dedicated `terminal` slot rather than
|
||||
// re-clearing the shared queue. The first terminal condition
|
||||
// wins; a concurrent onNext on the gRPC I/O thread can no
|
||||
// longer displace it (Client.Java-040). The drain reads the
|
||||
// terminal slot independently of the bounded queue, so a full
|
||||
// queue can never strand the terminal item (Client.Java-042).
|
||||
AtomicReference<MxGatewayAlarmFeedSubscription> subscriptionRef = new AtomicReference<>();
|
||||
AtomicBoolean terminated = new AtomicBoolean();
|
||||
AtomicReference<Object> terminal = new AtomicReference<>();
|
||||
Consumer<Object> terminate = item -> {
|
||||
if (terminated.compareAndSet(false, true)) {
|
||||
terminal.set(item);
|
||||
MxGatewayAlarmFeedSubscription sub = subscriptionRef.get();
|
||||
if (sub != null) {
|
||||
sub.cancel();
|
||||
}
|
||||
}
|
||||
};
|
||||
MxGatewayAlarmFeedSubscription subscription =
|
||||
client.streamAlarms(request, new StreamObserver<>() {
|
||||
@Override
|
||||
public void onNext(AlarmFeedMessage value) {
|
||||
if (terminated.get()) {
|
||||
return;
|
||||
}
|
||||
if (!queue.offer(value)) {
|
||||
MxGatewayAlarmFeedSubscription sub = subscriptionRef.get();
|
||||
if (sub != null) {
|
||||
sub.cancel();
|
||||
}
|
||||
queue.clear();
|
||||
queue.offer(new IllegalStateException(
|
||||
terminate.accept(new IllegalStateException(
|
||||
"stream-alarms queue overflowed (capacity 1024); consumer too slow"));
|
||||
queue.offer(ALARM_FEED_END);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onError(Throwable error) {
|
||||
queue.offer(error);
|
||||
terminate.accept(error);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onCompleted() {
|
||||
queue.offer(ALARM_FEED_END);
|
||||
terminate.accept(ALARM_FEED_END);
|
||||
}
|
||||
});
|
||||
subscriptionRef.set(subscription);
|
||||
try {
|
||||
int count = 0;
|
||||
while (true) {
|
||||
Object item = queue.take();
|
||||
if (item == ALARM_FEED_END) {
|
||||
break;
|
||||
}
|
||||
if (item instanceof Throwable error) {
|
||||
// Poll with a short timeout so the dedicated terminal
|
||||
// slot is observed even when the bounded queue is full
|
||||
// of normal messages the consumer has not yet drained.
|
||||
Object item = queue.poll(50, TimeUnit.MILLISECONDS);
|
||||
if (item == null) {
|
||||
Object end = terminal.get();
|
||||
if (end == null) {
|
||||
continue;
|
||||
}
|
||||
if (end == ALARM_FEED_END) {
|
||||
break;
|
||||
}
|
||||
Throwable error = (Throwable) end;
|
||||
throw new IllegalStateException(
|
||||
"gateway stream alarms failed: " + error.getMessage(), error);
|
||||
}
|
||||
@@ -2184,13 +2217,36 @@ public final class MxGatewayCli implements Callable<Integer> {
|
||||
return jsonString(value.toString());
|
||||
}
|
||||
|
||||
private static String jsonString(String value) {
|
||||
return '"'
|
||||
+ value.replace("\\", "\\\\")
|
||||
.replace("\"", "\\\"")
|
||||
.replace("\r", "\\r")
|
||||
.replace("\n", "\\n")
|
||||
+ '"';
|
||||
// Package-private for the Client.Java-041 escaping regression test.
|
||||
static String jsonString(String value) {
|
||||
// RFC 8259 requires the two-character escapes for the named control
|
||||
// characters and \u00XX escapes for the remaining U+0000–U+001F (and
|
||||
// U+007F) range. The old implementation escaped only \\ \" \r \n, so a
|
||||
// value containing a tab, backspace, form-feed, or any other control
|
||||
// character produced malformed JSON (Client.Java-041).
|
||||
StringBuilder builder = new StringBuilder(value.length() + 2);
|
||||
builder.append('"');
|
||||
for (int i = 0; i < value.length(); i++) {
|
||||
char c = value.charAt(i);
|
||||
switch (c) {
|
||||
case '\\' -> builder.append("\\\\");
|
||||
case '"' -> builder.append("\\\"");
|
||||
case '\r' -> builder.append("\\r");
|
||||
case '\n' -> builder.append("\\n");
|
||||
case '\t' -> builder.append("\\t");
|
||||
case '\b' -> builder.append("\\b");
|
||||
case '\f' -> builder.append("\\f");
|
||||
default -> {
|
||||
if (c < 0x20 || c == 0x7f) {
|
||||
builder.append(String.format("\\u%04x", (int) c));
|
||||
} else {
|
||||
builder.append(c);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
builder.append('"');
|
||||
return builder.toString();
|
||||
}
|
||||
|
||||
private record RawJson(String value) {
|
||||
|
||||
+16
@@ -46,6 +46,22 @@ import mxaccess_gateway.v1.MxaccessGateway.SessionState;
|
||||
* instance uses a unique server name so harnesses do not collide. The
|
||||
* {@code directExecutor()} wiring keeps all dispatch on the calling thread, so
|
||||
* no background threads are leaked.
|
||||
*
|
||||
* <p><strong>Implemented RPCs.</strong> The scripted services override only the
|
||||
* RPCs the CLI tests currently exercise:
|
||||
*
|
||||
* <ul>
|
||||
* <li>{@code MxAccessGateway}: {@code streamEvents}, {@code closeSession}.</li>
|
||||
* <li>{@code GalaxyRepository}: {@code discoverHierarchy},
|
||||
* {@code watchDeployEvents}.</li>
|
||||
* </ul>
|
||||
*
|
||||
* Every other RPC (e.g. {@code openSession}, {@code invoke}, {@code register},
|
||||
* {@code streamAlarms}, {@code queryActiveAlarms}, {@code browseChildren}) is
|
||||
* left at the generated {@code *ImplBase} default and therefore returns gRPC
|
||||
* {@code UNIMPLEMENTED} by design. A future test that needs one of those paths
|
||||
* must add the corresponding scripted override here first — otherwise the call
|
||||
* fails with {@code UNIMPLEMENTED} rather than the behaviour under test.
|
||||
*/
|
||||
final class InProcessGatewayHarness implements AutoCloseable {
|
||||
private final String serverName;
|
||||
|
||||
+84
-40
@@ -56,17 +56,37 @@ final class MxGatewayCliTests {
|
||||
|
||||
assertEquals(0, run.exitCode());
|
||||
assertEquals("", run.errors());
|
||||
assertTrue(run.output().contains("mxgateway-java 0.1.0"));
|
||||
assertTrue(run.output().contains("mxgateway-java 0.1.1"));
|
||||
assertTrue(run.output().contains("gatewayProtocolVersion=3"));
|
||||
assertTrue(run.output().contains("workerProtocolVersion=1"));
|
||||
}
|
||||
|
||||
@Test
|
||||
void jsonStringEscapesControlCharacters() {
|
||||
// Client.Java-041 — the hand-rolled jsonString escaped only \\ \" \r \n,
|
||||
// so a tab/backspace/form-feed or any other control char produced
|
||||
// malformed JSON (RFC 8259). After the fix the named control chars use
|
||||
// their two-character escapes and the rest use \u00XX.
|
||||
assertEquals("\"a\\tb\"", MxGatewayCli.jsonString("a\tb"));
|
||||
assertEquals("\"a\\bb\"", MxGatewayCli.jsonString("a\bb"));
|
||||
assertEquals("\"a\\fb\"", MxGatewayCli.jsonString("a\fb"));
|
||||
assertEquals("\"a\\rb\"", MxGatewayCli.jsonString("a\rb"));
|
||||
assertEquals("\"a\\nb\"", MxGatewayCli.jsonString("a\nb"));
|
||||
// A non-named control character (U+0001) must become .
|
||||
assertEquals("\"a\\u0001b\"", MxGatewayCli.jsonString("ab"));
|
||||
// DEL (U+007F) is also escaped.
|
||||
assertEquals("\"a\\u007fb\"", MxGatewayCli.jsonString("ab"));
|
||||
// Quote and backslash still escape; ordinary printable text is verbatim.
|
||||
assertEquals("\"a\\\"\\\\b\"", MxGatewayCli.jsonString("a\"\\b"));
|
||||
assertEquals("\"plain\"", MxGatewayCli.jsonString("plain"));
|
||||
}
|
||||
|
||||
@Test
|
||||
void versionCommandPrintsJson() {
|
||||
CliRun run = execute(new FakeClientFactory(), "version", "--json");
|
||||
|
||||
assertEquals(0, run.exitCode());
|
||||
assertTrue(run.output().contains("\"clientVersion\":\"0.1.0\""));
|
||||
assertTrue(run.output().contains("\"clientVersion\":\"0.1.1\""));
|
||||
assertTrue(run.output().contains("\"gatewayProtocolVersion\":3"));
|
||||
}
|
||||
|
||||
@@ -241,27 +261,20 @@ final class MxGatewayCliTests {
|
||||
void galaxyBrowseParentZeroEmitsWarningToStderr() {
|
||||
// --parent 0 is the server sentinel for roots; passing it explicitly is
|
||||
// almost certainly a mistake. The CLI must print a warning to stderr
|
||||
// (matching Go/Rust client behaviour) but must still attempt the call
|
||||
// (exit behaviour depends on gateway reachability, not tested here;
|
||||
// we only assert the warning path is triggered by checking the error
|
||||
// writer before any gRPC connection is attempted).
|
||||
// (matching Go/Rust client behaviour) but must still attempt the call.
|
||||
//
|
||||
// GalaxyBrowseCommand connects to a real GalaxyRepositoryClient, so the
|
||||
// call() body will throw after printing the warning when no gateway is
|
||||
// reachable. We only assert the warning appears on stderr.
|
||||
StringWriter output = new StringWriter();
|
||||
StringWriter errors = new StringWriter();
|
||||
// Non-zero exit is expected (no live gateway), but the warning must
|
||||
// appear on stderr regardless of what happens next.
|
||||
MxGatewayCli.execute(
|
||||
new FakeClientFactory(),
|
||||
new PrintWriter(output, true),
|
||||
new PrintWriter(errors, true),
|
||||
// GalaxyBrowseCommand prints the warning, then calls connect() on the
|
||||
// GalaxyClientFactory. We inject a stub factory whose connect() throws,
|
||||
// so only the warning path runs — no live Netty channel to localhost is
|
||||
// constructed (Client.Java-043). The warning is emitted before
|
||||
// connect() is reached, so it appears on stderr regardless.
|
||||
CliRun run = executeGalaxy(
|
||||
new ThrowingGalaxyClientFactory(),
|
||||
"galaxy-browse", "--parent", "0", "--depth", "1");
|
||||
|
||||
assertTrue(
|
||||
errors.toString().contains("--parent 0"),
|
||||
"expected '--parent 0' warning on stderr; got: " + errors);
|
||||
run.errors().contains("--parent 0"),
|
||||
"expected '--parent 0' warning on stderr; got: " + run.errors());
|
||||
}
|
||||
|
||||
// ---- galaxy command-name aliases (D9-java) ----
|
||||
@@ -678,21 +691,28 @@ final class MxGatewayCliTests {
|
||||
|
||||
@Test
|
||||
void streamAlarmsCommandFailsFastOnQueueOverflow() {
|
||||
// Client.Java-033 regression — the CLI's stream-alarms bounded queue
|
||||
// used queue.offer(value) which silently dropped messages past
|
||||
// capacity (1024). After the fix the CLI must surface the overflow
|
||||
// as a non-zero exit (mirroring MxEventStream's fail-fast contract).
|
||||
// Client.Java-033/040/046 regression — the CLI's stream-alarms bounded
|
||||
// queue used queue.offer(value) which silently dropped messages past
|
||||
// capacity (1024). After the fix the CLI must surface the overflow as a
|
||||
// non-zero exit (mirroring MxEventStream's fail-fast contract).
|
||||
//
|
||||
// The OverflowingFakeClient floods the gRPC observer with 2000
|
||||
// messages synchronously, which exceeds the bounded 1024-element
|
||||
// queue. The fix detects the failed offer, cancels the subscription,
|
||||
// queues an overflow exception, and the drain loop surfaces it.
|
||||
// The OverflowingFakeClient floods the gRPC observer on a BACKGROUND
|
||||
// thread so the subscription is already published when the overflow
|
||||
// fires — exercising the terminate() cancel path with a non-null
|
||||
// subscription (Client.Java-046), not just the synchronous-flood path
|
||||
// where subscriptionRef is still null. The fix records the overflow in
|
||||
// a dedicated terminal slot (no queue.clear, Client.Java-040) and the
|
||||
// drain loop surfaces it with the overflow message text.
|
||||
OverflowingFakeClientFactory factory = new OverflowingFakeClientFactory();
|
||||
CliRun run = execute(factory, "stream-alarms", "--filter-prefix", "Flood");
|
||||
|
||||
assertFalse(run.exitCode() == 0,
|
||||
"expected non-zero exit when the alarm queue overflows; got exit=" + run.exitCode()
|
||||
+ " out=\n" + run.output() + "\nerr=\n" + run.errors());
|
||||
assertTrue(
|
||||
run.errors().contains("queue overflowed") || run.output().contains("queue overflowed"),
|
||||
"expected the overflow message text to surface; out=\n" + run.output()
|
||||
+ "\nerr=\n" + run.errors());
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -1050,6 +1070,18 @@ final class MxGatewayCliTests {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Galaxy client factory whose {@code connect} throws, so a test can exercise
|
||||
* a command's pre-connect path (e.g. the {@code --parent 0} warning) without
|
||||
* constructing a live Netty channel to localhost (Client.Java-043).
|
||||
*/
|
||||
private static final class ThrowingGalaxyClientFactory implements MxGatewayCli.GalaxyClientFactory {
|
||||
@Override
|
||||
public com.zb.mom.ww.mxgateway.client.GalaxyRepositoryClient connect(MxGatewayClientOptions options) {
|
||||
throw new IllegalStateException("galaxy connect not available in this test");
|
||||
}
|
||||
}
|
||||
|
||||
private static final class OverflowingFakeClient implements MxGatewayCli.MxGatewayCliClient {
|
||||
private final PrintWriter out;
|
||||
|
||||
@@ -1089,19 +1121,31 @@ final class MxGatewayCliTests {
|
||||
@Override
|
||||
public MxGatewayAlarmFeedSubscription streamAlarms(
|
||||
StreamAlarmsRequest request, StreamObserver<AlarmFeedMessage> observer) {
|
||||
// Synchronously push 2000 messages to overflow the CLI's bounded
|
||||
// 1024-element queue. The CLI must surface the overflow rather
|
||||
// than silently dropping the trailing ~976 messages.
|
||||
for (int i = 0; i < 2000; i++) {
|
||||
observer.onNext(AlarmFeedMessage.newBuilder()
|
||||
.setActiveAlarm(ActiveAlarmSnapshot.newBuilder()
|
||||
.setAlarmFullReference("Flood." + i)
|
||||
.setCurrentState(AlarmConditionState.ALARM_CONDITION_STATE_ACTIVE)
|
||||
.setSeverity(700))
|
||||
.build());
|
||||
}
|
||||
observer.onCompleted();
|
||||
return new MxGatewayAlarmFeedSubscription();
|
||||
// Push messages on a BACKGROUND thread (mirroring real gRPC, which
|
||||
// delivers onNext on a netty I/O thread) so the CLI's
|
||||
// subscriptionRef is already published when the overflow fires —
|
||||
// this exercises the terminate() cancel path with a non-null
|
||||
// subscription (Client.Java-046), unlike a synchronous flood that
|
||||
// overflows before streamAlarms even returns. Keeps pushing until
|
||||
// it observes the CLI cancelling the subscription on overflow, so
|
||||
// no fixed message count is needed and the thread always exits.
|
||||
MxGatewayAlarmFeedSubscription subscription = new MxGatewayAlarmFeedSubscription();
|
||||
Thread flood = new Thread(() -> {
|
||||
int i = 0;
|
||||
while (!Thread.currentThread().isInterrupted() && i < 100_000) {
|
||||
observer.onNext(AlarmFeedMessage.newBuilder()
|
||||
.setActiveAlarm(ActiveAlarmSnapshot.newBuilder()
|
||||
.setAlarmFullReference("Flood." + i)
|
||||
.setCurrentState(AlarmConditionState.ALARM_CONDITION_STATE_ACTIVE)
|
||||
.setSeverity(700))
|
||||
.build());
|
||||
i++;
|
||||
}
|
||||
observer.onCompleted();
|
||||
}, "overflowing-fake-alarm-feed");
|
||||
flood.setDaemon(true);
|
||||
flood.start();
|
||||
return subscription;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
||||
+1
-1
@@ -9,7 +9,7 @@ package com.zb.mom.ww.mxgateway.client;
|
||||
public final class MxGatewayClientVersion {
|
||||
private static final int GATEWAY_PROTOCOL_VERSION = 3;
|
||||
private static final int WORKER_PROTOCOL_VERSION = 1;
|
||||
private static final String CLIENT_VERSION = "0.1.0";
|
||||
private static final String CLIENT_VERSION = "0.1.1";
|
||||
|
||||
private MxGatewayClientVersion() {
|
||||
}
|
||||
|
||||
@@ -752,13 +752,13 @@ BrowseChildrenReply reply = galaxy.browseChildren(
|
||||
| Severity | Medium |
|
||||
| Category | Correctness & logic bugs |
|
||||
| Location | `clients/java/zb-mom-ww-mxgateway-cli/src/main/java/com/zb/mom/ww/mxgateway/cli/MxGatewayCli.java:1552-1561` |
|
||||
| Status | Open |
|
||||
| Status | In Progress |
|
||||
|
||||
**Description:** The `stream-alarms` overflow handler does `queue.clear()` then `offer(exception)` + `offer(ALARM_FEED_END)` non-atomically on an `ArrayBlockingQueue` shared with the gRPC delivery thread. In production gRPC (netty I/O thread), a concurrent `onNext` between the clear and the offers can re-enqueue a normal message, displacing the overflow exception so the drain loop hits the normal message and may exit before reaching the exception — exiting 0 on a truncated feed. Same race class as Client.Java-002/033.
|
||||
|
||||
**Recommendation:** Guard the overflow transition with an `AtomicBoolean` (mirror `MxGatewayStreamSubscription.terminate()`'s terminated-flag + lock) instead of re-clearing the queue.
|
||||
|
||||
**Resolution:** _(empty until closed)_
|
||||
**Resolution:** 2026-06-16 — Confirmed root cause in `StreamAlarmsCommand.call()`: the overflow branch did `queue.clear()` then `offer(exception)` + `offer(ALARM_FEED_END)`, so a concurrent `onNext` between the clear and the offers could re-enqueue a normal message and displace the overflow signal. (Note: `MxGatewayStreamSubscription` has no `terminate()` method; the terminal-guard model lives in `MxEventStream`, which itself still uses the clear+offer shape — I implemented the atomic guard the finding asks for rather than copying the older pattern.) Replaced the clear+offer with a single `AtomicBoolean terminated` guard (`compareAndSet(false,true)` — first terminal wins) plus a dedicated `AtomicReference<Object> terminal` slot that holds the terminal item (overflow exception / transport error / `ALARM_FEED_END`) independently of the bounded queue. `onNext` no longer re-clears the queue; it just stops enqueueing once terminated. The drain loop now `poll(50ms)`s and, when the queue is empty, reads the terminal slot. No re-clear, and a concurrent `onNext` can no longer displace the terminal. Fix applied 2026-06-16, pending gradle verification on windev. Regression test: `MxGatewayCliTests.streamAlarmsCommandFailsFastOnQueueOverflow` (strengthened under Client.Java-046 to drive async delivery and assert the overflow text).
|
||||
|
||||
### Client.Java-041
|
||||
|
||||
@@ -767,13 +767,13 @@ BrowseChildrenReply reply = galaxy.browseChildren(
|
||||
| Severity | Low |
|
||||
| Category | Correctness & logic bugs |
|
||||
| Location | `clients/java/zb-mom-ww-mxgateway-cli/src/main/java/com/zb/mom/ww/mxgateway/cli/MxGatewayCli.java:2187-2194` |
|
||||
| Status | Open |
|
||||
| Status | In Progress |
|
||||
|
||||
**Description:** `jsonString` escapes only `\`, `"`, `\r`, `\n` — not `\t`, `\b`, `\f`, or U+0000–U+001F/U+007F. A tag address/message/reference containing a tab produces malformed JSON (RFC 8259). Affects the hand-rolled `jsonObject`/`jsonString`/`jsonValue` output paths (the protobuf `JsonFormat` path is spec-correct).
|
||||
|
||||
**Recommendation:** Add `\t`/`\b`/`\f` escapes and `\u00XX` for control chars, or route all JSON through a real JSON library.
|
||||
|
||||
**Resolution:** _(empty until closed)_
|
||||
**Resolution:** 2026-06-16 — Confirmed: `jsonString` escaped only `\\ \" \r \n`, so a tab/backspace/form-feed or any other U+0000–U+001F (or U+007F) char produced malformed JSON. Rewrote `jsonString` as a per-character builder that emits the two-character escapes for `\t \b \f \r \n \" \\` and `\u00XX` for the remaining `< 0x20` range plus DEL (`0x7f`), keeping ordinary printable characters verbatim. Widened `jsonString` from `private` to package-private (matching the Client.Java-032 `commandLine(...)` precedent) so the escaping can be unit-tested directly. Fix applied 2026-06-16, pending gradle verification on windev. Regression test: `MxGatewayCliTests.jsonStringEscapesControlCharacters`.
|
||||
|
||||
### Client.Java-042
|
||||
|
||||
@@ -782,13 +782,13 @@ BrowseChildrenReply reply = galaxy.browseChildren(
|
||||
| Severity | Low |
|
||||
| Category | Error handling & resilience |
|
||||
| Location | `clients/java/zb-mom-ww-mxgateway-cli/src/main/java/com/zb/mom/ww/mxgateway/cli/MxGatewayCli.java:1565-1567` |
|
||||
| Status | Open |
|
||||
| Status | In Progress |
|
||||
|
||||
**Description:** `StreamAlarmsCommand.onError` calls `queue.offer(error)` without checking the return value. If the queue is full when a transport error arrives, the error is dropped and the drain loop blocks forever on `queue.take()`. Same class as Client.Java-033 on the error path.
|
||||
|
||||
**Recommendation:** Reserve a sentinel slot or use the `terminate(Throwable)` guard from `MxEventStream`; ensure the drain always sees a terminal item.
|
||||
|
||||
**Resolution:** _(empty until closed)_
|
||||
**Resolution:** 2026-06-16 — Confirmed: `onError` did a bare `queue.offer(error)` that, on a full queue, dropped the error and stranded the drain on `queue.take()` forever. Fixed together with Client.Java-040: `onError` now routes through the shared `terminate(error)` consumer, which records the throwable in the dedicated `terminal` slot (guarded by the `AtomicBoolean`, never enqueued into the bounded `queue`). The drain loop reads that slot via the `poll(50ms)` + terminal-check path, so a transport error is always observed even when the queue is full, and the `take()`-forever deadlock is gone. Fix applied 2026-06-16, pending gradle verification on windev. Covered by the same `streamAlarmsCommandFailsFastOnQueueOverflow` terminal-slot plumbing; the error path shares the slot with the overflow path.
|
||||
|
||||
### Client.Java-043
|
||||
|
||||
@@ -797,13 +797,13 @@ BrowseChildrenReply reply = galaxy.browseChildren(
|
||||
| Severity | Low |
|
||||
| Category | Code organization & conventions |
|
||||
| Location | `clients/java/zb-mom-ww-mxgateway-cli/src/test/java/com/zb/mom/ww/mxgateway/cli/MxGatewayCliTests.java:241-264` |
|
||||
| Status | Open |
|
||||
| Status | In Progress |
|
||||
|
||||
**Description:** `galaxyBrowseParentZeroEmitsWarningToStderr` calls `MxGatewayCli.execute(new FakeClientFactory(), ...)` for a galaxy-browse command, which wires the real `GrpcGalaxyClientFactory` and constructs a live Netty channel to localhost:5000 as a side effect (asserting only the warning). Wasteful and non-deterministic if port 5000 is reachable.
|
||||
|
||||
**Recommendation:** Use `executeGalaxy(...)` with a `GalaxyClientFactory` stub that throws, so only the warning path runs.
|
||||
|
||||
**Resolution:** _(empty until closed)_
|
||||
**Resolution:** 2026-06-16 — Confirmed: the test called `MxGatewayCli.execute(new FakeClientFactory(), ...)`, which routes galaxy commands through the production `GrpcGalaxyClientFactory`; `GalaxyBrowseCommand.call()` prints the `--parent 0` warning then `connect()`s a live `GalaxyRepositoryClient` (Netty channel to localhost:5000) before failing — wasteful and non-deterministic. Rewrote the test to use the existing `executeGalaxy(...)` seam with a new `ThrowingGalaxyClientFactory` stub whose `connect()` throws; the warning is emitted before `connect()` is reached, so only the warning path runs and no live channel is constructed. Fix applied 2026-06-16, pending gradle verification on windev. Test: `MxGatewayCliTests.galaxyBrowseParentZeroEmitsWarningToStderr` (updated).
|
||||
|
||||
### Client.Java-044
|
||||
|
||||
@@ -812,13 +812,13 @@ BrowseChildrenReply reply = galaxy.browseChildren(
|
||||
| Severity | Low |
|
||||
| Category | Code organization & conventions |
|
||||
| Location | `clients/java/zb-mom-ww-mxgateway-client/src/main/java/com/zb/mom/ww/mxgateway/client/MxGatewayClientVersion.java:12` |
|
||||
| Status | Open |
|
||||
| Status | In Progress |
|
||||
|
||||
**Description:** `CLIENT_VERSION = "0.1.0"` is out of sync with Gradle `version = '0.1.1'` (cross-ref `clients/java/build.gradle:6`). The `version` command advertises 0.1.0 while the published artifact is 0.1.1; consumers can't use the version string as a reliable artifact check.
|
||||
|
||||
**Recommendation:** Bump `CLIENT_VERSION` to `0.1.1` (and the two test assertions), or source it from a Gradle-generated properties file.
|
||||
|
||||
**Resolution:** _(empty until closed)_
|
||||
**Resolution:** 2026-06-16 — Confirmed: `MxGatewayClientVersion.CLIENT_VERSION = "0.1.0"` while `clients/java/build.gradle:16` sets `version = '0.1.1'` and the README Maven coordinate is `:0.1.1`. Bumped `CLIENT_VERSION` to `"0.1.1"` and updated the two test assertions (`MxGatewayCliTests.versionCommandPrintsProtocolVersions` line asserting `"mxgateway-java 0.1.0"` and `versionCommandPrintsJson` asserting `"clientVersion":"0.1.0"`) to `0.1.1`. Left as a hardcoded constant (sourcing from a Gradle-generated properties file was the optional alternative, not required). Fix applied 2026-06-16, pending gradle verification on windev. Tests: `MxGatewayCliTests.versionCommandPrintsProtocolVersions`, `versionCommandPrintsJson`.
|
||||
|
||||
### Client.Java-045
|
||||
|
||||
@@ -827,13 +827,13 @@ BrowseChildrenReply reply = galaxy.browseChildren(
|
||||
| Severity | Low |
|
||||
| Category | Testing coverage |
|
||||
| Location | `clients/java/zb-mom-ww-mxgateway-cli/src/main/java/com/zb/mom/ww/mxgateway/cli/InProcessGatewayHarness.java` |
|
||||
| Status | Open |
|
||||
| Status | In Progress |
|
||||
|
||||
**Description:** The harness implements only `streamEvents`/`closeSession` (gateway) and `discoverHierarchy`/`watchDeployEvents` (galaxy); all other RPCs return gRPC UNIMPLEMENTED. This is undocumented, so a future test exercising invoke/register through the harness would silently get UNIMPLEMENTED.
|
||||
|
||||
**Recommendation:** Add a Javadoc note enumerating implemented RPCs and warning that others return UNIMPLEMENTED by design.
|
||||
|
||||
**Resolution:** _(empty until closed)_
|
||||
**Resolution:** 2026-06-16 — Confirmed against source (the file lives under `src/test/...`, not `src/main/...` as the finding location states): the scripted fakes override only `streamEvents`/`closeSession` (gateway) and `discoverHierarchy`/`watchDeployEvents` (galaxy); every other RPC inherits the generated `*ImplBase` default and returns gRPC `UNIMPLEMENTED`. Added a "Implemented RPCs" section to the `InProcessGatewayHarness` class Javadoc enumerating the four overridden RPCs and warning that all others (openSession, invoke, register, streamAlarms, queryActiveAlarms, browseChildren, …) return `UNIMPLEMENTED` by design, so a future test must add a scripted override first. Doc-only change. Fix applied 2026-06-16, pending gradle verification on windev. No test needed.
|
||||
|
||||
### Client.Java-046
|
||||
|
||||
@@ -842,13 +842,13 @@ BrowseChildrenReply reply = galaxy.browseChildren(
|
||||
| Severity | Low |
|
||||
| Category | Testing coverage |
|
||||
| Location | `clients/java/zb-mom-ww-mxgateway-cli/src/test/java/com/zb/mom/ww/mxgateway/cli/MxGatewayCliTests.java:680-696` |
|
||||
| Status | Open |
|
||||
| Status | In Progress |
|
||||
|
||||
**Description:** `streamAlarmsCommandFailsFastOnQueueOverflow` delivers all 2000 onNext synchronously from within `streamAlarms`, so `subscriptionRef` is still null when the overflow fires — the `sub.cancel()` branch is never exercised. The test also doesn't assert the overflow message text. It passes for a reason that doesn't generalize to async gRPC delivery.
|
||||
|
||||
**Recommendation:** Deliver messages asynchronously so the cancel path runs, and assert the overflow error text appears in output.
|
||||
|
||||
**Resolution:** _(empty until closed)_
|
||||
**Resolution:** 2026-06-16 — Confirmed: `OverflowingFakeClient.streamAlarms` pushed all 2000 `onNext` synchronously and returned the subscription only afterward, so `subscriptionRef` was still null when the overflow fired and the `sub.cancel()` branch never ran; the test also asserted only the exit code, not the overflow text. Reworked `OverflowingFakeClient.streamAlarms` to flood on a background daemon thread (mirroring a real netty I/O thread) and return the subscription first, so the overflow fires with a non-null published subscription and exercises the `terminate()` cancel path. Strengthened `streamAlarmsCommandFailsFastOnQueueOverflow` to additionally assert the overflow message text ("queue overflowed") surfaces in stderr/stdout. Fix applied 2026-06-16, pending gradle verification on windev. Test: `MxGatewayCliTests.streamAlarmsCommandFailsFastOnQueueOverflow` (updated; also validates the Client.Java-040 terminal-slot fix).
|
||||
|
||||
### Client.Java-047
|
||||
|
||||
@@ -857,13 +857,13 @@ BrowseChildrenReply reply = galaxy.browseChildren(
|
||||
| Severity | Low |
|
||||
| Category | Documentation & comments |
|
||||
| Location | `clients/java/README.md` |
|
||||
| Status | Open |
|
||||
| Status | In Progress |
|
||||
|
||||
**Description:** README advertises the `0.1.1` artifact coordinate (Gitea Maven section) while the `version` command reports `0.1.0` — the user-visible symptom of Client.Java-044. Cross-ref `MxGatewayClientVersion.java:12`.
|
||||
|
||||
**Recommendation:** Resolved by fixing Client.Java-044 (sync the compiled-in version).
|
||||
|
||||
**Resolution:** _(empty until closed)_
|
||||
**Resolution:** 2026-06-16 — Symptom of Client.Java-044, resolved together. The README's `0.1.1` Maven coordinate (`clients/java/README.md:336`) was already correct; the divergence was the compiled-in `CLIENT_VERSION = "0.1.0"`. Bumping `CLIENT_VERSION` to `0.1.1` (Client.Java-044) makes the `version` command report `0.1.1`, matching the README. No README edit needed. Fix applied 2026-06-16, pending gradle verification on windev.
|
||||
|
||||
### Client.Java-048
|
||||
|
||||
@@ -872,13 +872,13 @@ BrowseChildrenReply reply = galaxy.browseChildren(
|
||||
| Severity | Low |
|
||||
| Category | Documentation & comments |
|
||||
| Location | `clients/java/zb-mom-ww-mxgateway-cli/src/main/java/com/zb/mom/ww/mxgateway/cli/MxGatewayCli.java:88-105` |
|
||||
| Status | Open |
|
||||
| Status | In Progress |
|
||||
|
||||
**Description:** The public `execute(PrintWriter, PrintWriter, String...)` Javadoc calls it "Test-friendly entry point", but it wires `GrpcMxGatewayCliClientFactory` with no injection — the actual test seam is the package-private `execute(MxGatewayCliClientFactory, ...)` / `commandLine(...)` overload. Misleading.
|
||||
|
||||
**Recommendation:** Clarify the Javadoc to direct readers to the injectable overload for testing.
|
||||
|
||||
**Resolution:** _(empty until closed)_
|
||||
**Resolution:** 2026-06-16 — Confirmed: the public `execute(PrintWriter, PrintWriter, String...)` Javadoc called it the "Test-friendly entry point", but it wires the production `GrpcMxGatewayCliClientFactory` with no injection seam — unit tests actually use the package-private `execute(MxGatewayCliClientFactory, ...)` / `commandLine(...)` overloads. Rewrote the Javadoc to drop "test-friendly", explain it wires a real gRPC channel, and direct test authors to the injectable package-private overloads. Doc-only change. Fix applied 2026-06-16, pending gradle verification on windev. No test needed.
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -640,13 +640,13 @@ Re-review of the worker-test delta covering the new COM seam (`MxAccessCommandEx
|
||||
| Severity | Low |
|
||||
| Category | Code organization & conventions |
|
||||
| Location | `src/ZB.MOM.WW.MxGateway.Worker.Tests/MxAccess/MxAccessCommandExecutorTests.cs:2233`, `src/ZB.MOM.WW.MxGateway.Worker.Tests/TestSupport/NoopMxAccessServer.cs:97` |
|
||||
| Status | Open |
|
||||
| Status | In Progress |
|
||||
|
||||
**Description:** `FakeMxStatus` is defined twice — file-scope in `TestSupport/NoopMxAccessServer.cs:97` and nested in `MxAccessCommandExecutorTests.FakeMxAccessComObject:2233` — both exposing the same four public fields that `MxStatusProxyConverter` reflects over. The two copies must stay structurally identical; a future field change to the real COM struct requires updating two places, and the duplication is invisible to a reader consulting only one file.
|
||||
|
||||
**Recommendation:** Extract `FakeMxStatus` into its own `TestSupport/FakeMxStatus.cs` (or colocate both doubles) and have `MxAccessCommandExecutorTests` use the shared type instead of its nested copy.
|
||||
|
||||
**Resolution:** _(empty until closed)_
|
||||
**Resolution:** 2026-06-16 — Removed the nested `FakeMxStatus` class from `MxAccessCommandExecutorTests.FakeMxAccessComObject`; the two `new FakeMxStatus { ... }` usages in `Suspend`/`Activate` now resolve to the shared `TestSupport.FakeMxStatus` via the pre-existing `using ZB.MOM.WW.MxGateway.Worker.Tests.TestSupport;` import. Updated the XML doc on `TestSupport/NoopMxAccessServer.cs:FakeMxStatus` to note the consolidation. Fix applied 2026-06-16, pending build verification on windev.
|
||||
|
||||
### Worker.Tests-035
|
||||
|
||||
@@ -655,13 +655,13 @@ Re-review of the worker-test delta covering the new COM seam (`MxAccessCommandEx
|
||||
| Severity | Low |
|
||||
| Category | Testing coverage |
|
||||
| Location | `src/ZB.MOM.WW.MxGateway.Worker.Tests/MxAccess/MxAccessCommandExecutorTests.cs`, `src/ZB.MOM.WW.MxGateway.Worker/MxAccess/MxAccessCommandExecutor.cs:99-136` |
|
||||
| Status | Open |
|
||||
| Status | In Progress |
|
||||
|
||||
**Description:** `MxAccessCommandExecutor.Execute` has a `_` discard arm returning `CreateInvalidRequestReply(... "Unsupported MXAccess command kind ...")` — the safety net for an unknown `MxCommandKind` (e.g. a future gateway enum value before the worker is updated). No test passes an unknown kind and asserts `InvalidRequest`. A regression changing the arm to `throw` would propagate an unhandled exception through `WorkerPipeSession` and no test would catch it.
|
||||
|
||||
**Recommendation:** Add a `[Fact]` constructing a `StaCommand` with an undefined `MxCommandKind` value and asserting the reply is `ProtocolStatusCode.InvalidRequest` with "Unsupported" in the diagnostic.
|
||||
|
||||
**Resolution:** _(empty until closed)_
|
||||
**Resolution:** 2026-06-16 — Added `DispatchAsync_WithUnknownCommandKind_ReturnsInvalidRequestWithUnsupportedDiagnostic` to `MxAccessCommandExecutorTests`. Casts `int.MaxValue` to `MxCommandKind` (an undefined value not present in the proto-generated enum), dispatches it through `MxAccessStaSession.DispatchAsync`, asserts `ProtocolStatusCode.InvalidRequest`, and asserts `reply.DiagnosticMessage` contains "Unsupported" (case-insensitive — matching `CreateInvalidRequestReply`'s `"Unsupported MXAccess command kind ..."` message). Fix applied 2026-06-16, pending build verification on windev.
|
||||
|
||||
### Worker.Tests-036
|
||||
|
||||
@@ -670,10 +670,10 @@ Re-review of the worker-test delta covering the new COM seam (`MxAccessCommandEx
|
||||
| Severity | Low |
|
||||
| Category | Concurrency & thread safety |
|
||||
| Location | `src/ZB.MOM.WW.MxGateway.Worker.Tests/Ipc/WorkerPipeSessionTests.cs:983-996` |
|
||||
| Status | Open |
|
||||
| Status | In Progress |
|
||||
|
||||
**Description:** `RunAsync_SendsFirstHeartbeatImmediatelyOnEnteringLoop` carries a redundant wall-clock assertion `Assert.True(elapsed < TimeSpan.FromSeconds(5), ...)`. The existing `heartbeatWait` CTS (cancel-after 5s) already enforces the same bound — the extra wall-clock check can only fire if the heartbeat arrived but took >5s to be received, which the CTS already prevents. It is the same coarse wall-clock pattern prior findings (Worker.Tests-003/004/013/020) corrected.
|
||||
|
||||
**Recommendation:** Remove the `start`/`elapsed`/`Assert.True(elapsed < ...)` check; the CTS timeout already pins the timing contract.
|
||||
|
||||
**Resolution:** _(empty until closed)_
|
||||
**Resolution:** 2026-06-16 — Removed the `DateTimeOffset start`, `TimeSpan elapsed`, and `Assert.True(elapsed < TimeSpan.FromSeconds(5), ...)` wall-clock assertions from `RunAsync_SendsFirstHeartbeatImmediatelyOnEnteringLoop`. The `heartbeatWait` CTS (cancel-after 5s) already enforces the same timing bound. Added an inline comment explaining why the wall-clock floor is omitted, consistent with the Worker.Tests-003/004/013/020 pattern. Fix applied 2026-06-16, pending build verification on windev.
|
||||
|
||||
@@ -980,7 +980,11 @@ public sealed class WorkerPipeSessionTests
|
||||
Task runTask = session.RunAsync(cancellation.Token);
|
||||
await CompleteGatewayHandshakeAsync(pipePair, cancellation.Token);
|
||||
|
||||
DateTimeOffset start = DateTimeOffset.UtcNow;
|
||||
// The heartbeatWait CTS (5s cancel-after) already enforces the timing bound:
|
||||
// if the first heartbeat is not received within 5s, ReadUntilAsync throws
|
||||
// OperationCanceledException and the test fails. A redundant wall-clock
|
||||
// elapsed < 5s assertion would add the same class of flakiness
|
||||
// Workers.Tests-003/004/013/020 corrected elsewhere, so it is omitted here.
|
||||
using CancellationTokenSource heartbeatWait = CancellationTokenSource
|
||||
.CreateLinkedTokenSource(cancellation.Token);
|
||||
heartbeatWait.CancelAfter(TimeSpan.FromSeconds(5));
|
||||
@@ -988,12 +992,8 @@ public sealed class WorkerPipeSessionTests
|
||||
pipePair.GatewayReader,
|
||||
WorkerEnvelope.BodyOneofCase.WorkerHeartbeat,
|
||||
heartbeatWait.Token);
|
||||
TimeSpan elapsed = DateTimeOffset.UtcNow - start;
|
||||
|
||||
Assert.Equal(WorkerEnvelope.BodyOneofCase.WorkerHeartbeat, heartbeat.BodyCase);
|
||||
Assert.True(
|
||||
elapsed < TimeSpan.FromSeconds(5),
|
||||
$"First heartbeat took {elapsed}, expected well under the 30s interval.");
|
||||
|
||||
await SendShutdownAndWaitAsync(pipePair, runTask, cancellation.Token);
|
||||
}
|
||||
|
||||
@@ -1123,6 +1123,36 @@ public sealed class MxAccessCommandExecutorTests
|
||||
Assert.Equal(500, fakeComObject.SetBufferedUpdateIntervalValue);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Verifies that a command with an unknown <see cref="MxCommandKind"/> value returns an
|
||||
/// <see cref="ProtocolStatusCode.InvalidRequest"/> reply whose diagnostic contains "Unsupported".
|
||||
/// This pins the <c>_ => CreateInvalidRequestReply(...)</c> discard arm in
|
||||
/// <c>MxAccessCommandExecutor.Execute</c>: a regression that changed the arm to
|
||||
/// <c>throw</c> would propagate an unhandled exception through <c>WorkerPipeSession</c>
|
||||
/// and no other test would catch it.
|
||||
/// </summary>
|
||||
[Fact]
|
||||
public async Task DispatchAsync_WithUnknownCommandKind_ReturnsInvalidRequestWithUnsupportedDiagnostic()
|
||||
{
|
||||
FakeMxAccessComObjectFactory factory = new(new FakeMxAccessComObject(registerHandle: 999));
|
||||
using StaRuntime runtime = CreateRuntime();
|
||||
using MxAccessStaSession session = new(runtime, factory, new NoopEventSink());
|
||||
await session.StartAsync(workerProcessId: 1234);
|
||||
|
||||
// Cast an integer outside the defined MxCommandKind range to an unknown kind value.
|
||||
MxCommandKind unknownKind = (MxCommandKind)int.MaxValue;
|
||||
MxCommandReply reply = await session.DispatchAsync(new StaCommand(
|
||||
"session-1",
|
||||
"unknown-kind-correlation",
|
||||
new MxCommand
|
||||
{
|
||||
Kind = unknownKind,
|
||||
}));
|
||||
|
||||
Assert.Equal(ProtocolStatusCode.InvalidRequest, reply.ProtocolStatus.Code);
|
||||
Assert.Contains("Unsupported", reply.DiagnosticMessage, StringComparison.OrdinalIgnoreCase);
|
||||
}
|
||||
|
||||
private static StaCommand CreateSuspendCommand(
|
||||
string correlationId,
|
||||
int serverHandle,
|
||||
@@ -2229,21 +2259,6 @@ public sealed class MxAccessCommandExecutorTests
|
||||
SetBufferedUpdateIntervalValue = updateIntervalMilliseconds;
|
||||
}
|
||||
|
||||
/// <summary>Status stand-in reflected over by the worker's MxStatusProxy converter.</summary>
|
||||
internal sealed class FakeMxStatus
|
||||
{
|
||||
/// <summary>Success indicator read by the status converter.</summary>
|
||||
public int success;
|
||||
|
||||
/// <summary>Status category read by the status converter.</summary>
|
||||
public int category;
|
||||
|
||||
/// <summary>Status detected-by read by the status converter.</summary>
|
||||
public int detectedBy;
|
||||
|
||||
/// <summary>Status detail read by the status converter.</summary>
|
||||
public int detail;
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>Factory for creating fake MXAccess COM objects in tests.</summary>
|
||||
|
||||
@@ -94,6 +94,11 @@ internal sealed class NoopMxAccessServer : IMxAccessServer
|
||||
/// <c>success</c>, <c>category</c>, <c>detectedBy</c>, and <c>detail</c>
|
||||
/// fields, so this fake exposes the same field shape with all-OK values.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// Previously duplicated as a nested class in <c>MxAccessCommandExecutorTests.FakeMxAccessComObject</c>.
|
||||
/// Consolidated here per Worker.Tests-034 so a future field change to the real COM struct only
|
||||
/// requires updating one place.
|
||||
/// </remarks>
|
||||
internal sealed class FakeMxStatus
|
||||
{
|
||||
// These public fields exist solely so MxStatusProxyConverter can reflect
|
||||
|
||||
Reference in New Issue
Block a user