diff --git a/CLAUDE.md b/CLAUDE.md index ff2705a..369c120 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -8,10 +8,10 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co The architecture is a two-process design — read `gateway.md` before making structural changes: -- **Gateway** (`src/MxGateway.Server`, .NET 10, x64): ASP.NET Core gRPC server. Owns the public API, sessions, auth, the Blazor dashboard, and the Galaxy Repository SQL browse RPCs. **Never instantiates MXAccess COM directly.** -- **Worker** (`src/MxGateway.Worker`, .NET Framework 4.8, **x86**): one process per session. Owns one MXAccess COM instance on a dedicated STA, pumps Windows messages, and converts COM events to protobuf. +- **Gateway** (`src/ZB.MOM.WW.MxGateway.Server`, .NET 10, x64): ASP.NET Core gRPC server. Owns the public API, sessions, auth, the Blazor dashboard, and the Galaxy Repository SQL browse RPCs. **Never instantiates MXAccess COM directly.** +- **Worker** (`src/ZB.MOM.WW.MxGateway.Worker`, .NET Framework 4.8, **x86**): one process per session. Owns one MXAccess COM instance on a dedicated STA, pumps Windows messages, and converts COM events to protobuf. - **IPC**: gateway↔worker uses one bidirectional named pipe per worker (`mxaccess-gateway-{gatewayPid}-{sessionId}`) with length-prefixed `WorkerEnvelope` protobuf frames. Gateway hosts the pipe server and launches the worker. **gRPC is not used inside the worker** — .NET Framework 4.8 doesn't have a first-class gRPC stack. -- **Contracts** (`src/MxGateway.Contracts`): multi-targets `net10.0;net48` and owns the `.proto` files (`mxaccess_gateway.proto`, `mxaccess_worker.proto`, `galaxy_repository.proto`). All other projects consume the generated types from here. Do not hand-edit anything under `Generated/`. +- **Contracts** (`src/ZB.MOM.WW.MxGateway.Contracts`): multi-targets `net10.0;net48` and owns the `.proto` files (`mxaccess_gateway.proto`, `mxaccess_worker.proto`, `galaxy_repository.proto`). 
All other projects consume the generated types from here. Do not hand-edit anything under `Generated/`. The worker must do all MXAccess COM calls on its dedicated STA thread, and the STA loop must pump Windows messages (`MsgWaitForMultipleObjectsEx` + `PeekMessage`/`DispatchMessage`) so MXAccess events deliver. A plain blocking queue on an STA is not enough. @@ -19,42 +19,42 @@ The worker must do all MXAccess COM calls on its dedicated STA thread, and the S ```powershell # Full solution build (gateway, worker, contracts, tests) -dotnet build src/MxGateway.sln +dotnet build src/ZB.MOM.WW.MxGateway.slnx -# Worker must be built x86 — the gateway looks for MxGateway.Worker.exe under bin\x86 -dotnet build src/MxGateway.Worker/MxGateway.Worker.csproj -p:Platform=x86 +# Worker must be built x86 — the gateway looks for ZB.MOM.WW.MxGateway.Worker.exe under bin\x86 +dotnet build src/ZB.MOM.WW.MxGateway.Worker/ZB.MOM.WW.MxGateway.Worker.csproj -p:Platform=x86 # Gateway tests (no MXAccess required — uses FakeWorkerHarness) -dotnet test src/MxGateway.Tests/MxGateway.Tests.csproj -dotnet test src/MxGateway.Worker.Tests/MxGateway.Worker.Tests.csproj -p:Platform=x86 +dotnet test src/ZB.MOM.WW.MxGateway.Tests/ZB.MOM.WW.MxGateway.Tests.csproj +dotnet test src/ZB.MOM.WW.MxGateway.Worker.Tests/ZB.MOM.WW.MxGateway.Worker.Tests.csproj -p:Platform=x86 -# Run gateway locally (defaults bound under MxGateway:* in src/MxGateway.Server/appsettings.json) -dotnet run --project src/MxGateway.Server/MxGateway.Server.csproj +# Run gateway locally (defaults bound under MxGateway:* in src/ZB.MOM.WW.MxGateway.Server/appsettings.json) +dotnet run --project src/ZB.MOM.WW.MxGateway.Server/ZB.MOM.WW.MxGateway.Server.csproj # API-key admin CLI (same exe, "apikey" subcommand) -dotnet run --project src/MxGateway.Server/MxGateway.Server.csproj -- apikey create --display-name "dev" --scopes session,invoke,event,metadata,admin +dotnet run --project 
src/ZB.MOM.WW.MxGateway.Server/ZB.MOM.WW.MxGateway.Server.csproj -- apikey create --display-name "dev" --scopes session,invoke,event,metadata,admin ``` Single test by name (xUnit `--filter`): ```powershell -dotnet test src/MxGateway.Tests/MxGateway.Tests.csproj --filter FullyQualifiedName~GatewayEndToEndFakeWorkerSmokeTests +dotnet test src/ZB.MOM.WW.MxGateway.Tests/ZB.MOM.WW.MxGateway.Tests.csproj --filter FullyQualifiedName~GatewayEndToEndFakeWorkerSmokeTests ``` Live MXAccess integration tests are **opt-in** because they need installed MXAccess COM and live provider state: ```powershell $env:MXGATEWAY_RUN_LIVE_MXACCESS_TESTS = "1" -dotnet test src/MxGateway.IntegrationTests/MxGateway.IntegrationTests.csproj --filter FullyQualifiedName~WorkerLiveMxAccessSmokeTests +dotnet test src/ZB.MOM.WW.MxGateway.IntegrationTests/ZB.MOM.WW.MxGateway.IntegrationTests.csproj --filter FullyQualifiedName~WorkerLiveMxAccessSmokeTests ``` Live LDAP tests use `MXGATEWAY_RUN_LIVE_LDAP_TESTS=1`. See `docs/GatewayTesting.md` for the full opt-in matrix and `LiveMxAccessFactAttribute` / `LiveLdapFactAttribute` for the gating logic. ## Clients -Each language client is in `clients/<language>/` with its own README. They all consume the shared `.proto` files in `src/MxGateway.Contracts/Protos`: +Each language client is in `clients/<language>/` with its own README. 
They all consume the shared `.proto` files in `src/ZB.MOM.WW.MxGateway.Contracts/Protos`: -- `clients/dotnet`: `dotnet build clients/dotnet/MxGateway.Client.sln` +- `clients/dotnet`: `dotnet build clients/dotnet/ZB.MOM.WW.MxGateway.Client.slnx` - `clients/python`: `python -m pip install -e ".[dev]"; python -m pytest` - `clients/rust`: `cargo test --workspace; cargo clippy --workspace --all-targets -- -D warnings` - `clients/java`: `gradle test` (Java 21) @@ -77,7 +77,7 @@ powershell -ExecutionPolicy Bypass -File scripts/run-client-e2e-tests.ps1 - **Gateway restart does not reattach orphan workers.** The first version terminates orphaned workers on startup; do not design code paths that assume reattachment. - **No Blazor UI component libraries.** Dashboard uses local Bootstrap CSS/JS only — do not introduce MudBlazor, Radzen, FluentUI, etc. - **Don't log secrets or full tag values by default.** API keys, passwords, `WriteSecured` payloads, and `AuthenticateUser` credentials must never reach logs. Value logging is opt-in and redacted. -- **Generated code** under `src/MxGateway.Contracts/Generated/`, `clients/*/generated*/`, `clients/python/src/mxgateway/generated/`, etc., is build output. Don't hand-edit. To regenerate, build the contracts project (`dotnet build src/MxGateway.Contracts/MxGateway.Contracts.csproj`) or run the per-client generation step in that client's README. +- **Generated code** under `src/ZB.MOM.WW.MxGateway.Contracts/Generated/`, `clients/*/generated*/`, `clients/python/src/mxgateway/generated/`, etc., is build output. Don't hand-edit. To regenerate, build the contracts project (`dotnet build src/ZB.MOM.WW.MxGateway.Contracts/ZB.MOM.WW.MxGateway.Contracts.csproj`) or run the per-client generation step in that client's README. - **Documentation style** (`StyleGuide.md`): PascalCase filenames, no marketing language, present tense, explain *why* not *what*. 
- **Update docs in the same change as the source.** When public APIs, contracts, configuration, build steps, security behavior, event shapes, value conversion, status mapping, or lifecycle rules change, the affected docs (`gateway.md`, `docs/`, client READMEs, design docs) must change in the same commit. Don't leave stale prose describing old behavior. @@ -88,9 +88,9 @@ When source code changes, build and test the affected component before reporting | Changed area | Required verification | |---|---| | Contracts or `.proto` files | regenerate generated code, then build gateway, worker, and every generated client touched by the contract | -| Gateway server, sessions, workers, gRPC, dashboard, metrics | `dotnet build src/MxGateway.Server` and run affected gateway / fake-worker tests | -| Worker IPC, STA, MXAccess, conversion | `dotnet build src/MxGateway.Worker -p:Platform=x86` and run worker tests | -| .NET client | `dotnet build clients/dotnet/MxGateway.Client.sln` and run its tests | +| Gateway server, sessions, workers, gRPC, dashboard, metrics | `dotnet build src/ZB.MOM.WW.MxGateway.Server` and run affected gateway / fake-worker tests | +| Worker IPC, STA, MXAccess, conversion | `dotnet build src/ZB.MOM.WW.MxGateway.Worker -p:Platform=x86` and run worker tests | +| .NET client | `dotnet build clients/dotnet/ZB.MOM.WW.MxGateway.Client.slnx` and run its tests | | Go client | `gofmt`, `go build ./...`, `go test ./...` from `clients/go` | | Rust client | `cargo fmt`, `cargo check --workspace`, `cargo test --workspace`, `cargo clippy --all-targets -- -D warnings` from `clients/rust` | | Python client | `python -m pytest` from `clients/python` | @@ -114,7 +114,7 @@ External analysis sources referenced by design docs: ## Authentication -Gateway gRPC clients authenticate with an API key in metadata: `authorization: Bearer mxgw__`. Keys are stored hashed (with a peppered SHA) in a gateway-owned SQLite DB (default `C:\ProgramData\MxGateway\gateway-auth.db`). 
Scopes (`session`, `invoke`, `event`, `metadata`, `admin`) gate specific RPCs; missing → `Unauthenticated`, insufficient → `PermissionDenied`. The `apikey` subcommand on the server exe manages keys; see `src/MxGateway.Server/Security/Authentication/`. +Gateway gRPC clients authenticate with an API key in metadata: `authorization: Bearer mxgw__`. Keys are stored hashed (with a peppered SHA) in a gateway-owned SQLite DB (default `C:\ProgramData\MxGateway\gateway-auth.db`). Scopes (`session`, `invoke`, `event`, `metadata`, `admin`) gate specific RPCs; missing → `Unauthenticated`, insufficient → `PermissionDenied`. The `apikey` subcommand on the server exe manages keys; see `src/ZB.MOM.WW.MxGateway.Server/Security/Authentication/`. Dashboard auth is LDAP-backed (separate from the gRPC API-key model). `/login` binds against `MxGateway:Ldap` and maps the user's LDAP groups to `Admin` or `Viewer` via `MxGateway:Dashboard:GroupToRole`, then issues an HTTP-only secure `__Host-MxGatewayDashboard` cookie. SignalR hubs at `/hubs/{snapshot,alarms,events}` accept either the cookie or a 30-minute bearer minted at `/hubs/token`. `Dashboard:AllowAnonymousLocalhost` bypasses auth on loopback when enabled. diff --git a/docs/AlarmClientDiscovery.md b/docs/AlarmClientDiscovery.md index 241b89b..ba1e4ca 100644 --- a/docs/AlarmClientDiscovery.md +++ b/docs/AlarmClientDiscovery.md @@ -762,16 +762,18 @@ in the codebase for the forward-compat shape, but the gateway-side `AcknowledgeAlarmByName` when the public RPC supplies a recognizable `Provider!Group.Tag` reference. -### 5. STA / threading — production fix needed +### 5. STA / threading — resolved The wnwrap COM is `ThreadingModel=Apartment`. The consumer's internal `Timer` fires on threadpool threads and would block forever on cross-apartment marshaling unless the host STA pumps Win32 messages. The smoke test sidesteps this by setting `pollIntervalMilliseconds=0` (Timer disabled) and driving `PollOnce` -manually from the test's STA. 
Production hosting will route polls -through the worker's `StaRuntime` in a follow-up — the consumer's -`PollOnce` is `public` and idempotent so the wire-up is mechanical. +manually from the test's STA. Production alarm polling was wired up +through the worker's `StaRuntime` via `GatewayAlarmMonitor`, which +owns the STA pump and drives alarm subscriptions through the worker +IPC path. This item is resolved; the wnwrap consumer's `PollOnce` +is no longer invoked directly in production. ### Capture summary diff --git a/docs/plans/2026-06-14-deferred-followups.md b/docs/plans/2026-06-14-deferred-followups.md index 066fc56..06c1aed 100644 --- a/docs/plans/2026-06-14-deferred-followups.md +++ b/docs/plans/2026-06-14-deferred-followups.md @@ -1,7 +1,7 @@ # Deferred Follow-ups Implementation Plan **Date:** 2026-06-14 -**Status:** Plan only — NOT yet executed. Saved for review. +**Status:** D1 executed (commit 4af24b9 — `mxgateway.alarms.provider_switches` emitted in `DashboardSnapshotService.cs:198`). D2 resolved as no-op (see resolution section below). D3–D5 remain pending (ops/validation, no code). **Context:** After the alarm-subtag-fallback cleanup (merged `5976770`) and its redeploy to windev (10.100.0.48), five items remain deferred. This plan handles all five. They are independent — execute in any order, or cherry-pick. Items D1–D2 are code (branch off `main`); diff --git a/src/ZB.MOM.WW.MxGateway.Server/Dashboard/Hubs/EventsHub.cs b/src/ZB.MOM.WW.MxGateway.Server/Dashboard/Hubs/EventsHub.cs index 382dcad..0560490 100644 --- a/src/ZB.MOM.WW.MxGateway.Server/Dashboard/Hubs/EventsHub.cs +++ b/src/ZB.MOM.WW.MxGateway.Server/Dashboard/Hubs/EventsHub.cs @@ -6,15 +6,9 @@ namespace ZB.MOM.WW.MxGateway.Server.Dashboard.Hubs; /// /// SignalR hub for per-session MxEvent push. Clients call /// to join the group for a specific -/// session; the dashboard's MxEvent broadcaster (a future hook on -/// EventStreamService) sends messages to session:{id}. 
+/// session; sends messages to +/// session:{id} as events arrive from the live gRPC stream. /// -/// -/// The publisher side is intentionally a follow-up. Today the dashboard's -/// per-session event view is fed by the snapshot hub, which carries the -/// rolling recent-events list. Once a dedicated MxEvent broadcaster -/// lands, this hub's group convention is what it will publish to. -/// [Authorize(Policy = DashboardAuthenticationDefaults.HubClientsPolicy)] public sealed class EventsHub : Hub { diff --git a/src/ZB.MOM.WW.MxGateway.Server/Grpc/GalaxyRepositoryGrpcService.cs b/src/ZB.MOM.WW.MxGateway.Server/Grpc/GalaxyRepositoryGrpcService.cs index 0113b81..d247a6d 100644 --- a/src/ZB.MOM.WW.MxGateway.Server/Grpc/GalaxyRepositoryGrpcService.cs +++ b/src/ZB.MOM.WW.MxGateway.Server/Grpc/GalaxyRepositoryGrpcService.cs @@ -1,6 +1,5 @@ using Google.Protobuf.WellKnownTypes; using Grpc.Core; -using Microsoft.Data.SqlClient; using ZB.MOM.WW.MxGateway.Contracts.Proto.Galaxy; using GalaxyDb = ZB.MOM.WW.MxGateway.Server.Galaxy; using ZB.MOM.WW.MxGateway.Server.Security.Authentication; @@ -20,8 +19,7 @@ public sealed class GalaxyRepositoryGrpcService( GalaxyDb.IGalaxyRepository repository, GalaxyDb.IGalaxyHierarchyCache cache, GalaxyDb.IGalaxyDeployNotifier notifier, - IGatewayRequestIdentityAccessor identityAccessor, - ILogger logger) : ProtoGalaxyRepository.GalaxyRepositoryBase + IGatewayRequestIdentityAccessor identityAccessor) : ProtoGalaxyRepository.GalaxyRepositoryBase { private static readonly TimeSpan FirstLoadWaitBudget = TimeSpan.FromSeconds(5); private const int DefaultDiscoverPageSize = 1000; @@ -347,15 +345,4 @@ public sealed class GalaxyRepositoryGrpcService( private sealed record PageToken(long Sequence, string FilterSignature, int Offset); - [System.Diagnostics.CodeAnalysis.SuppressMessage( - "Style", - "IDE0051:Remove unused private members", - Justification = "Kept for parity with prior SQL exception mapping; future direct-SQL paths reuse it.")] - private 
RpcException MapSqlException(SqlException exception) - { - logger.LogWarning(exception, "Galaxy repository query failed."); - return new RpcException(new Status( - StatusCode.Unavailable, - "Galaxy repository is unavailable.")); - } }