From 44b8a9c7ff746909b2f5cc72b0bd8f6ccbec370a Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Tue, 26 May 2026 15:10:11 -0400 Subject: [PATCH] fix(deploy): ClusterNode NodeId uses host:port + Traefik sticky cookie MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two bring-up issues found while clicking through the operator Deploy flow on the docker-dev stack: - ConfigPublishCoordinator computes expected-ack NodeIds from Akka.Cluster.State.Members as "{host}:{port}" (e.g. "driver-a:4053") to match ClusterRoleInfo's NodeId derivation. The seed had been using the bare service name ("driver-a"), so NodeDeploymentState INSERT hit FK violation 547 on NodeDeploymentState.NodeId → ClusterNode.NodeId. Seed now writes the full host:port form for every ClusterNode row. - Blazor Server uses SignalR (WebSocket upgrade after the initial GET). Without sticky sessions, Traefik round-robins admin-a/admin-b and the WebSocket upgrade lands on the wrong backend, returning "No Connection with that ID: Status code '404'" so @onclick handlers never fire on the client. Added sticky.cookie (otopcua_lb, SameSite=Lax) to all three Traefik service loadBalancers so each session pins to one node. Verified end-to-end: clicked "Deploy current configuration" on /deployments → Deployment row sealed in ~70ms → driver-a + driver-b spawn GalaxyMxGateway driver (stub=False) → GalaxyDriver connects to http://10.100.0.48:5120 with the seeded ApiKeySecretRef=env:GALAXY_MXGW_API_KEY. 
--- docker-dev/seed/seed-clusters.sql | 29 +++++++++++++++++------------ docker-dev/traefik-dynamic.yml | 24 ++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 12 deletions(-) diff --git a/docker-dev/seed/seed-clusters.sql b/docker-dev/seed/seed-clusters.sql index 0b7a1e5..d47d40e 100644 --- a/docker-dev/seed/seed-clusters.sql +++ b/docker-dev/seed/seed-clusters.sql @@ -55,45 +55,50 @@ IF NOT EXISTS (SELECT 1 FROM dbo.ServerCluster WHERE ClusterId = 'SITE-B') ------------------------------------------------------------------------------ -- ClusterNode — main cluster OPC UA publishers +-- +-- NodeId is "<host>:4053" so it matches what ClusterRoleInfo + +-- ConfigPublishCoordinator derive from Akka.Cluster.Get(system).State.Members +-- (member.Address.Host:Port). NodeDeploymentState.NodeId is FK-bound to +-- ClusterNode.NodeId; mismatched values cause FK 547 on deploy. ------------------------------------------------------------------------------ -IF NOT EXISTS (SELECT 1 FROM dbo.ClusterNode WHERE NodeId = 'driver-a') +IF NOT EXISTS (SELECT 1 FROM dbo.ClusterNode WHERE NodeId = 'driver-a:4053') INSERT INTO dbo.ClusterNode (NodeId, ClusterId, Host, OpcUaPort, DashboardPort, ApplicationUri, ServiceLevelBase, Enabled, CreatedBy) - VALUES ('driver-a', 'MAIN', 'driver-a', 4840, 8081, 'urn:OtOpcUa:driver-a', 200, 1, 'docker-dev-seed'); + VALUES ('driver-a:4053', 'MAIN', 'driver-a', 4840, 8081, 'urn:OtOpcUa:driver-a', 200, 1, 'docker-dev-seed'); -IF NOT EXISTS (SELECT 1 FROM dbo.ClusterNode WHERE NodeId = 'driver-b') +IF NOT EXISTS (SELECT 1 FROM dbo.ClusterNode WHERE NodeId = 'driver-b:4053') INSERT INTO dbo.ClusterNode (NodeId, ClusterId, Host, OpcUaPort, DashboardPort, ApplicationUri, ServiceLevelBase, Enabled, CreatedBy) - VALUES ('driver-b', 'MAIN', 'driver-b', 4840, 8081, 'urn:OtOpcUa:driver-b', 150, 1, 'docker-dev-seed'); + VALUES ('driver-b:4053', 'MAIN', 'driver-b', 4840, 8081, 'urn:OtOpcUa:driver-b', 150, 1, 'docker-dev-seed'); 
------------------------------------------------------------------------------ -- ClusterNode — site A ------------------------------------------------------------------------------ -IF NOT EXISTS (SELECT 1 FROM dbo.ClusterNode WHERE NodeId = 'site-a-1') +IF NOT EXISTS (SELECT 1 FROM dbo.ClusterNode WHERE NodeId = 'site-a-1:4053') INSERT INTO dbo.ClusterNode (NodeId, ClusterId, Host, OpcUaPort, DashboardPort, ApplicationUri, ServiceLevelBase, Enabled, CreatedBy) - VALUES ('site-a-1', 'SITE-A', 'site-a-1', 4840, 8081, 'urn:OtOpcUa:site-a-1', 200, 1, 'docker-dev-seed'); + VALUES ('site-a-1:4053', 'SITE-A', 'site-a-1', 4840, 8081, 'urn:OtOpcUa:site-a-1', 200, 1, 'docker-dev-seed'); -IF NOT EXISTS (SELECT 1 FROM dbo.ClusterNode WHERE NodeId = 'site-a-2') +IF NOT EXISTS (SELECT 1 FROM dbo.ClusterNode WHERE NodeId = 'site-a-2:4053') INSERT INTO dbo.ClusterNode (NodeId, ClusterId, Host, OpcUaPort, DashboardPort, ApplicationUri, ServiceLevelBase, Enabled, CreatedBy) - VALUES ('site-a-2', 'SITE-A', 'site-a-2', 4840, 8081, 'urn:OtOpcUa:site-a-2', 150, 1, 'docker-dev-seed'); + VALUES ('site-a-2:4053', 'SITE-A', 'site-a-2', 4840, 8081, 'urn:OtOpcUa:site-a-2', 150, 1, 'docker-dev-seed'); ------------------------------------------------------------------------------ -- ClusterNode — site B ------------------------------------------------------------------------------ -IF NOT EXISTS (SELECT 1 FROM dbo.ClusterNode WHERE NodeId = 'site-b-1') +IF NOT EXISTS (SELECT 1 FROM dbo.ClusterNode WHERE NodeId = 'site-b-1:4053') INSERT INTO dbo.ClusterNode (NodeId, ClusterId, Host, OpcUaPort, DashboardPort, ApplicationUri, ServiceLevelBase, Enabled, CreatedBy) - VALUES ('site-b-1', 'SITE-B', 'site-b-1', 4840, 8081, 'urn:OtOpcUa:site-b-1', 200, 1, 'docker-dev-seed'); + VALUES ('site-b-1:4053', 'SITE-B', 'site-b-1', 4840, 8081, 'urn:OtOpcUa:site-b-1', 200, 1, 'docker-dev-seed'); -IF NOT EXISTS (SELECT 1 FROM dbo.ClusterNode WHERE NodeId = 'site-b-2') +IF NOT EXISTS (SELECT 1 FROM 
dbo.ClusterNode WHERE NodeId = 'site-b-2:4053') INSERT INTO dbo.ClusterNode (NodeId, ClusterId, Host, OpcUaPort, DashboardPort, ApplicationUri, ServiceLevelBase, Enabled, CreatedBy) - VALUES ('site-b-2', 'SITE-B', 'site-b-2', 4840, 8081, 'urn:OtOpcUa:site-b-2', 150, 1, 'docker-dev-seed'); + VALUES ('site-b-2:4053', 'SITE-B', 'site-b-2', 4840, 8081, 'urn:OtOpcUa:site-b-2', 150, 1, 'docker-dev-seed'); ------------------------------------------------------------------------------ -- Galaxy MxAccess gateway — MAIN cluster diff --git a/docker-dev/traefik-dynamic.yml b/docker-dev/traefik-dynamic.yml index 610d0d8..e54429f 100644 --- a/docker-dev/traefik-dynamic.yml +++ b/docker-dev/traefik-dynamic.yml @@ -28,6 +28,14 @@ http: services: otopcua-admin: loadBalancer: + # Blazor Server uses SignalR; the WebSocket upgrade must hit the same + # backend that owns the circuit ID. Sticky cookie keeps each session + # pinned to one node so the post-handshake WebSocket doesn't 404. + sticky: + cookie: + name: otopcua_lb + httpOnly: true + sameSite: lax servers: - url: "http://admin-a:9000" - url: "http://admin-b:9000" @@ -38,6 +46,14 @@ http: otopcua-site-a: loadBalancer: + # Blazor Server uses SignalR; the WebSocket upgrade must hit the same + # backend that owns the circuit ID. Sticky cookie keeps each session + # pinned to one node so the post-handshake WebSocket doesn't 404. + sticky: + cookie: + name: otopcua_lb + httpOnly: true + sameSite: lax servers: - url: "http://site-a-1:9000" - url: "http://site-a-2:9000" @@ -48,6 +64,14 @@ http: otopcua-site-b: loadBalancer: + # Blazor Server uses SignalR; the WebSocket upgrade must hit the same + # backend that owns the circuit ID. Sticky cookie keeps each session + # pinned to one node so the post-handshake WebSocket doesn't 404. + sticky: + cookie: + name: otopcua_lb + httpOnly: true + sameSite: lax servers: - url: "http://site-b-1:9000" - url: "http://site-b-2:9000"