From 961e09430a3b14628b10808a5bc3ddfbb19929a1 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Tue, 26 May 2026 13:59:23 -0400 Subject: [PATCH] feat(deploy): add site-a + site-b 2-node clusters to docker-dev MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extends the docker-dev compose with two additional, fully-isolated Akka clusters representing distinct sites. Each site is a 2-node fused admin+driver cluster (OTOPCUA_ROLES=admin,driver on both nodes), backed by its own ConfigDb database so configuration state stays separate from the main cluster and from the other site. Cluster isolation: the three meshes share the same Akka system name "otopcua" and remoting port 4053 (inside each container's own network namespace), but their seed-node lists are disjoint — main seeds at admin-a, site-a seeds at site-a-1, site-b seeds at site-b-1 — so gossip doesn't cross between them. Layout: Main cluster ConfigDb=OtOpcUa admin-a, admin-b, driver-a, driver-b Site A ConfigDb=OtOpcUa_SiteA site-a-1, site-a-2 (fused admin+driver) Site B ConfigDb=OtOpcUa_SiteB site-b-1, site-b-2 (fused admin+driver) OPC UA endpoints exposed on host ports 4840-4845. Admin UIs reachable through Traefik via Host-header routing: http://localhost → main cluster (PathPrefix default) http://site-a.localhost → site A http://site-b.localhost → site B `*.localhost` auto-resolves on macOS; Linux users add the two hosts to /etc/hosts (or rely on the resolver's RFC 6761 behaviour). --- docker-dev/README.md | 52 +++++++++--- docker-dev/docker-compose.yml | 141 ++++++++++++++++++++++++++++++--- docker-dev/traefik-dynamic.yml | 42 +++++++++- 3 files changed, 209 insertions(+), 26 deletions(-) diff --git a/docker-dev/README.md b/docker-dev/README.md index 5f08ac2..a9725c0 100644 --- a/docker-dev/README.md +++ b/docker-dev/README.md @@ -1,20 +1,41 @@ # docker-dev -Mac-friendly four-node OtOpcUa fleet for manual UI exercise + integration smoke tests. 
Spins up an Akka cluster + SQL Server + OpenLDAP + Traefik in front of two admin nodes. +Mac-friendly multi-cluster OtOpcUa fleet for manual UI exercise + integration smoke tests. Spins up **three isolated Akka clusters** + SQL Server + OpenLDAP + Traefik on the same Compose network. Each cluster has its own ConfigDb database and its own seed-node list, so Akka.Cluster gossip doesn't cross between them even though they share the same system name `otopcua`. ## Stack +### Shared infrastructure + | Service | Role | Ports | |---|---|---| -| `sql` | SQL Server 2022 (`ConfigDb` backing store) | host `14330` → container `1433` | +| `sql` | SQL Server 2022 (hosts all per-cluster ConfigDb databases) | host `14330` → container `1433` | | `ldap` | OpenLDAP with dev users `alice` / `bob` | host `3893` → container `1389` | -| `admin-a` | OtOpcUa.Host, `OTOPCUA_ROLES=admin`, cluster seed | internal `9000` | -| `admin-b` | OtOpcUa.Host, `OTOPCUA_ROLES=admin`, joins admin-a | internal `9000` | -| `driver-a` | OtOpcUa.Host, `OTOPCUA_ROLES=driver` | host `4840` → container `4840` | -| `driver-b` | OtOpcUa.Host, `OTOPCUA_ROLES=driver` | host `4841` → container `4840` | -| `traefik` | Routes `:80` to whichever admin-* currently passes `/health/active` | host `80`, dashboard `8080` | +| `traefik` | Routes :80 by Host header / PathPrefix | host `80`, dashboard `8080` | -All six containers share an Akka cluster bound to port `4053` inside the Compose network. The Akka `PublicHostname` of each container matches its Compose service name; the seed-node list points at `admin-a` so the other three join via that. 
+### Main cluster — split admin/driver roles (ConfigDb: `OtOpcUa`) + +| Service | Role | Ports | +|---|---|---| +| `admin-a` | `OTOPCUA_ROLES=admin`, cluster seed | internal `9000` | +| `admin-b` | `OTOPCUA_ROLES=admin`, joins admin-a | internal `9000` | +| `driver-a` | `OTOPCUA_ROLES=driver` | host `4840` → container `4840` | +| `driver-b` | `OTOPCUA_ROLES=driver` | host `4841` → container `4840` | + +### Site A cluster — 2-node fused admin+driver (ConfigDb: `OtOpcUa_SiteA`) + +| Service | Role | Ports | +|---|---|---| +| `site-a-1` | `OTOPCUA_ROLES=admin,driver`, cluster seed | host `4842` → container `4840` | +| `site-a-2` | `OTOPCUA_ROLES=admin,driver`, joins site-a-1 | host `4843` → container `4840` | + +### Site B cluster — 2-node fused admin+driver (ConfigDb: `OtOpcUa_SiteB`) + +| Service | Role | Ports | +|---|---|---| +| `site-b-1` | `OTOPCUA_ROLES=admin,driver`, cluster seed | host `4844` → container `4840` | +| `site-b-2` | `OTOPCUA_ROLES=admin,driver`, joins site-b-1 | host `4845` → container `4840` | + +All containers bind Akka remoting to port `4053` inside their own network namespace; the `PublicHostname` of each matches its Compose service name. Cluster isolation is enforced purely by disjoint seed lists. ## Bring up @@ -22,12 +43,16 @@ All six containers share an Akka cluster bound to port `4053` inside the Compose # from the repo root docker compose -f docker-dev/docker-compose.yml up -d --build -# wait ~15 seconds for SQL to come up + the cluster to form +# wait ~20 seconds for SQL to come up + all three clusters to form -open http://localhost # Blazor admin UI via Traefik -open http://localhost:8080 # Traefik dashboard +open http://localhost # main cluster admin UI +open http://site-a.localhost # site A admin UI +open http://site-b.localhost # site B admin UI +open http://localhost:8080 # Traefik dashboard ``` +On macOS, `*.localhost` resolves to `127.0.0.1` automatically. 
On Linux, add `127.0.0.1 site-a.localhost site-b.localhost` to `/etc/hosts` if your resolver doesn't map `*.localhost` to loopback on its own. + The first build takes a few minutes (.NET SDK image + restore + publish). Subsequent rebuilds are faster with Docker's layer cache. ## Auth (dev only) @@ -58,5 +83,8 @@ The `-v` drops the SQL + LDAP volumes; remove it to keep ConfigDb state across r ## Notes - This compose is for the **local Mac/Linux developer rig**. The team's CI + soak runs go to the remote docker host at `10.100.0.35` (see `docs/v2/dev-environment.md`); the file there mirrors this one with adjusted port bindings. -- The OPC UA driver endpoints (`opc.tcp://localhost:4840`, `opc.tcp://localhost:4841`) are reachable directly from the host — Traefik is only in front of the admin HTTP surface. +- The OPC UA driver endpoints are reachable directly from the host (Traefik is only in front of the admin HTTP surface): + - Main: `opc.tcp://localhost:4840` (driver-a), `opc.tcp://localhost:4841` (driver-b) + - Site A: `opc.tcp://localhost:4842` (site-a-1), `opc.tcp://localhost:4843` (site-a-2) + - Site B: `opc.tcp://localhost:4844` (site-b-1), `opc.tcp://localhost:4845` (site-b-2) - Galaxy + Wonderware drivers can't run in Linux containers (they need the Windows-only mxaccessgw + Historian SDK). On non-Windows, `DriverInstanceActor.ShouldStub(driverType, roles)` returns `true` for those types and the actor goes straight to a `Stubbed` state that returns deterministic success. diff --git a/docker-dev/docker-compose.yml b/docker-dev/docker-compose.yml index 4dcc67a..0a153ed 100644 --- a/docker-dev/docker-compose.yml +++ b/docker-dev/docker-compose.yml @@ -1,18 +1,37 @@ -# docker-dev/ — Mac-friendly four-node fleet for v2 development + manual UI exercise. +# docker-dev/ — Mac-friendly multi-cluster fleet for v2 development + manual UI exercise. 
# -# Stack: -# sql SQL Server 2022 (ConfigDb backing store) -# ldap OpenLDAP with the dev users from C:\publish\glauth\auth.md mirrored in -# admin-a OtOpcUa.Host with OTOPCUA_ROLES=admin (cluster seed) -# admin-b OtOpcUa.Host with OTOPCUA_ROLES=admin (joins admin-a) -# driver-a OtOpcUa.Host with OTOPCUA_ROLES=driver (joins via admin-a) -# driver-b OtOpcUa.Host with OTOPCUA_ROLES=driver (joins via admin-a) -# traefik Routes :80 to whichever admin-* currently passes /health/active +# Stack (3 separate Akka clusters sharing the same SQL + LDAP): +# sql SQL Server 2022 (per-cluster ConfigDb databases) +# ldap OpenLDAP with the dev users from C:\publish\glauth\auth.md mirrored in +# +# Main cluster (existing — split-role admin / driver pair on a single Akka mesh): +# admin-a OtOpcUa.Host with OTOPCUA_ROLES=admin (seed) +# admin-b OtOpcUa.Host with OTOPCUA_ROLES=admin (joins admin-a) +# driver-a OtOpcUa.Host with OTOPCUA_ROLES=driver (joins via admin-a) +# driver-b OtOpcUa.Host with OTOPCUA_ROLES=driver (joins via admin-a) +# ConfigDb: OtOpcUa +# +# Site A cluster (2-node fused admin+driver — its own ConfigDb + seed): +# site-a-1, site-a-2 OTOPCUA_ROLES=admin,driver, seed = site-a-1 +# ConfigDb: OtOpcUa_SiteA +# +# Site B cluster (2-node fused admin+driver — its own ConfigDb + seed): +# site-b-1, site-b-2 OTOPCUA_ROLES=admin,driver, seed = site-b-1 +# ConfigDb: OtOpcUa_SiteB +# +# traefik PathPrefix → main cluster admin-a/admin-b; Host(`site-a.localhost`) → +# site-a-*; Host(`site-b.localhost`) → site-b-*. Add the two site hosts to +# your /etc/hosts (or rely on macOS `.localhost` auto-resolution). +# +# Cluster isolation is enforced by disjoint seed-node lists — Akka.Cluster gossip won't +# cross between the three meshes even though they share the same system name "otopcua". 
# # Usage: # docker compose -f docker-dev/docker-compose.yml up -d --build -# open http://localhost # Blazor admin UI via Traefik -# open http://localhost:8080 # Traefik dashboard +# open http://localhost # main cluster Blazor admin UI +# open http://site-a.localhost # site A admin UI +# open http://site-b.localhost # site B admin UI +# open http://localhost:8080 # Traefik dashboard # # Tear-down: docker compose -f docker-dev/docker-compose.yml down -v @@ -113,6 +132,102 @@ services: ports: - "4841:4840" + # ── Site A cluster (2-node fused admin+driver) ────────────────────────────── + # Its own ConfigDb (OtOpcUa_SiteA) + its own seed (site-a-1) → Akka isolation + # from the main cluster and from site B. Both nodes carry both roles. + + site-a-1: + <<: *otopcua-host + environment: + OTOPCUA_ROLES: "admin,driver" + ASPNETCORE_URLS: "http://+:9000" + ConnectionStrings__ConfigDb: "Server=sql,1433;Database=OtOpcUa_SiteA;User Id=sa;Password=OtOpcUa!Dev123;TrustServerCertificate=True;" + Cluster__Hostname: "0.0.0.0" + Cluster__Port: "4053" + Cluster__PublicHostname: "site-a-1" + Cluster__SeedNodes__0: "akka.tcp://otopcua@site-a-1:4053" + Cluster__Roles__0: "admin" + Cluster__Roles__1: "driver" + Security__Jwt__SigningKey: "docker-dev-signing-key-with-at-least-32-bytes-of-utf8-content-12345" + Security__Jwt__Issuer: "otopcua-dev" + Security__Jwt__Audience: "otopcua-dev" + Authentication__Ldap__Server: "ldap" + Authentication__Ldap__Port: "1389" + Authentication__Ldap__AllowInsecureLdap: "true" + ports: + - "4842:4840" + + site-a-2: + <<: *otopcua-host + depends_on: + sql: { condition: service_healthy } + site-a-1: { condition: service_started } + environment: + OTOPCUA_ROLES: "admin,driver" + ASPNETCORE_URLS: "http://+:9000" + ConnectionStrings__ConfigDb: "Server=sql,1433;Database=OtOpcUa_SiteA;User Id=sa;Password=OtOpcUa!Dev123;TrustServerCertificate=True;" + Cluster__Hostname: "0.0.0.0" + Cluster__Port: "4053" + Cluster__PublicHostname: "site-a-2" + 
Cluster__SeedNodes__0: "akka.tcp://otopcua@site-a-1:4053" + Cluster__Roles__0: "admin" + Cluster__Roles__1: "driver" + Security__Jwt__SigningKey: "docker-dev-signing-key-with-at-least-32-bytes-of-utf8-content-12345" + Security__Jwt__Issuer: "otopcua-dev" + Security__Jwt__Audience: "otopcua-dev" + Authentication__Ldap__Server: "ldap" + Authentication__Ldap__Port: "1389" + Authentication__Ldap__AllowInsecureLdap: "true" + ports: + - "4843:4840" + + # ── Site B cluster (2-node fused admin+driver) ────────────────────────────── + + site-b-1: + <<: *otopcua-host + environment: + OTOPCUA_ROLES: "admin,driver" + ASPNETCORE_URLS: "http://+:9000" + ConnectionStrings__ConfigDb: "Server=sql,1433;Database=OtOpcUa_SiteB;User Id=sa;Password=OtOpcUa!Dev123;TrustServerCertificate=True;" + Cluster__Hostname: "0.0.0.0" + Cluster__Port: "4053" + Cluster__PublicHostname: "site-b-1" + Cluster__SeedNodes__0: "akka.tcp://otopcua@site-b-1:4053" + Cluster__Roles__0: "admin" + Cluster__Roles__1: "driver" + Security__Jwt__SigningKey: "docker-dev-signing-key-with-at-least-32-bytes-of-utf8-content-12345" + Security__Jwt__Issuer: "otopcua-dev" + Security__Jwt__Audience: "otopcua-dev" + Authentication__Ldap__Server: "ldap" + Authentication__Ldap__Port: "1389" + Authentication__Ldap__AllowInsecureLdap: "true" + ports: + - "4844:4840" + + site-b-2: + <<: *otopcua-host + depends_on: + sql: { condition: service_healthy } + site-b-1: { condition: service_started } + environment: + OTOPCUA_ROLES: "admin,driver" + ASPNETCORE_URLS: "http://+:9000" + ConnectionStrings__ConfigDb: "Server=sql,1433;Database=OtOpcUa_SiteB;User Id=sa;Password=OtOpcUa!Dev123;TrustServerCertificate=True;" + Cluster__Hostname: "0.0.0.0" + Cluster__Port: "4053" + Cluster__PublicHostname: "site-b-2" + Cluster__SeedNodes__0: "akka.tcp://otopcua@site-b-1:4053" + Cluster__Roles__0: "admin" + Cluster__Roles__1: "driver" + Security__Jwt__SigningKey: "docker-dev-signing-key-with-at-least-32-bytes-of-utf8-content-12345" + 
Security__Jwt__Issuer: "otopcua-dev" + Security__Jwt__Audience: "otopcua-dev" + Authentication__Ldap__Server: "ldap" + Authentication__Ldap__Port: "1389" + Authentication__Ldap__AllowInsecureLdap: "true" + ports: + - "4845:4840" + traefik: image: traefik:v3.1 command: @@ -128,3 +243,7 @@ services: depends_on: - admin-a - admin-b + - site-a-1 + - site-a-2 + - site-b-1 + - site-b-2 diff --git a/docker-dev/traefik-dynamic.yml b/docker-dev/traefik-dynamic.yml index de51e14..610d0d8 100644 --- a/docker-dev/traefik-dynamic.yml +++ b/docker-dev/traefik-dynamic.yml @@ -1,6 +1,12 @@ -# docker-dev companion to scripts/install/traefik-dynamic.yml. Same routing rules, -# but the upstream targets are the Compose service names (admin-a, admin-b) on -# port 9000 instead of the Windows hostnames a bare-metal deployment would use. +# docker-dev companion to scripts/install/traefik-dynamic.yml. Routes three +# Akka clusters that share the Compose network: +# +# - Main cluster (default): PathPrefix(`/`) → admin-a / admin-b. +# - Site A cluster: Host(`site-a.localhost`) → site-a-1 / site-a-2. +# - Site B cluster: Host(`site-b.localhost`) → site-b-1 / site-b-2. +# +# Traefik's default router priority is the rule's length, so the longer Host(...) +# rules win over PathPrefix(`/`) for the site hostnames — no priority field needed. 
http: routers: @@ -9,6 +15,16 @@ http: rule: "PathPrefix(`/`)" service: otopcua-admin + otopcua-site-a: + entryPoints: ["web"] + rule: "Host(`site-a.localhost`)" + service: otopcua-site-a + + otopcua-site-b: + entryPoints: ["web"] + rule: "Host(`site-b.localhost`)" + service: otopcua-site-b + services: otopcua-admin: loadBalancer: @@ -19,3 +35,23 @@ http: path: /health/active interval: 5s timeout: 2s + + otopcua-site-a: + loadBalancer: + servers: + - url: "http://site-a-1:9000" + - url: "http://site-a-2:9000" + healthCheck: + path: /health/active + interval: 5s + timeout: 2s + + otopcua-site-b: + loadBalancer: + servers: + - url: "http://site-b-1:9000" + - url: "http://site-b-2:9000" + healthCheck: + path: /health/active + interval: 5s + timeout: 2s