diff --git a/docker-dev/Dockerfile b/docker-dev/Dockerfile new file mode 100644 index 0000000..b4cf1f7 --- /dev/null +++ b/docker-dev/Dockerfile @@ -0,0 +1,20 @@ +# Multi-stage build of OtOpcUa.Host targeting linux-x64. Used by docker-dev/docker-compose.yml +# to spin four host containers (admin-a, admin-b, driver-a, driver-b) from a single image — +# Compose drives OTOPCUA_ROLES + Cluster:* env per container to differentiate them. + +FROM mcr.microsoft.com/dotnet/sdk:10.0 AS build +WORKDIR /src +COPY . . +RUN dotnet restore ZB.MOM.WW.OtOpcUa.slnx +RUN dotnet publish src/Server/ZB.MOM.WW.OtOpcUa.Host/ZB.MOM.WW.OtOpcUa.Host.csproj \ + -c Release -o /app --no-restore + +FROM mcr.microsoft.com/dotnet/aspnet:10.0 AS runtime +WORKDIR /app +COPY --from=build /app ./ + +EXPOSE 9000 +EXPOSE 4053 +EXPOSE 4840 + +ENTRYPOINT ["dotnet", "OtOpcUa.Host.dll"] diff --git a/docker-dev/README.md b/docker-dev/README.md new file mode 100644 index 0000000..5f08ac2 --- /dev/null +++ b/docker-dev/README.md @@ -0,0 +1,62 @@ +# docker-dev + +Mac-friendly four-node OtOpcUa fleet for manual UI exercise + integration smoke tests. Spins up an Akka cluster + SQL Server + OpenLDAP + Traefik in front of two admin nodes. + +## Stack + +| Service | Role | Ports | +|---|---|---| +| `sql` | SQL Server 2022 (`ConfigDb` backing store) | host `14330` → container `1433` | +| `ldap` | OpenLDAP with dev users `alice` / `bob` | host `3893` → container `1389` | +| `admin-a` | OtOpcUa.Host, `OTOPCUA_ROLES=admin`, cluster seed | internal `9000` | +| `admin-b` | OtOpcUa.Host, `OTOPCUA_ROLES=admin`, joins admin-a | internal `9000` | +| `driver-a` | OtOpcUa.Host, `OTOPCUA_ROLES=driver` | host `4840` → container `4840` | +| `driver-b` | OtOpcUa.Host, `OTOPCUA_ROLES=driver` | host `4841` → container `4840` | +| `traefik` | Routes `:80` to whichever admin-* currently passes `/health/active` | host `80`, dashboard `8080` | + +The four OtOpcUa.Host containers (`admin-a`, `admin-b`, `driver-a`, `driver-b`) share an Akka cluster bound to port `4053` inside the Compose network; `sql`, `ldap`, and `traefik` are not cluster members. 
The Akka `PublicHostname` of each container matches its Compose service name; the seed-node list points at `admin-a` so the other three join via that. + +## Bring up + +```bash +# from the repo root +docker compose -f docker-dev/docker-compose.yml up -d --build + +# wait ~15 seconds for SQL to come up + the cluster to form + +open http://localhost # Blazor admin UI via Traefik +open http://localhost:8080 # Traefik dashboard +``` + +The first build takes a few minutes (.NET SDK image + restore + publish). Subsequent rebuilds are faster with Docker's layer cache. + +## Auth (dev only) + +Use one of the LDAP dev users from `LDAP_USERS` in `docker-compose.yml`: + +| Username | Password | +|---|---| +| `alice` | `alice123` | +| `bob` | `bob123` | + +The compose mounts everyone into `ou=FleetAdmin` so the dev role mapping resolves to `FleetAdmin`. + +## Tear down + +```bash +docker compose -f docker-dev/docker-compose.yml down -v +``` + +The `-v` drops the SQL + LDAP volumes; remove it to keep ConfigDb state across restarts. + +## Failover smoke + +1. Watch the Traefik dashboard at `http://localhost:8080`. Both `admin-a` and `admin-b` should be listed as servers of the `otopcua-admin` service, but only the current admin role-leader shows as healthy — a follower answers `/health/active` with 503, so Traefik keeps it out of rotation. +2. `docker compose -f docker-dev/docker-compose.yml stop admin-a` — `admin-b` should pick up the admin role-leader within ~15 s (Akka split-brain stable-after). Traefik will route traffic to `admin-b` once its `/health/active` returns 200. +3. `docker compose -f docker-dev/docker-compose.yml start admin-a` — `admin-a` rejoins as a follower; `admin-b` keeps the leader role until something disturbs it. + +## Notes + +- This compose is for the **local Mac/Linux developer rig**. The team's CI + soak runs go to the remote docker host at `10.100.0.35` (see `docs/v2/dev-environment.md`); the file there mirrors this one with adjusted port bindings. 
+- The OPC UA driver endpoints (`opc.tcp://localhost:4840`, `opc.tcp://localhost:4841`) are reachable directly from the host — Traefik is only in front of the admin HTTP surface. +- Galaxy + Wonderware drivers can't run in Linux containers (they need the Windows-only mxaccessgw + Historian SDK). On non-Windows, `DriverInstanceActor.ShouldStub(driverType, roles)` returns `true` for those types and the actor goes straight to a `Stubbed` state that returns deterministic success. diff --git a/docker-dev/docker-compose.yml b/docker-dev/docker-compose.yml new file mode 100644 index 0000000..4dcc67a --- /dev/null +++ b/docker-dev/docker-compose.yml @@ -0,0 +1,130 @@ +# docker-dev/ — Mac-friendly four-node fleet for v2 development + manual UI exercise. +# +# Stack: +# sql SQL Server 2022 (ConfigDb backing store) +# ldap OpenLDAP with the dev users from C:\publish\glauth\auth.md mirrored in +# admin-a OtOpcUa.Host with OTOPCUA_ROLES=admin (cluster seed) +# admin-b OtOpcUa.Host with OTOPCUA_ROLES=admin (joins admin-a) +# driver-a OtOpcUa.Host with OTOPCUA_ROLES=driver (joins via admin-a) +# driver-b OtOpcUa.Host with OTOPCUA_ROLES=driver (joins via admin-a) +# traefik Routes :80 to whichever admin-* currently passes /health/active +# +# Usage: +# docker compose -f docker-dev/docker-compose.yml up -d --build +# open http://localhost # Blazor admin UI via Traefik +# open http://localhost:8080 # Traefik dashboard +# +# Tear-down: docker compose -f docker-dev/docker-compose.yml down -v + +name: otopcua-dev + +services: + + sql: + image: mcr.microsoft.com/mssql/server:2022-latest + environment: + ACCEPT_EULA: "Y" + MSSQL_SA_PASSWORD: "OtOpcUa!Dev123" # current name of the deprecated SA_PASSWORD variable; must match the -P value in the healthcheck + MSSQL_PID: Developer + ports: + - "14330:1433" # host 14330 -> container 1433 + healthcheck: + test: ["CMD-SHELL", "/opt/mssql-tools18/bin/sqlcmd -S localhost -U sa -P 'OtOpcUa!Dev123' -No -Q 'SELECT 1' || exit 1"] + interval: 10s + timeout: 5s + retries: 20 # 20 probes at 10s intervals: roughly 200 s for SQL Server to start answering SELECT 1 + + ldap: + image: bitnami/openldap:2.6 + environment: + LDAP_ROOT: "dc=lmxopcua,dc=local" 
+ LDAP_ADMIN_USERNAME: "admin" + LDAP_ADMIN_PASSWORD: "ldapadmin" + LDAP_USERS: "alice,bob" + LDAP_PASSWORDS: "alice123,bob123" + LDAP_USER_DC: "ou=FleetAdmin" # NOTE(review): confirm the bitnami/openldap image does not itself prefix "ou=" to this value (that would yield ou=ou=FleetAdmin) + ports: + - "3893:1389" # host 3893 -> container 1389 + + admin-a: &otopcua-host # anchor merged by the other host services via "<<"; each supplies its own environment map, which replaces this one as a whole + build: + context: .. # parent directory of this compose file; the dockerfile path below is relative to it + dockerfile: docker-dev/Dockerfile + image: otopcua-host:dev + depends_on: + sql: { condition: service_healthy } # start only after the sql healthcheck passes + environment: + OTOPCUA_ROLES: "admin" + ASPNETCORE_URLS: "http://+:9000" + ConnectionStrings__ConfigDb: "Server=sql,1433;Database=OtOpcUa;User Id=sa;Password=OtOpcUa!Dev123;TrustServerCertificate=True;" + Cluster__Hostname: "0.0.0.0" + Cluster__Port: "4053" + Cluster__PublicHostname: "admin-a" + Cluster__SeedNodes__0: "akka.tcp://otopcua@admin-a:4053" # admin-a lists itself as the seed node + Cluster__Roles__0: "admin" + Security__Jwt__SigningKey: "docker-dev-signing-key-with-at-least-32-bytes-of-utf8-content-12345" + Security__Jwt__Issuer: "otopcua-dev" + Security__Jwt__Audience: "otopcua-dev" + Authentication__Ldap__Server: "ldap" + Authentication__Ldap__Port: "1389" # container-side port of the ldap service + Authentication__Ldap__AllowInsecureLdap: "true" + + admin-b: + <<: *otopcua-host + environment: + OTOPCUA_ROLES: "admin" + ASPNETCORE_URLS: "http://+:9000" + ConnectionStrings__ConfigDb: "Server=sql,1433;Database=OtOpcUa;User Id=sa;Password=OtOpcUa!Dev123;TrustServerCertificate=True;" + Cluster__Hostname: "0.0.0.0" + Cluster__Port: "4053" + Cluster__PublicHostname: "admin-b" + Cluster__SeedNodes__0: "akka.tcp://otopcua@admin-a:4053" + Cluster__Roles__0: "admin" + Security__Jwt__SigningKey: "docker-dev-signing-key-with-at-least-32-bytes-of-utf8-content-12345" # same key, issuer and audience as admin-a + Security__Jwt__Issuer: "otopcua-dev" + Security__Jwt__Audience: "otopcua-dev" + Authentication__Ldap__Server: "ldap" + Authentication__Ldap__Port: "1389" + Authentication__Ldap__AllowInsecureLdap: "true" + + driver-a: + <<: *otopcua-host + environment: + OTOPCUA_ROLES: "driver" + ConnectionStrings__ConfigDb: "Server=sql,1433;Database=OtOpcUa;User Id=sa;Password=OtOpcUa!Dev123;TrustServerCertificate=True;" + Cluster__Hostname: "0.0.0.0" 
+ Cluster__Port: "4053" + Cluster__PublicHostname: "driver-a" + Cluster__SeedNodes__0: "akka.tcp://otopcua@admin-a:4053" + Cluster__Roles__0: "driver" + ports: + - "4840:4840" # host 4840 -> container 4840 + + driver-b: + <<: *otopcua-host + environment: + OTOPCUA_ROLES: "driver" + ConnectionStrings__ConfigDb: "Server=sql,1433;Database=OtOpcUa;User Id=sa;Password=OtOpcUa!Dev123;TrustServerCertificate=True;" + Cluster__Hostname: "0.0.0.0" + Cluster__Port: "4053" + Cluster__PublicHostname: "driver-b" + Cluster__SeedNodes__0: "akka.tcp://otopcua@admin-a:4053" + Cluster__Roles__0: "driver" + ports: + - "4841:4840" # host 4841 -> container 4840, so both drivers can be published side by side + + traefik: + image: traefik:v3.1 + command: + - --entrypoints.web.address=:80 + - --providers.file.filename=/etc/traefik/dynamic.yml # the file mounted read-only under volumes below + - --providers.file.watch=true + - --api.insecure=true # dev only + ports: + - "80:80" + - "8080:8080" + volumes: + - ./traefik-dynamic.yml:/etc/traefik/dynamic.yml:ro + depends_on: + - admin-a + - admin-b diff --git a/docker-dev/traefik-dynamic.yml b/docker-dev/traefik-dynamic.yml new file mode 100644 index 0000000..de51e14 --- /dev/null +++ b/docker-dev/traefik-dynamic.yml @@ -0,0 +1,21 @@ +# docker-dev companion to scripts/install/traefik-dynamic.yml. Same service + health check, but the +# router matches every request (PathPrefix) rather than a host name, and the upstream targets are the +# Compose service names (admin-a, admin-b) on port 9000 instead of the Windows hostnames a bare-metal deployment would use. 
+ +http: + routers: + otopcua-admin: + entryPoints: ["web"] + rule: "PathPrefix(`/`)" + service: otopcua-admin + + services: + otopcua-admin: + loadBalancer: + servers: + - url: "http://admin-a:9000" + - url: "http://admin-b:9000" + healthCheck: + path: /health/active + interval: 5s + timeout: 2s diff --git a/scripts/install/Install-Traefik.ps1 b/scripts/install/Install-Traefik.ps1 new file mode 100644 index 0000000..49cd817 --- /dev/null +++ b/scripts/install/Install-Traefik.ps1 @@ -0,0 +1,69 @@ +<# +.SYNOPSIS + Installs Traefik as a Windows service that routes admin HTTP traffic to whichever + OtOpcUa.Host node holds the admin role-leader (via /health/active). + +.DESCRIPTION + Downloads the Traefik Windows binary into $InstallRoot, drops traefik.yml + + traefik-dynamic.yml from this directory next to it, and registers Traefik as a + Windows service via sc.exe with restart-on-failure. Must be run from an elevated session. + + Companion to Install-Services.ps1. Run on the box that fronts the admin HTTP + traffic (typically a separate node from OtOpcUaHost, or co-located on the + primary admin node). + +.PARAMETER InstallRoot + Where the Traefik binary + config land. Default 'C:\Program Files\Traefik'. + +.PARAMETER TraefikVersion + Traefik version to download. Default 'v3.1.6'. + +.EXAMPLE + .\Install-Traefik.ps1 -InstallRoot 'C:\Program Files\Traefik' +#> +#Requires -RunAsAdministrator +[CmdletBinding()] +param( + [string]$InstallRoot = 'C:\Program Files\Traefik', + [string]$TraefikVersion = 'v3.1.6' +) + +$ErrorActionPreference = 'Stop' + +if (-not (Test-Path $InstallRoot)) { + New-Item -ItemType Directory -Path $InstallRoot | Out-Null +} + +$zip = Join-Path $env:TEMP "traefik-$TraefikVersion.zip" +$url = "https://github.com/traefik/traefik/releases/download/$TraefikVersion/traefik_${TraefikVersion}_windows_amd64.zip" + +Write-Host "Downloading Traefik $TraefikVersion..." 
+Invoke-WebRequest -Uri $url -OutFile $zip -UseBasicParsing # no Internet Explorer engine dependency on Windows PowerShell 5.1 +Expand-Archive -Path $zip -DestinationPath $InstallRoot -Force +Remove-Item $zip + +$scriptDir = Split-Path -Parent $MyInvocation.MyCommand.Path +Copy-Item -Force (Join-Path $scriptDir 'traefik.yml') $InstallRoot +Copy-Item -Force (Join-Path $scriptDir 'traefik-dynamic.yml') (Join-Path $InstallRoot 'dynamic.yml') + +# Traefik reads dynamic.yml from /etc/traefik on Linux; on Windows place it next to the +# binary and point the file provider at it. Edit traefik.yml's `filename:` if you want +# to change the location. +(Get-Content -Raw (Join-Path $InstallRoot 'traefik.yml')) ` + -replace '/etc/traefik/dynamic.yml', (Join-Path $InstallRoot 'dynamic.yml').Replace('\', '/') ` + | Set-Content (Join-Path $InstallRoot 'traefik.yml') + +Write-Host "Installing Traefik Windows service..." +& sc.exe create OtOpcUaTraefik binPath= "`"$InstallRoot\traefik.exe`" --configFile=`"$InstallRoot\traefik.yml`"" ` + DisplayName= 'OtOpcUa Traefik (admin HTTP front door)' ` + start= auto | Out-Null; if ($LASTEXITCODE -ne 0) { Write-Warning "sc.exe create exited with code $LASTEXITCODE - the OtOpcUaTraefik service was not (re)created." } # $ErrorActionPreference does not cover native exit codes + +& sc.exe failure OtOpcUaTraefik reset= 86400 actions= restart/5000/restart/30000/restart/60000 | Out-Null; if ($LASTEXITCODE -ne 0) { Write-Warning "sc.exe failure exited with code $LASTEXITCODE - restart-on-failure actions were not applied." } + +Write-Host "" +Write-Host "Installed OtOpcUaTraefik. Edit:" +Write-Host " $InstallRoot\dynamic.yml (router + service definitions)" +Write-Host "Start with:" +Write-Host " sc.exe start OtOpcUaTraefik" +Write-Host "" +Write-Host "Traefik dashboard: http://localhost:8080 (turn off api.insecure in production)" diff --git a/scripts/install/traefik-dynamic.yml b/scripts/install/traefik-dynamic.yml new file mode 100644 index 0000000..00fe687 --- /dev/null +++ b/scripts/install/traefik-dynamic.yml @@ -0,0 +1,24 @@ +# Dynamic (file-provider) Traefik config for the OtOpcUa admin HTTP routing. +# Picked up by traefik.yml's file provider (with watch: true) so router/service +# edits hot-reload without a Traefik restart. 
+ +http: + routers: + otopcua-admin: + entryPoints: ["web"] + rule: "HostRegexp(`otopcua.*`)" # NOTE(review): pattern is written without anchors — confirm that matching any host containing "otopcua" is intended + service: otopcua-admin + + services: + otopcua-admin: + loadBalancer: + servers: + - url: "http://admin-a:9000" # NOTE(review): presumably placeholders — replace with the real admin node hostnames + - url: "http://admin-b:9000" + healthCheck: + path: /health/active + interval: 5s + timeout: 2s + # With no explicit status set, Traefik treats any 2xx/3xx answer as healthy. Followers return 503 from + # /health/active so Traefik will drop them from the balancer + # within the next interval after a leadership change. diff --git a/scripts/install/traefik.yml b/scripts/install/traefik.yml new file mode 100644 index 0000000..cc0bd46 --- /dev/null +++ b/scripts/install/traefik.yml @@ -0,0 +1,30 @@ +# Traefik static configuration for the OtOpcUa fleet HTTP front door. +# +# Routes admin-role HTTP traffic (Blazor + auth + SignalR + /auth/*) to whichever +# OtOpcUa.Host node currently holds the admin role-leader. Uses the /health/active +# endpoint as the active-leader signal: a node returns 200 only when it is the +# Akka admin role-leader; followers return 503 and Traefik routes around them. +# +# OPC UA traffic is NOT routed through Traefik — clients connect directly to +# opc.tcp://node:4840 on every driver node and use the standard ServiceLevel +# heuristic for failover. + +entryPoints: + web: + address: ":80" + +providers: + file: + filename: /etc/traefik/dynamic.yml # Install-Traefik.ps1 rewrites this path to <InstallRoot>/dynamic.yml on Windows + watch: true + +api: + insecure: true # turn off in production + dashboard: true + +log: + level: INFO + format: common + +accessLog: + format: common