diff --git a/docker-dev/README.md b/docker-dev/README.md index 4bd15fd..a7eede8 100644 --- a/docker-dev/README.md +++ b/docker-dev/README.md @@ -9,8 +9,9 @@ Mac-friendly multi-cluster OtOpcUa fleet for manual UI exercise + integration sm | Service | Role | Ports | |---|---|---| | `sql` | SQL Server 2022 — single `OtOpcUa` ConfigDb shared by all three clusters | host `14330` → container `1433` | -| `ldap` | OpenLDAP with dev users `alice` / `bob` | host `3893` → container `1389` | -| `traefik` | Routes :80 by Host header / PathPrefix | host `80`, dashboard `8080` | +| `traefik` | Routes :80 by Host header / PathPrefix | host `80`, dashboard `8089` | + +Authentication runs in `DevStubMode` — every host container has `Authentication__Ldap__DevStubMode=true` set, so the LDAP service is not part of the dev compose right now (the `bitnami/openldap:2.6` image was retired and the `bitnamilegacy/openldap:2.6` replacement crashes mid-setup with exit 68). Any non-empty username/password signs in as `FleetAdmin`. To restore a real LDAP service, add an `openldap`-compatible image back to compose, drop the `Authentication__Ldap__DevStubMode` env var, and restore the `Authentication__Ldap__Server` / `Authentication__Ldap__Port` / `Authentication__Ldap__AllowInsecureLdap` settings on each host container. ### Main cluster — split admin/driver roles @@ -70,7 +71,7 @@ docker compose -f docker-dev/docker-compose.yml up -d --build open http://localhost # main cluster admin UI open http://site-a.localhost # site A admin UI open http://site-b.localhost # site B admin UI -open http://localhost:8080 # Traefik dashboard +open http://localhost:8089 # Traefik dashboard ``` On macOS, `*.localhost` resolves to `127.0.0.1` automatically. On Linux add `127.0.0.1 site-a.localhost site-b.localhost` to `/etc/hosts` if your resolver doesn't. @@ -79,14 +80,7 @@ The first build takes a few minutes (.NET SDK image + restore + publish). 
Subseq ## Auth (dev only) -Use one of the LDAP dev users from `LDAP_USERS` in `docker-compose.yml`: - -| Username | Password | -|---|---| -| `alice` | `alice123` | -| `bob` | `bob123` | - -The compose mounts everyone into `ou=FleetAdmin` so the dev role mapping resolves to `FleetAdmin`. +`Authentication__Ldap__DevStubMode=true` is set on every host container, so any non-empty username/password signs in as a `FleetAdmin` user without contacting an LDAP server. **Do not** ship this configuration to production — set `DevStubMode=false` and wire a real LDAP backend before any non-dev deployment. ## Tear down @@ -98,7 +92,7 @@ The `-v` drops the SQL + LDAP volumes; remove it to keep ConfigDb state across r ## Failover smoke -1. Watch the Traefik dashboard at `http://localhost:8080`. Both `admin-a` and `admin-b` should be listed as healthy in the `otopcua-admin` service. +1. Watch the Traefik dashboard at `http://localhost:8089`. Both `admin-a` and `admin-b` should be listed as healthy in the `otopcua-admin` service. 2. `docker compose -f docker-dev/docker-compose.yml stop admin-a` — `admin-b` should pick up the admin role-leader within ~15 s (Akka split-brain stable-after). Traefik will route traffic to `admin-b` once its `/health/active` returns 200. 3. `docker compose -f docker-dev/docker-compose.yml start admin-a` — `admin-a` rejoins as a follower; `admin-b` keeps the leader role until something disturbs it. 
diff --git a/docker-dev/docker-compose.yml b/docker-dev/docker-compose.yml index 69ba80e..f091e02 100644 --- a/docker-dev/docker-compose.yml +++ b/docker-dev/docker-compose.yml @@ -35,7 +35,7 @@ # open http://localhost # main cluster Blazor admin UI # open http://site-a.localhost # site A admin UI # open http://site-b.localhost # site B admin UI -# open http://localhost:8080 # Traefik dashboard +# open http://localhost:8089 # Traefik dashboard (8080 is the sister scadalink stack) # # Tear-down: docker compose -f docker-dev/docker-compose.yml down -v @@ -71,17 +71,12 @@ services: entrypoint: ["/bin/bash", "/seed/entrypoint.sh"] restart: "no" - ldap: - image: bitnami/openldap:2.6 - environment: - LDAP_ROOT: "dc=lmxopcua,dc=local" - LDAP_ADMIN_USERNAME: "admin" - LDAP_ADMIN_PASSWORD: "ldapadmin" - LDAP_USERS: "alice,bob" - LDAP_PASSWORDS: "alice123,bob123" - LDAP_USER_DC: "ou=FleetAdmin" - ports: - - "3893:1389" + # OpenLDAP was previously here but the bitnami/openldap:2.6 image was retired + # (manifest gone) and bitnamilegacy/openldap:2.6 crashes during LDIF setup with + # exit 68. For the dev compose every host container now runs with + # Authentication__Ldap__DevStubMode=true, so any non-empty username/password + # signs in as `FleetAdmin`. Restore a real LDAP service when there's a need + # for end-to-end LDAP coverage (the host code path is unchanged). 
admin-a: &otopcua-host build: @@ -102,9 +97,7 @@ services: Security__Jwt__SigningKey: "docker-dev-signing-key-with-at-least-32-bytes-of-utf8-content-12345" Security__Jwt__Issuer: "otopcua-dev" Security__Jwt__Audience: "otopcua-dev" - Authentication__Ldap__Server: "ldap" - Authentication__Ldap__Port: "1389" - Authentication__Ldap__AllowInsecureLdap: "true" + Authentication__Ldap__DevStubMode: "true" admin-b: <<: *otopcua-host @@ -120,9 +113,7 @@ services: Security__Jwt__SigningKey: "docker-dev-signing-key-with-at-least-32-bytes-of-utf8-content-12345" Security__Jwt__Issuer: "otopcua-dev" Security__Jwt__Audience: "otopcua-dev" - Authentication__Ldap__Server: "ldap" - Authentication__Ldap__Port: "1389" - Authentication__Ldap__AllowInsecureLdap: "true" + Authentication__Ldap__DevStubMode: "true" driver-a: <<: *otopcua-host @@ -170,9 +161,7 @@ services: Security__Jwt__SigningKey: "docker-dev-signing-key-with-at-least-32-bytes-of-utf8-content-12345" Security__Jwt__Issuer: "otopcua-dev" Security__Jwt__Audience: "otopcua-dev" - Authentication__Ldap__Server: "ldap" - Authentication__Ldap__Port: "1389" - Authentication__Ldap__AllowInsecureLdap: "true" + Authentication__Ldap__DevStubMode: "true" ports: - "4842:4840" @@ -194,9 +183,7 @@ services: Security__Jwt__SigningKey: "docker-dev-signing-key-with-at-least-32-bytes-of-utf8-content-12345" Security__Jwt__Issuer: "otopcua-dev" Security__Jwt__Audience: "otopcua-dev" - Authentication__Ldap__Server: "ldap" - Authentication__Ldap__Port: "1389" - Authentication__Ldap__AllowInsecureLdap: "true" + Authentication__Ldap__DevStubMode: "true" ports: - "4843:4840" @@ -217,9 +204,7 @@ services: Security__Jwt__SigningKey: "docker-dev-signing-key-with-at-least-32-bytes-of-utf8-content-12345" Security__Jwt__Issuer: "otopcua-dev" Security__Jwt__Audience: "otopcua-dev" - Authentication__Ldap__Server: "ldap" - Authentication__Ldap__Port: "1389" - Authentication__Ldap__AllowInsecureLdap: "true" + Authentication__Ldap__DevStubMode: "true" ports: - 
"4844:4840" @@ -241,9 +226,7 @@ services: Security__Jwt__SigningKey: "docker-dev-signing-key-with-at-least-32-bytes-of-utf8-content-12345" Security__Jwt__Issuer: "otopcua-dev" Security__Jwt__Audience: "otopcua-dev" - Authentication__Ldap__Server: "ldap" - Authentication__Ldap__Port: "1389" - Authentication__Ldap__AllowInsecureLdap: "true" + Authentication__Ldap__DevStubMode: "true" ports: - "4845:4840" @@ -256,7 +239,7 @@ services: - --api.insecure=true ports: - "80:80" - - "8080:8080" + - "8089:8080" # 8080 conflicts with the sister scadalink dev stack volumes: - ./traefik-dynamic.yml:/etc/traefik/dynamic.yml:ro depends_on: diff --git a/docker-dev/seed/entrypoint.sh b/docker-dev/seed/entrypoint.sh index d2799ce..5da1f00 100755 --- a/docker-dev/seed/entrypoint.sh +++ b/docker-dev/seed/entrypoint.sh @@ -1,35 +1,48 @@ #!/usr/bin/env bash -# docker-dev cluster-seed entrypoint. Waits for the host containers to finish -# their EF Core auto-migration (which creates the ServerCluster table), then -# applies the idempotent seed script. +# docker-dev cluster-seed entrypoint. Waits for the OtOpcUa ConfigDb schema to +# be in place, then applies the idempotent row seed. # -# Image: mcr.microsoft.com/mssql-tools (Debian + sqlcmd at /opt/mssql-tools18/bin). +# IMPORTANT: this container does NOT run EF migrations — sqlcmd can't execute +# the V2 migration script cleanly because it contains CREATE PROCEDURE +# statements inside IF NOT EXISTS BEGIN ... END blocks (procs must be the +# first statement in their batch). Migrations are owned by the operator: +# +# dotnet ef database update \ +# --project src/Core/ZB.MOM.WW.OtOpcUa.Configuration \ +# --startup-project src/Server/ZB.MOM.WW.OtOpcUa.Host +# +# (with ConnectionStrings__ConfigDb pointing at Server=localhost,14330;...). +# Once the schema is in place, restart the cluster-seed container — or just +# `docker compose up -d` and the seed will pick up where it left off thanks to +# the IF NOT EXISTS guards in seed-clusters.sql. 
set -euo pipefail -SQLCMD="/opt/mssql-tools18/bin/sqlcmd" +SQLCMD="/opt/mssql-tools/bin/sqlcmd" SERVER="${SQL_HOST:-sql},1433" USER="${SQL_USER:-sa}" PASS="${SQL_PASSWORD:-OtOpcUa!Dev123}" DB="${SQL_DATABASE:-OtOpcUa}" -run_sql() { - "$SQLCMD" -S "$SERVER" -U "$USER" -P "$PASS" -d "$DB" -No -b -h -1 "$@" +run_sql_in() { + local target_db="$1"; shift + # -I forces SET QUOTED_IDENTIFIER ON (needed for filtered indexes if you + # ever extend this script to touch them). + "$SQLCMD" -S "$SERVER" -U "$USER" -P "$PASS" -d "$target_db" -b -h -1 -I "$@" } echo "[cluster-seed] waiting for SQL Server to accept connections..." -until run_sql -Q "SELECT 1" >/dev/null 2>&1; do +until run_sql_in master -Q "SELECT 1" >/dev/null 2>&1; do sleep 2 done echo "[cluster-seed] SQL Server up." -echo "[cluster-seed] waiting for $DB.ServerCluster (host containers must finish EF migration)..." -until run_sql -Q "IF OBJECT_ID('dbo.ServerCluster') IS NULL THROW 50001, 'missing', 1; SELECT 1" >/dev/null 2>&1; do +echo "[cluster-seed] waiting for ${DB} database + dbo.ServerCluster table (operator must run dotnet ef database update)..." +until run_sql_in "$DB" -Q "IF OBJECT_ID('dbo.ServerCluster') IS NULL THROW 50001, 'missing', 1; SELECT 1" >/dev/null 2>&1; do sleep 3 done echo "[cluster-seed] schema ready." -echo "[cluster-seed] applying seed-clusters.sql..." -run_sql -i /seed/seed-clusters.sql - +echo "[cluster-seed] applying seed-clusters.sql (ServerCluster + ClusterNode rows)..." +run_sql_in "$DB" -i /seed/seed-clusters.sql echo "[cluster-seed] done." 
diff --git a/src/Server/ZB.MOM.WW.OtOpcUa.Host/Health/HealthEndpoints.cs b/src/Server/ZB.MOM.WW.OtOpcUa.Host/Health/HealthEndpoints.cs index 1d2e3cd..08b6b4c 100644 --- a/src/Server/ZB.MOM.WW.OtOpcUa.Host/Health/HealthEndpoints.cs +++ b/src/Server/ZB.MOM.WW.OtOpcUa.Host/Health/HealthEndpoints.cs @@ -24,18 +24,21 @@ public static class HealthEndpoints public static IEndpointRouteBuilder MapOtOpcUaHealth(this IEndpointRouteBuilder app) { + // AllowAnonymous on all three — Traefik / k8s liveness probes / load-balancers + // hit these without credentials. Without it the AddOtOpcUaAuth fallback policy + // 401s every probe and Traefik marks every backend unhealthy. app.MapHealthChecks("/health/ready", new HealthCheckOptions { Predicate = c => c.Tags.Contains("ready"), - }); + }).AllowAnonymous(); app.MapHealthChecks("/health/active", new HealthCheckOptions { Predicate = c => c.Tags.Contains("active"), - }); + }).AllowAnonymous(); app.MapHealthChecks("/healthz", new HealthCheckOptions { Predicate = _ => false, // process-liveness only — no probes run. - }); + }).AllowAnonymous(); return app; } }