From b0a62a9f3bf14d43bfb9a54ea70b32e8d96a1acf Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Sun, 7 Jun 2026 08:17:09 -0400 Subject: [PATCH] fix(docker-dev): self-bootstrap schema via one-shot migrator (fixes fresh-volume quirks) Adds a 'migrator' Dockerfile stage + Compose service that runs 'dotnet ef database update' once on bring-up, so a fresh SQL volume gets the schema with no operator step (quirk 1). cluster-seed + every host node depend on it via service_completed_successfully, so the seed never races an in-progress migration (quirk 2). Host build pinned to target: runtime (the migrator is now the last stage). entrypoint + README updated; the manual 'dotnet ef' first-time step is gone. Verified: down -v + up --build self-bootstraps (migrator+seed exit 0, 6 nodes up), deploy Sealed 6/6. --- docker-dev/Dockerfile | 16 +++++++++++++++ docker-dev/README.md | 27 ++++--------------------- docker-dev/docker-compose.yml | 38 +++++++++++++++++++++++++++++------ docker-dev/seed/entrypoint.sh | 25 +++++++++-------------- 4 files changed, 61 insertions(+), 45 deletions(-) diff --git a/docker-dev/Dockerfile b/docker-dev/Dockerfile index 95008fba..15ec7781 100644 --- a/docker-dev/Dockerfile +++ b/docker-dev/Dockerfile @@ -1,6 +1,7 @@ # Multi-stage build of OtOpcUa.Host targeting linux-x64. Used by docker-dev/docker-compose.yml # to spin six host containers (central-1, central-2, site-a-1, site-a-2, site-b-1, site-b-2) from a single image — # Compose drives OTOPCUA_ROLES + Cluster:* env per container to differentiate them. +# A separate `migrator` stage (below) applies EF migrations once on bring-up. FROM mcr.microsoft.com/dotnet/sdk:10.0 AS build WORKDIR /src @@ -18,3 +19,18 @@ EXPOSE 4053 EXPOSE 4840 ENTRYPOINT ["dotnet", "OtOpcUa.Host.dll"] + +# ── Migrator (one-shot) ────────────────────────────────────────────────────── +# Applies EF Core migrations to the ConfigDb so a fresh SQL volume gets the schema +# with no operator step. 
docker-dev compose runs this once, before cluster-seed + +# the host nodes (they depend on it via service_completed_successfully). The host +# nodes deliberately do NOT auto-migrate (production owns schema changes), so this +# rig-only stage carries that responsibility. The connection comes from the +# OTOPCUA_CONFIG_CONNECTION env var read by DesignTimeDbContextFactory. +FROM build AS migrator +RUN dotnet tool install --global dotnet-ef --version 10.0.7 +ENV PATH="${PATH}:/root/.dotnet/tools" +WORKDIR /src +ENTRYPOINT ["dotnet", "ef", "database", "update", \ + "--project", "src/Core/ZB.MOM.WW.OtOpcUa.Configuration", \ + "--startup-project", "src/Core/ZB.MOM.WW.OtOpcUa.Configuration"] diff --git a/docker-dev/README.md b/docker-dev/README.md index 30f4f886..4627eba6 100644 --- a/docker-dev/README.md +++ b/docker-dev/README.md @@ -42,7 +42,7 @@ Site nodes serve no UI and authenticate no users. The central cluster manages an All six host nodes write to the same `OtOpcUa` ConfigDb. The `ServerCluster` table differentiates the three logical clusters: each maps to one row, and each `ClusterNode` row's `ClusterId` ties the runtime node back to its owning cluster scope. -A one-shot `cluster-seed` Compose service (image `mcr.microsoft.com/mssql-tools`) waits for the `OtOpcUa` ConfigDb schema to exist (the host nodes do **not** auto-migrate — you apply EF migrations once; see [First-time setup](#first-time-setup-or-after-down--v)) and then INSERTs the rows below. The seed is **idempotent** — `IF NOT EXISTS` guards every insert — so re-runs on `docker compose up` are no-ops: +Two one-shot Compose services bootstrap the DB on bring-up: `migrator` applies the EF Core migrations (so a fresh SQL volume gets the schema with no operator step — the host nodes deliberately do **not** auto-migrate, since production owns schema changes), then `cluster-seed` (image `mcr.microsoft.com/mssql-tools`) INSERTs the rows below. 
`cluster-seed` and every host node wait for the `migrator` to complete via `depends_on` (`condition: service_completed_successfully`), so the seed never races an in-progress migration. The seed is **idempotent** — `IF NOT EXISTS` guards every insert — so re-runs on `docker compose up` are no-ops: | Logical cluster | `ServerCluster.ClusterId` | `ClusterNode.NodeId` rows | |---|---|---| @@ -70,33 +70,14 @@ The DriverHost actor doesn't spawn drivers from raw DriverInstance rows on its o # from the repo root docker compose -f docker-dev/docker-compose.yml up -d --build -# wait ~20 seconds for SQL to come up + the mesh to form +# the one-shot migrator + cluster-seed bootstrap the DB; watch the seed finish: +docker compose -f docker-dev/docker-compose.yml logs -f cluster-seed # ^C once it prints "[cluster-seed] done." open http://localhost:9200 # Admin UI (Traefik → central-1 or central-2) open http://localhost:8089 # Traefik dashboard ``` -The first build takes a few minutes (.NET SDK image + restore + publish). Subsequent rebuilds are faster with Docker's layer cache. - -### First-time setup (or after `down -v`) - -The host nodes do **not** auto-create the ConfigDb schema — on a brand-new SQL volume you must apply the EF migrations once, then (re)run the seed. (The auto-started `cluster-seed` polls for `dbo.ServerCluster`, which the *first* migration creates, so if it runs mid-migration it can fail against an intermediate schema — just re-run it after migrations finish.) - -```bash -# 1. bring the stack up (SQL + nodes; nodes retry the DB until the schema exists) -docker compose -f docker-dev/docker-compose.yml up -d --build - -# 2.
create + migrate the OtOpcUa ConfigDb (one time; the design-time factory reads OTOPCUA_CONFIG_CONNECTION) -OTOPCUA_CONFIG_CONNECTION="Server=localhost,14330;Database=OtOpcUa;User Id=sa;Password=OtOpcUa!Dev123;TrustServerCertificate=True;" \ - dotnet ef database update \ - --project src/Core/ZB.MOM.WW.OtOpcUa.Configuration \ - --startup-project src/Core/ZB.MOM.WW.OtOpcUa.Configuration - -# 3. apply the cluster/namespace/driver seed against the now-complete schema (idempotent) -docker compose -f docker-dev/docker-compose.yml run --rm cluster-seed -``` - -After the schema + seed exist, a plain `docker compose ... up -d` is enough — the named SQL volume keeps both across restarts (only `down -v` wipes them, which is when you repeat the steps above). +The first build takes a few minutes (.NET SDK image + restore + publish). **No manual schema step is needed** — on a fresh SQL volume the one-shot `migrator` service applies the EF migrations (the host nodes deliberately don't auto-migrate, since production owns schema changes), then `cluster-seed` populates the cluster/namespace/driver rows. `cluster-seed` and the host nodes wait for the migrator via `service_completed_successfully`, so nothing races an in-progress migration. A plain `docker compose ... up -d` on an existing volume is a fast no-op for both — the named SQL volume keeps the schema + rows across restarts; only `down -v` wipes them, after which the next `up` re-migrates + re-seeds automatically. 
## Auth (dev only) diff --git a/docker-dev/docker-compose.yml b/docker-dev/docker-compose.yml index add0daaf..8493d326 100644 --- a/docker-dev/docker-compose.yml +++ b/docker-dev/docker-compose.yml @@ -70,15 +70,34 @@ services: timeout: 5s retries: 20 - # ── Cluster seed (one-shot) ──────────────────────────────────────────────── - # Waits for SQL + the host containers' EF auto-migration, then INSERTs the - # three ServerCluster rows and the six ClusterNode rows that scope each tenant - # inside the shared OtOpcUa ConfigDb. Idempotent — re-runs are no-ops. - cluster-seed: - image: mcr.microsoft.com/mssql-tools:latest + # ── Migrator (one-shot) ──────────────────────────────────────────────────── + # Applies EF Core migrations to the OtOpcUa ConfigDb so a fresh SQL volume gets + # the schema with no operator step (the host nodes deliberately don't auto- + # migrate — production owns schema changes). cluster-seed + every host node + # depend on this completing, so nothing races an in-progress migration. + # Idempotent: a no-op once the schema is current. + migrator: + build: + context: .. + dockerfile: docker-dev/Dockerfile + target: migrator + image: otopcua-migrator:dev depends_on: sql: condition: service_healthy + environment: + OTOPCUA_CONFIG_CONNECTION: "Server=sql,1433;Database=OtOpcUa;User Id=sa;Password=OtOpcUa!Dev123;TrustServerCertificate=True;" + restart: "no" + + # ── Cluster seed (one-shot) ──────────────────────────────────────────────── + # Runs only after `migrator` completes (so the schema is final — no race), then + # INSERTs the three ServerCluster rows and the six ClusterNode rows that scope + # each tenant inside the shared OtOpcUa ConfigDb. Idempotent — re-runs are no-ops. + cluster-seed: + image: mcr.microsoft.com/mssql-tools:latest + depends_on: + migrator: + condition: service_completed_successfully volumes: - ./seed:/seed:ro entrypoint: ["/bin/bash", "/seed/entrypoint.sh"] @@ -100,9 +119,11 @@ services: build: context: .. 
dockerfile: docker-dev/Dockerfile + target: runtime image: otopcua-host:dev depends_on: sql: { condition: service_healthy } + migrator: { condition: service_completed_successfully } environment: OTOPCUA_ROLES: "admin,driver" ASPNETCORE_URLS: "http://+:9000" @@ -135,6 +156,7 @@ services: depends_on: sql: { condition: service_healthy } central-1: { condition: service_started } + migrator: { condition: service_completed_successfully } environment: OTOPCUA_ROLES: "admin,driver" ASPNETCORE_URLS: "http://+:9000" @@ -172,6 +194,7 @@ services: depends_on: sql: { condition: service_healthy } central-1: { condition: service_started } + migrator: { condition: service_completed_successfully } environment: OTOPCUA_ROLES: "driver" ConnectionStrings__ConfigDb: "Server=sql,1433;Database=OtOpcUa;User Id=sa;Password=OtOpcUa!Dev123;TrustServerCertificate=True;" @@ -191,6 +214,7 @@ services: depends_on: sql: { condition: service_healthy } central-1: { condition: service_started } + migrator: { condition: service_completed_successfully } environment: OTOPCUA_ROLES: "driver" ConnectionStrings__ConfigDb: "Server=sql,1433;Database=OtOpcUa;User Id=sa;Password=OtOpcUa!Dev123;TrustServerCertificate=True;" @@ -210,6 +234,7 @@ services: depends_on: sql: { condition: service_healthy } central-1: { condition: service_started } + migrator: { condition: service_completed_successfully } environment: OTOPCUA_ROLES: "driver" ConnectionStrings__ConfigDb: "Server=sql,1433;Database=OtOpcUa;User Id=sa;Password=OtOpcUa!Dev123;TrustServerCertificate=True;" @@ -227,6 +252,7 @@ services: depends_on: sql: { condition: service_healthy } central-1: { condition: service_started } + migrator: { condition: service_completed_successfully } environment: OTOPCUA_ROLES: "driver" ConnectionStrings__ConfigDb: "Server=sql,1433;Database=OtOpcUa;User Id=sa;Password=OtOpcUa!Dev123;TrustServerCertificate=True;" diff --git a/docker-dev/seed/entrypoint.sh b/docker-dev/seed/entrypoint.sh index 5da1f003..93fb77b5 100755 --- 
a/docker-dev/seed/entrypoint.sh +++ b/docker-dev/seed/entrypoint.sh @@ -1,20 +1,13 @@ #!/usr/bin/env bash -# docker-dev cluster-seed entrypoint. Waits for the OtOpcUa ConfigDb schema to -# be in place, then applies the idempotent row seed. +# docker-dev cluster-seed entrypoint. Applies the idempotent row seed. # -# IMPORTANT: this container does NOT run EF migrations — sqlcmd can't execute -# the V2 migration script cleanly because it contains CREATE PROCEDURE -# statements inside IF NOT EXISTS BEGIN ... END blocks (procs must be the -# first statement in their batch). Migrations are owned by the operator: -# -# dotnet ef database update \ -# --project src/Core/ZB.MOM.WW.OtOpcUa.Configuration \ -# --startup-project src/Server/ZB.MOM.WW.OtOpcUa.Host -# -# (with ConnectionStrings__ConfigDb pointing at Server=localhost,14330;...). -# Once the schema is in place, restart the cluster-seed container — or just -# `docker compose up -d` and the seed will pick up where it left off thanks to -# the IF NOT EXISTS guards in seed-clusters.sql. +# This container does NOT run EF migrations — sqlcmd can't execute the migration +# script cleanly (it has CREATE PROCEDURE inside IF NOT EXISTS BEGIN ... END +# blocks; procs must be the first statement in their batch). The schema is owned +# by the `migrator` Compose service (dotnet ef), which this seed depends on via +# `service_completed_successfully` — so by the time we run, migrations are fully +# applied. The dbo.ServerCluster wait below is therefore just a fast sanity check. +# Re-runs are safe: every insert in seed-clusters.sql is IF NOT EXISTS-guarded. set -euo pipefail @@ -37,7 +30,7 @@ until run_sql_in master -Q "SELECT 1" >/dev/null 2>&1; do done echo "[cluster-seed] SQL Server up." -echo "[cluster-seed] waiting for ${DB} database + dbo.ServerCluster table (operator must run dotnet ef database update)..." +echo "[cluster-seed] verifying ${DB} schema (dbo.ServerCluster) is present (migrator should have applied it)..." 
until run_sql_in "$DB" -Q "IF OBJECT_ID('dbo.ServerCluster') IS NULL THROW 50001, 'missing', 1; SELECT 1" >/dev/null 2>&1; do sleep 3 done