Files
scadalink-design/infra/tools/dump_seed.py
Joseph Doherty 932fda5594 infra(seed): dump encrypted secret columns as NULL, restore via CLI
ASP.NET Data Protection ciphertext is non-deterministic and bound to the
source key ring, so encrypted secret columns (ExternalSystemDefinitions
.AuthConfiguration, SmtpConfigurations.Credentials, DatabaseConnection
Definitions.ConnectionString) cannot be replayed from a static SQL dump —
the app would fail to decrypt them. dump_seed.py now emits those columns
as NULL; reseed.sh adds a post-seed stage that recreates the values
through the ScadaLink CLI so the EF value converter re-encrypts against
the target cluster's key ring.
2026-05-21 01:29:51 -04:00

252 lines
9.2 KiB
Python
Executable File

#!/usr/bin/env python3
"""Dump design tables from ScadaLinkConfig to a replayable SQL seed file.
Usage:
python3 infra/tools/dump_seed.py --output infra/mssql/seed-config.sql
Tables covered (insert order; reverse for delete):
TemplateFolders, Templates, TemplateAttributes, TemplateScripts,
TemplateAlarms, TemplateCompositions, SharedScripts, DataConnections,
ExternalSystemDefinitions, ExternalSystemMethods
Excluded by design (per-environment, not design-time): Sites (seeded via
seed-sites.sh), Instances + InstanceConnectionBindings + InstanceOverrides,
NotificationLists/Recipients, SmtpConfigurations, ApiKeys, Areas,
SiteScopeRules, LdapGroupMappings, DataProtectionKeys, audit, deployment.
Encrypted secret columns (see ENCRYPTED_COLUMNS) are emitted as NULL: they
hold ASP.NET Data Protection ciphertext, which is non-deterministic and bound
to the source key ring, so a raw SQL dump can never replay a valid value.
Re-populate them through the application after the seed runs (infra/reseed.sh
does this via the ScadaLink CLI).
"""
import argparse
import datetime
import sys
import pymssql
# Connection defaults. Each one is the fallback for the matching command-line
# flag (--host, --port, --user, --password, --database).
# NOTE(review): the default password looks like a local development
# credential — confirm it is never valid outside a dev environment.
DEFAULT_HOST = "localhost"
DEFAULT_PORT = 1433
DEFAULT_USER = "sa"
DEFAULT_PASSWORD = "ScadaLink_Dev1#"
DEFAULT_DATABASE = "ScadaLinkConfig"

# Tables to dump, in the order their INSERT statements are emitted. The order
# is meant to put referenced tables ahead of the tables that reference them.
INSERT_ORDER = [
    "TemplateFolders",
    "Templates",
    "TemplateAttributes",
    "TemplateScripts",
    "TemplateAlarms",
    "TemplateCompositions",
    "SharedScripts",
    "DataConnections",
    "ExternalSystemDefinitions",
    "ExternalSystemMethods",
]

# Identity columns get IDENTITY_INSERT wrapped around inserts and are kept in
# the column list. All listed tables happen to use Id as their identity.
IDENTITY_TABLES = set(INSERT_ORDER)

# (table, column) pairs encrypted at rest via ASP.NET Data Protection
# (EncryptedStringConverter in ScadaLink.ConfigurationDatabase). Ciphertext is
# non-deterministic and key-ring-bound, so it cannot be replayed from a static
# SQL dump — the application would fail to decrypt it on read. These columns
# are dumped as NULL; re-seed their values through the app (CLI / API) so the
# value converter encrypts them against the target key ring.
#
# Only tables named in INSERT_ORDER are dumped, so an entry for any other
# table has no effect unless that table is added to INSERT_ORDER.
ENCRYPTED_COLUMNS = {
    ("ExternalSystemDefinitions", "AuthConfiguration"),
    ("SmtpConfigurations", "Credentials"),
    ("DatabaseConnectionDefinitions", "ConnectionString"),
}
# Templates has self-FK Templates.ParentTemplateId; emit a single batch that
# inserts shallow rows first then deeper ones. pymssql returns rows in Id order
# from our ORDER BY, which matches insertion order for this schema (parent Id
# is always less than child Id in the live data).
def quote(value):
    """Render a Python value read from the database as a T-SQL literal.

    Args:
        value: A column value as returned by the DB driver.

    Returns:
        The literal as a string:
        - ``None`` becomes ``NULL``.
        - Booleans become ``1`` / ``0``.
        - Ints and floats use their ``str()`` form.
        - Bytes become a ``0x...`` hex literal.
        - Date/time values become quoted ISO text with microsecond precision.
        - ``timedelta`` becomes a quoted ``[-]HH:MM:SS.ffffff`` string.
        - Anything else is stringified and emitted as an ``N'...'`` Unicode
          literal with embedded single quotes doubled.
    """
    if value is None:
        return "NULL"
    # bool is a subclass of int, so it has to be tested first or True/False
    # would be emitted as "True"/"False".
    if isinstance(value, bool):
        return "1" if value else "0"
    if isinstance(value, (int, float)):
        return str(value)
    if isinstance(value, (bytes, bytearray)):
        return "0x" + value.hex()
    # datetime is a subclass of date, so it has to be tested before date.
    if isinstance(value, datetime.datetime):
        return "'" + value.isoformat(sep=" ", timespec="microseconds") + "'"
    if isinstance(value, datetime.date):
        return "'" + value.isoformat() + "'"
    if isinstance(value, datetime.time):
        return "'" + value.isoformat(timespec="microseconds") + "'"
    if isinstance(value, datetime.timedelta):
        # Format sign and magnitude separately. A negative timedelta is stored
        # normalised (negative days, positive seconds/microseconds), so running
        # divmod on the signed total and reading .microseconds directly would
        # yield a nonsensical string such as '-1:59:59.000000' for -1 second.
        sign = "-" if value < datetime.timedelta(0) else ""
        magnitude = abs(value)
        whole_seconds = magnitude.days * 86400 + magnitude.seconds
        hours, rem = divmod(whole_seconds, 3600)
        minutes, seconds = divmod(rem, 60)
        return "'{}{:02d}:{:02d}:{:02d}.{:06d}'".format(
            sign, hours, minutes, seconds, magnitude.microseconds
        )
    # Fallback: treat as text. Doubling single quotes is the T-SQL escape.
    text = str(value).replace("'", "''")
    return "N'" + text + "'"
def get_columns(cursor, table):
    """Return the column names of *table*, ordered by ordinal position.

    Runs a parameterised lookup against INFORMATION_SCHEMA.COLUMNS on the
    given cursor, matching on table name only (no schema filter). If the
    query returns no rows the result is an empty list.
    """
    query = (
        "\n"
        "SELECT COLUMN_NAME\n"
        "FROM INFORMATION_SCHEMA.COLUMNS\n"
        "WHERE TABLE_NAME = %s\n"
        "ORDER BY ORDINAL_POSITION\n"
    )
    cursor.execute(query, (table,))
    names = []
    for record in cursor.fetchall():
        # Each record is a single-column row; keep just the name.
        names.append(record[0])
    return names
def dump(args):
    """Dump the design tables to a replayable SQL seed file.

    Connects to SQL Server with pymssql, reads every table named in
    INSERT_ORDER, and writes a script to ``args.output``. Inside a single
    transaction the script deletes the existing design and dependent rows,
    then re-inserts the dumped rows. Columns listed in ENCRYPTED_COLUMNS are
    written as NULL. A one-line summary is printed to stdout once the file
    has been written.

    Args:
        args: Parsed command-line namespace providing ``host``, ``port``,
            ``user``, ``password``, ``database`` and ``output``.

    The cursor and connection are released even if the dump fails part-way.
    The seed file is always written as UTF-8, so non-ASCII data does not
    depend on the platform's default encoding. Errors from the driver or
    from writing the file propagate to the caller.
    """
    conn = pymssql.connect(
        server=args.host,
        port=args.port,
        user=args.user,
        password=args.password,
        database=args.database,
    )
    cursor = None
    try:
        cursor = conn.cursor()

        out = [
            "-- ScadaLink design-data seed.",
            "-- Auto-generated by infra/tools/dump_seed.py against " + args.database + ".",
            "-- Replays the design-time configuration (templates, scripts,",
            "-- data connections, external systems). Idempotent: deletes",
            "-- existing rows in the covered tables before inserting.",
            "--",
            "-- Excluded: Sites (seed via docker/seed-sites.sh), Instances,",
            "-- InstanceConnectionBindings, notifications, SMTP, API keys,",
            "-- areas, LDAP mappings.",
            "",
            "SET NOCOUNT ON;",
            "SET XACT_ABORT ON;",
            # sqlcmd defaults QUOTED_IDENTIFIER OFF; EF Core's filtered indexes
            # and computed columns require ON, so force it here.
            "SET QUOTED_IDENTIFIER ON;",
            "BEGIN TRAN;",
            "",
        ]

        # Wipe in reverse FK order. Beyond the design tables themselves, we
        # also clear instance + deployment rows because they FK to Templates
        # and DataConnections; without this, an idempotent replay against a
        # populated DB fails on the FK to DataConnections. On a fresh reseed
        # (after teardown.sh) these tables are already empty so the DELETEs
        # are no-ops.
        out.append("-- Wipe existing design + dependent rows so the seed is idempotent.")
        out.append("-- Order matters: dependents first.")
        # Entries are table names (emitted as DELETE FROM <table>) or complete
        # UPDATE statements that break an FK link before a later delete.
        delete_order = [
            # Dependents on Instances / DataConnections / Sites.
            "DeployedConfigSnapshots",
            "DeploymentRecords",
            "InstanceAlarmOverrides",
            "InstanceAttributeOverrides",
            "InstanceConnectionBindings",
            "Instances",
            # Design tables themselves.
            "ExternalSystemMethods",
            "ExternalSystemDefinitions",
            "DataConnections",
            "SharedScripts",
            "TemplateCompositions",
            # Alarms reference scripts via OnTriggerScriptId; null it first so
            # we can delete scripts without FK violations.
            "UPDATE TemplateAlarms SET OnTriggerScriptId = NULL",
            "TemplateAlarms",
            "TemplateScripts",
            "TemplateAttributes",
            # Templates is self-referential and references TemplateCompositions
            # (OwnerCompositionId); null parent links first.
            "UPDATE Templates SET ParentTemplateId = NULL, OwnerCompositionId = NULL",
            "Templates",
            # Folders is self-referential too.
            "UPDATE TemplateFolders SET ParentFolderId = NULL",
            "TemplateFolders",
        ]
        for step in delete_order:
            if step.startswith("UPDATE "):
                out.append(step + ";")
            else:
                out.append("DELETE FROM " + step + ";")
        out.append("")

        for table in INSERT_ORDER:
            columns = get_columns(cursor, table)
            if not columns:
                print("Skipping {} (no columns found)".format(table), file=sys.stderr)
                continue

            # Order by Id so self-referential rows insert in dependency order
            # (in the live data, parent Id < child Id by construction).
            order_clause = "ORDER BY Id" if "Id" in columns else ""
            cursor.execute(
                "SELECT [{}] FROM [{}] {}".format("], [".join(columns), table, order_clause)
            )
            rows = cursor.fetchall()
            out.append("-- " + table + " (" + str(len(rows)) + " rows)")

            # Columns encrypted at rest cannot be dumped verbatim; emit NULL
            # and note it so the secret value is restored through the app
            # afterwards.
            nulled = [c for c in columns if (table, c) in ENCRYPTED_COLUMNS]
            for c in nulled:
                out.append(
                    "-- NOTE: [{}] is an encrypted secret column — dumped as NULL. "
                    "Restore via the app (CLI/API) post-seed.".format(c)
                )

            if not rows:
                continue

            col_list = ", ".join("[" + c + "]" for c in columns)
            identity = table in IDENTITY_TABLES
            if identity:
                out.append("SET IDENTITY_INSERT [{}] ON;".format(table))
            for row in rows:
                values = ", ".join(
                    "NULL" if (table, c) in ENCRYPTED_COLUMNS else quote(v)
                    for c, v in zip(columns, row)
                )
                out.append(
                    "INSERT INTO [{}] ({}) VALUES ({});".format(table, col_list, values)
                )
            if identity:
                out.append("SET IDENTITY_INSERT [{}] OFF;".format(table))
            out.append("")

        out.append("COMMIT;")
        out.append("")
    finally:
        # Release database resources on success and on failure alike.
        if cursor is not None:
            cursor.close()
        conn.close()

    sql = "\n".join(out)
    # Explicit UTF-8: the script carries N'...' Unicode literals and a
    # non-ASCII dash in the NOTE comments, which a platform-default codec
    # may be unable to encode.
    with open(args.output, "w", encoding="utf-8") as f:
        f.write(sql)

    insert_count = sum(1 for line in out if line.startswith('INSERT'))
    print("Wrote " + args.output + " (" + str(insert_count) + " inserts).")
def main():
    """Parse the command-line options and run the dump.

    Connection flags (--host, --port, --user, --password, --database) fall
    back to the module-level DEFAULT_* values; --output is required.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        # The module docstring is pre-formatted (usage line, table lists).
        # The default formatter would re-wrap it into one paragraph in
        # --help output, so keep its layout as written.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument("--host", default=DEFAULT_HOST)
    parser.add_argument("--port", type=int, default=DEFAULT_PORT)
    parser.add_argument("--user", default=DEFAULT_USER)
    parser.add_argument("--password", default=DEFAULT_PASSWORD)
    parser.add_argument("--database", default=DEFAULT_DATABASE)
    parser.add_argument("--output", required=True, help="Path to write seed SQL")
    args = parser.parse_args()
    dump(args)
# Run the dump only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()