Files
jdescopingtool/PLANS/2026-01-06-dbexporter-implementation.md
T
Joseph Doherty d2136cacf7 fix(DbExporter): fix compressed size calculation and clean up
- Move file size read after streams are disposed to get accurate compressed size
- Clean up definition files to use working example queries
- Add .gitignore for output directory
2026-01-06 17:06:16 -05:00

19 KiB

DbExporter Implementation Plan

For Claude: REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.

Goal: Build a CLI tool that exports database query results to compressed protobuf files.

Architecture: Single console app with modular components for definition parsing, database export, and verification.

Tech Stack: .NET 10, protobuf-net-data, ZstdSharp, Microsoft.Data.SqlClient, Oracle.ManagedDataAccess.Core


Task 1: Create Project Structure

Files:

  • Create: Tools/DbExporter/DbExporter.csproj
  • Create: Tools/DbExporter/ExportDefinition.cs

Step 1: Create project directory

mkdir -p Tools/DbExporter

Step 2: Create csproj file

<!-- Project file for the DbExporter command-line tool. -->
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <!-- Build an executable rather than a library. -->
    <OutputType>Exe</OutputType>
    <TargetFramework>net10.0</TargetFramework>
    <!-- The source files in this plan rely on implicit usings (System, System.IO, ...) and nullable annotations. -->
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
  </PropertyGroup>

  <ItemGroup>
    <!-- Serialization, compression, and database provider packages named in the plan's tech stack. -->
    <PackageReference Include="protobuf-net-data" Version="4.1.0" />
    <PackageReference Include="ZstdSharp.Port" Version="0.8.1" />
    <PackageReference Include="Microsoft.Data.SqlClient" Version="5.2.2" />
    <PackageReference Include="Oracle.ManagedDataAccess.Core" Version="23.7.0" />
  </ItemGroup>

</Project>

Step 3: Create ExportDefinition model

using System.Text.Json.Serialization;

namespace DbExporter;

/// <summary>
/// Immutable description of a single export job, bound from a JSON definition file.
/// </summary>
public sealed class ExportDefinition
{
    /// <summary>Compression level applied when the JSON omits <c>compressionLevel</c>.</summary>
    private const int DefaultCompressionLevel = 10;

    /// <summary>Database provider selector (JSON: <c>providerType</c>).</summary>
    [JsonPropertyName("providerType")] public required string ProviderType { get; init; }

    /// <summary>Connection string handed to the provider (JSON: <c>connectionString</c>).</summary>
    [JsonPropertyName("connectionString")] public required string ConnectionString { get; init; }

    /// <summary>SQL text whose result set is exported (JSON: <c>query</c>).</summary>
    [JsonPropertyName("query")] public required string Query { get; init; }

    /// <summary>Destination path of the exported file (JSON: <c>outputPath</c>).</summary>
    [JsonPropertyName("outputPath")] public required string OutputPath { get; init; }

    /// <summary>Compression level for the output stream (JSON: <c>compressionLevel</c>); optional.</summary>
    [JsonPropertyName("compressionLevel")] public int CompressionLevel { get; init; } = DefaultCompressionLevel;
}

Step 4: Verify build

cd Tools/DbExporter && dotnet build

Step 5: Commit

git add Tools/DbExporter
git commit -m "feat(DbExporter): create project structure and definition model"

Task 2: Implement DatabaseExporter

Files:

  • Create: Tools/DbExporter/DatabaseExporter.cs

Step 1: Create DatabaseExporter class

using System.Data;
using System.Data.Common;
using System.Security.Cryptography;
using Microsoft.Data.SqlClient;
using Oracle.ManagedDataAccess.Client;
using ProtoBuf.Data;
using ZstdSharp;

namespace DbExporter;

/// <summary>
/// Streams the result set of a query into a zstd-compressed protobuf file and
/// writes a SHA256 sidecar file computed over the uncompressed protobuf bytes.
/// </summary>
public sealed class DatabaseExporter
{
    /// <summary>Summary of a finished export.</summary>
    /// <param name="RowCount">Rows exported; this version always reports 0 (see <see cref="ExportAsync"/>).</param>
    /// <param name="UncompressedSize">Number of protobuf bytes produced before compression.</param>
    /// <param name="CompressedSize">Size of the output file on disk after all streams were closed.</param>
    /// <param name="Sha256Hash">Lower-case hex SHA256 of the uncompressed protobuf bytes.</param>
    public record ExportResult(int RowCount, long UncompressedSize, long CompressedSize, string Sha256Hash);

    /// <summary>
    /// Runs <paramref name="definition"/>'s query and writes the result to its output path.
    /// </summary>
    /// <param name="definition">Provider, connection string, query, output path and compression level.</param>
    /// <param name="cancellationToken">Observed while opening the connection, executing the command and writing the sidecar file.</param>
    /// <returns>Sizes and hash of the export; the row count is reported as 0.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="definition"/> is null.</exception>
    /// <exception cref="ArgumentException">The provider type is not recognised.</exception>
    public async Task<ExportResult> ExportAsync(ExportDefinition definition, CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(definition);

        // Ensure output directory exists
        var outputDir = Path.GetDirectoryName(definition.OutputPath);
        if (!string.IsNullOrEmpty(outputDir))
            Directory.CreateDirectory(outputDir);

        await using var connection = CreateConnection(definition.ProviderType, definition.ConnectionString);
        await connection.OpenAsync(cancellationToken);

        await using var command = connection.CreateCommand();
        command.CommandText = definition.Query;
        command.CommandTimeout = 0; // No timeout for large exports

        await using var reader = await command.ExecuteReaderAsync(cancellationToken);

        long uncompressedSize;
        string hash;

        // Pipeline: protobuf -> byte counter -> SHA256 (pass-through) -> zstd -> file.
        // The streams live in an explicit scope so that everything is flushed and closed
        // BEFORE the file size is read; otherwise buffered compressed data is not counted.
        using (var sha256 = SHA256.Create())
        {
            await using (var outputFile = new FileStream(definition.OutputPath, FileMode.Create, FileAccess.Write, FileShare.None, 256 * 1024))
            await using (var compressStream = new CompressionStream(outputFile, definition.CompressionLevel))
            await using (var hashStream = new CryptoStream(compressStream, sha256, CryptoStreamMode.Write))
            {
                // CryptoStream.Length is not supported, so bytes are counted by a thin wrapper.
                // The wrapper owns no resources and therefore is not disposed separately.
                var countingStream = new ByteCountingStream(hashStream);

                // Serialize to protobuf
                DataSerializer.Serialize(countingStream, reader);

                // Finalizes the hash; sha256.Hash is only valid after this call.
                hashStream.FlushFinalBlock();

                uncompressedSize = countingStream.BytesWritten;
                hash = Convert.ToHexString(sha256.Hash!).ToLowerInvariant();
            }
        }

        // Write sidecar hash file
        var hashFilePath = definition.OutputPath + ".sha256";
        await File.WriteAllTextAsync(hashFilePath, hash, cancellationToken);

        // All writers are closed at this point, so this is the final on-disk size.
        var compressedSize = new FileInfo(definition.OutputPath).Length;

        // The serializer does not report how many rows it consumed; callers obtain the
        // row count from a verify pass, so 0 is returned here.
        return new ExportResult(0, uncompressedSize, compressedSize, hash);
    }

    /// <summary>Creates an unopened connection for the given provider name (case-insensitive).</summary>
    /// <exception cref="ArgumentException">The provider name is neither 'SqlServer' nor 'Oracle'.</exception>
    private static DbConnection CreateConnection(string providerType, string connectionString)
    {
        return providerType.ToLowerInvariant() switch
        {
            "sqlserver" => new SqlConnection(connectionString),
            "oracle" => new OracleConnection(connectionString),
            _ => throw new ArgumentException($"Unknown provider type: {providerType}. Use 'SqlServer' or 'Oracle'.")
        };
    }

    /// <summary>
    /// Write-only pass-through stream that counts the bytes forwarded to the inner stream.
    /// It does not own the inner stream and never disposes it.
    /// </summary>
    private sealed class ByteCountingStream : Stream
    {
        private readonly Stream _inner;

        public ByteCountingStream(Stream inner)
        {
            _inner = inner;
        }

        /// <summary>Total number of bytes written through this wrapper.</summary>
        public long BytesWritten { get; private set; }

        public override bool CanRead => false;
        public override bool CanSeek => false;
        public override bool CanWrite => true;
        public override long Length => throw new NotSupportedException();

        public override long Position
        {
            get => throw new NotSupportedException();
            set => throw new NotSupportedException();
        }

        public override void Flush() => _inner.Flush();

        public override int Read(byte[] buffer, int offset, int count) => throw new NotSupportedException();

        public override long Seek(long offset, SeekOrigin origin) => throw new NotSupportedException();

        public override void SetLength(long value) => throw new NotSupportedException();

        public override void Write(byte[] buffer, int offset, int count)
        {
            _inner.Write(buffer, offset, count);
            BytesWritten += count;
        }

        public override void Write(ReadOnlySpan<byte> buffer)
        {
            _inner.Write(buffer);
            BytesWritten += buffer.Length;
        }
    }
}

Step 2: Verify build

cd Tools/DbExporter && dotnet build

Step 3: Commit

git add Tools/DbExporter/DatabaseExporter.cs
git commit -m "feat(DbExporter): implement database export with protobuf+zstd"

Task 3: Implement Verifier

Files:

  • Create: Tools/DbExporter/Verifier.cs

Step 1: Create Verifier class

using System.Data;
using System.Security.Cryptography;
using System.Text;
using ProtoBuf.Data;
using ZstdSharp;

namespace DbExporter;

/// <summary>
/// Reads an exported file back, reporting its schema and row count and optionally
/// checking the SHA256 of the decompressed bytes against the <c>.sha256</c> sidecar file.
/// </summary>
public sealed class Verifier
{
    /// <summary>Outcome of a verification pass.</summary>
    /// <param name="RowCount">Number of rows read from the file.</param>
    /// <param name="Schema">Column names and types in ordinal order.</param>
    /// <param name="ComputedHash">Lower-case hex SHA256 of the decompressed bytes, or null when hashing was not requested.</param>
    /// <param name="ExpectedHash">Hash read from the sidecar file, or null when not requested or not present.</param>
    /// <param name="HashMatch">Whether both hashes agree; null when no comparison took place.</param>
    public record VerifyResult(int RowCount, List<ColumnInfo> Schema, string? ComputedHash, string? ExpectedHash, bool? HashMatch);

    /// <summary>Name and CLR type of one column.</summary>
    public record ColumnInfo(string Name, Type Type);

    /// <summary>
    /// Decompresses and deserializes <paramref name="filePath"/>, counting rows and capturing the schema.
    /// </summary>
    /// <param name="filePath">Path of the exported file.</param>
    /// <param name="computeHash">When true, also hashes the decompressed bytes and compares them with <c>filePath + ".sha256"</c> if that file exists.</param>
    public VerifyResult Verify(string filePath, bool computeHash = false)
    {
        using var inputFile = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.Read, 256 * 1024);
        using var decompressStream = new DecompressionStream(inputFile);

        // Declared with 'using' so the hash objects are released even if deserialization throws.
        using SHA256? sha256 = computeHash ? SHA256.Create() : null;
        using CryptoStream? hashStream = sha256 is null
            ? null
            : new CryptoStream(decompressStream, sha256, CryptoStreamMode.Read);

        Stream readStream = (Stream?)hashStream ?? decompressStream;

        using var reader = DataSerializer.Deserialize(readStream);

        // Extract schema
        var schema = new List<ColumnInfo>(reader.FieldCount);
        for (int i = 0; i < reader.FieldCount; i++)
        {
            schema.Add(new ColumnInfo(reader.GetName(i), reader.GetFieldType(i)));
        }

        // Count rows
        int rowCount = 0;
        while (reader.Read())
        {
            rowCount++;
        }

        string? computedHashStr = null;
        string? expectedHash = null;
        bool? hashMatch = null;

        if (sha256 is not null && hashStream is not null)
        {
            // A read-mode CryptoStream only finalizes the hash once it has observed the end of
            // the underlying stream. Drain whatever the deserializer left unread so that
            // sha256.Hash is guaranteed to be available and covers every decompressed byte.
            hashStream.CopyTo(Stream.Null);

            computedHashStr = Convert.ToHexString(sha256.Hash!).ToLowerInvariant();

            // Read expected hash from sidecar
            var hashFilePath = filePath + ".sha256";
            if (File.Exists(hashFilePath))
            {
                expectedHash = File.ReadAllText(hashFilePath).Trim().ToLowerInvariant();
                hashMatch = string.Equals(computedHashStr, expectedHash, StringComparison.Ordinal);
            }
        }

        return new VerifyResult(rowCount, schema, computedHashStr, expectedHash, hashMatch);
    }

    /// <summary>Formats a schema as <c>Name (Type), Name (Type), ...</c>.</summary>
    public string FormatSchema(List<ColumnInfo> schema)
    {
        return string.Join(", ", schema.Select(col => $"{col.Name} ({col.Type.Name})"));
    }
}

Step 2: Verify build

cd Tools/DbExporter && dotnet build

Step 3: Commit

git add Tools/DbExporter/Verifier.cs
git commit -m "feat(DbExporter): implement verify and verify-full"

Task 4: Implement CLI Entry Point

Files:

  • Create: Tools/DbExporter/Program.cs

Step 1: Create Program.cs with CLI parsing

using System.Text.Json;
using DbExporter;

// Entry point: export the query described by the definition file, then optionally verify the output.
// Exit code 0 means success (or explicit help); 1 means usage error, failure, or checksum mismatch.
var helpRequested = args.Contains("--help") || args.Contains("-h");
if (args.Length < 1 || helpRequested)
{
    PrintUsage();
    return helpRequested ? 0 : 1;
}

var definitionPath = args[0];
var verify = args.Contains("--verify");
var verifyFull = args.Contains("--verify-full");

if (!File.Exists(definitionPath))
{
    Console.WriteLine($"Error: Definition file not found: {definitionPath}");
    return 1;
}

try
{
    var json = await File.ReadAllTextAsync(definitionPath);
    var definition = JsonSerializer.Deserialize<ExportDefinition>(json);

    if (definition is null)
    {
        Console.WriteLine("Error: Failed to parse definition file.");
        return 1;
    }

    // Validate required fields. 'required' only guarantees the JSON properties are present,
    // not that they carry a usable value, so blank strings are rejected here.
    var requiredFields = new (string Name, string? Value)[]
    {
        ("providerType", definition.ProviderType),
        ("connectionString", definition.ConnectionString),
        ("query", definition.Query),
        ("outputPath", definition.OutputPath),
    };
    foreach (var (name, value) in requiredFields)
    {
        if (string.IsNullOrWhiteSpace(value))
        {
            Console.WriteLine($"Error: {name} is required.");
            return 1;
        }
    }

    var exporter = new DatabaseExporter();
    var verifier = new Verifier();

    Console.WriteLine($"Exporting from {definition.ProviderType}...");
    Console.WriteLine($"Query: {Truncate(definition.Query, 80)}");

    var result = await exporter.ExportAsync(definition);

    // Always do a quick verify to get row count
    var quickVerify = verifier.Verify(definition.OutputPath, computeHash: false);

    // Guard on the divisor itself: the ratio is only meaningful when there are uncompressed bytes.
    var ratio = result.UncompressedSize > 0 && result.CompressedSize > 0 && quickVerify.RowCount > 0
        ? $" ({(double)result.CompressedSize / result.UncompressedSize * 100:F1}%)"
        : "";

    Console.WriteLine($"✓ Exported: {quickVerify.RowCount:N0} rows, {result.UncompressedSize:N0} → {result.CompressedSize:N0} bytes{ratio}");

    if (verify || verifyFull)
    {
        Console.WriteLine();
        Console.WriteLine("Verifying...");

        // A plain --verify needs exactly what the quick verify already produced, so the file is
        // only read a second time when a checksum has to be computed.
        var verifyResult = verifyFull
            ? verifier.Verify(definition.OutputPath, computeHash: true)
            : quickVerify;

        Console.WriteLine($"✓ Verified: {verifyResult.RowCount:N0} rows");
        Console.WriteLine($"Schema: {verifier.FormatSchema(verifyResult.Schema)}");

        if (verifyFull)
        {
            if (verifyResult.HashMatch is null)
            {
                // Do not stay silent when the requested checksum comparison could not happen.
                Console.WriteLine("! Checksum: no .sha256 sidecar file found; hash was not compared");
            }
            else if (verifyResult.HashMatch.Value)
            {
                Console.WriteLine($"✓ Checksum: SHA256 match ({verifyResult.ComputedHash})");
            }
            else
            {
                Console.WriteLine($"✗ Checksum: SHA256 MISMATCH");
                Console.WriteLine($"  Expected: {verifyResult.ExpectedHash}");
                Console.WriteLine($"  Computed: {verifyResult.ComputedHash}");
                return 1;
            }
        }
    }

    return 0;
}
catch (Exception ex)
{
    Console.WriteLine($"Error: {ex.Message}");
    return 1;
}

// Writes the command-line help (arguments, options, and a sample definition file) to standard output.
static void PrintUsage()
{
    string[] lines =
    {
        "Usage: DbExporter <definition-file> [options]",
        "",
        "Arguments:",
        "  definition-file    Path to JSON definition file",
        "",
        "Options:",
        "  --verify          Verify output (row count + schema)",
        "  --verify-full     Verify output with SHA256 checksum",
        // The argument parser accepts both spellings, so both are advertised.
        "  -h, --help        Show this help",
        "",
        "Definition file format:",
        "  {",
        "    \"providerType\": \"SqlServer\",",
        "    \"connectionString\": \"Server=...;Database=...;\",",
        "    \"query\": \"SELECT * FROM MyTable\",",
        "    \"outputPath\": \"./output/mytable.pb.zstd\",",
        "    \"compressionLevel\": 10",
        "  }",
    };

    foreach (var line in lines)
    {
        Console.WriteLine(line);
    }
}

// Collapses 'value' onto one line (CR removed, LF replaced by a space) and shortens it to at most
// 'maxLength' characters, ending in "..." when it had to be cut. When 'maxLength' is too small to
// hold the ellipsis, the text is simply cut to 'maxLength' characters instead of throwing.
static string Truncate(string value, int maxLength)
{
    const string Ellipsis = "...";

    if (string.IsNullOrEmpty(value)) return value;

    var singleLine = value.Replace("\r", "").Replace("\n", " ");
    if (singleLine.Length <= maxLength) return singleLine;

    // No room for the ellipsis: a negative range end would throw, so hard-cut instead.
    if (maxLength < Ellipsis.Length) return singleLine[..Math.Max(maxLength, 0)];

    return singleLine[..(maxLength - Ellipsis.Length)] + Ellipsis;
}

Step 2: Verify build

cd Tools/DbExporter && dotnet build

Step 3: Commit

git add Tools/DbExporter/Program.cs
git commit -m "feat(DbExporter): implement CLI entry point"

Task 5: Fix Export Row Count Issue

The current implementation computes row count during verify, but can't get it during export (protobuf-net-data streams without counting). Let's fix this by wrapping the IDataReader.

Files:

  • Create: Tools/DbExporter/CountingDataReader.cs
  • Modify: Tools/DbExporter/DatabaseExporter.cs

Step 1: Create CountingDataReader wrapper

using System.Data;

namespace DbExporter;

/// <summary>
/// Wraps an IDataReader to count rows as they're read.
/// </summary>
/// <summary>
/// Decorator around an <see cref="IDataReader"/> that counts every successful
/// <see cref="Read"/> call and forwards all other members unchanged.
/// </summary>
/// <remarks>
/// The count is never reset, so rows from every result set visited through
/// <see cref="NextResult"/> are added together. Disposing or closing this wrapper
/// disposes or closes the wrapped reader.
/// </remarks>
internal sealed class CountingDataReader : IDataReader
{
    private readonly IDataReader _inner;
    private int _rowCount;

    /// <summary>Wraps <paramref name="inner"/>.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="inner"/> is null.</exception>
    public CountingDataReader(IDataReader inner)
    {
        // Fail at construction instead of with a NullReferenceException on first use.
        ArgumentNullException.ThrowIfNull(inner);
        _inner = inner;
    }

    /// <summary>Number of rows for which <see cref="Read"/> has returned true so far.</summary>
    public int RowCount => _rowCount;

    /// <summary>Advances the wrapped reader and increments <see cref="RowCount"/> when a row was available.</summary>
    public bool Read()
    {
        var result = _inner.Read();
        if (result) _rowCount++;
        return result;
    }

    // Delegate all other members to inner reader
    public object this[int i] => _inner[i];
    public object this[string name] => _inner[name];
    public int Depth => _inner.Depth;
    public bool IsClosed => _inner.IsClosed;
    public int RecordsAffected => _inner.RecordsAffected;
    public int FieldCount => _inner.FieldCount;
    public void Close() => _inner.Close();
    public void Dispose() => _inner.Dispose();
    public bool GetBoolean(int i) => _inner.GetBoolean(i);
    public byte GetByte(int i) => _inner.GetByte(i);
    public long GetBytes(int i, long fieldOffset, byte[]? buffer, int bufferoffset, int length) => _inner.GetBytes(i, fieldOffset, buffer, bufferoffset, length);
    public char GetChar(int i) => _inner.GetChar(i);
    public long GetChars(int i, long fieldoffset, char[]? buffer, int bufferoffset, int length) => _inner.GetChars(i, fieldoffset, buffer, bufferoffset, length);
    public IDataReader GetData(int i) => _inner.GetData(i);
    public string GetDataTypeName(int i) => _inner.GetDataTypeName(i);
    public DateTime GetDateTime(int i) => _inner.GetDateTime(i);
    public decimal GetDecimal(int i) => _inner.GetDecimal(i);
    public double GetDouble(int i) => _inner.GetDouble(i);
    public Type GetFieldType(int i) => _inner.GetFieldType(i);
    public float GetFloat(int i) => _inner.GetFloat(i);
    public Guid GetGuid(int i) => _inner.GetGuid(i);
    public short GetInt16(int i) => _inner.GetInt16(i);
    public int GetInt32(int i) => _inner.GetInt32(i);
    public long GetInt64(int i) => _inner.GetInt64(i);
    public string GetName(int i) => _inner.GetName(i);
    public int GetOrdinal(string name) => _inner.GetOrdinal(name);
    // NOTE(review): the wrapped reader may return null here; the '!' only silences the compiler.
    public DataTable GetSchemaTable() => _inner.GetSchemaTable()!;
    public string GetString(int i) => _inner.GetString(i);
    public object GetValue(int i) => _inner.GetValue(i);
    public int GetValues(object[] values) => _inner.GetValues(values);
    public bool IsDBNull(int i) => _inner.IsDBNull(i);
    public bool NextResult() => _inner.NextResult();
}

Step 2: Update DatabaseExporter to use CountingDataReader

Update the ExportAsync method to wrap the reader:

// Replace this line:
await using var reader = await command.ExecuteReaderAsync(cancellationToken);

// With:
await using var baseReader = await command.ExecuteReaderAsync(cancellationToken);
var reader = new CountingDataReader(baseReader);

// And update the return to use reader.RowCount instead of 0

Step 3: Verify build

cd Tools/DbExporter && dotnet build

Step 4: Commit

git add Tools/DbExporter/CountingDataReader.cs Tools/DbExporter/DatabaseExporter.cs
git commit -m "feat(DbExporter): add counting data reader for accurate row count"

Task 6: Create ScadaBridge Definition Files

Files:

  • Create: Tools/DbExporter/definitions/ directory with definition files

Step 1: Create definitions directory

mkdir -p Tools/DbExporter/definitions

Step 2: Query ScadaBridge to list tables

First, we need to discover what tables exist. Run a quick query to list tables:

SELECT TABLE_SCHEMA, TABLE_NAME
FROM INFORMATION_SCHEMA.TABLES
WHERE TABLE_TYPE = 'BASE TABLE'
ORDER BY TABLE_SCHEMA, TABLE_NAME

Step 3: Create definition files for key tables

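Create definition files based on discovered tables. Example for Config.OpcUaClients (substitute your own server and credentials, and avoid committing real passwords to source control):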

{
  "providerType": "SqlServer",
  "connectionString": "Server=10.100.0.35;Database=ScadaBridge_Test;User Id=sa;Password=ScadaBridge2024;TrustServerCertificate=true;",
  "query": "SELECT * FROM Config.OpcUaClients",
  "outputPath": "./output/opcua-clients.pb.zstd",
  "compressionLevel": 10
}

Step 4: Commit

git add Tools/DbExporter/definitions/
git commit -m "feat(DbExporter): add ScadaBridge definition files"

Task 7: Test Export and Verify

Step 1: Build the tool

cd Tools/DbExporter && dotnet build

Step 2: Run export for a small table first

dotnet run -- definitions/opcua-clients.json --verify

Step 3: Run verify-full

dotnet run -- definitions/opcua-clients.json --verify-full

Step 4: Test with larger tables if available

Run exports on additional definition files and verify they work correctly.

Step 5: Commit any fixes needed

git add -A
git commit -m "fix(DbExporter): address issues found during testing"

Task 8: Final Cleanup and Documentation

Files:

  • Update: Tools/DbExporter/README.md (optional)

Step 1: Review all files for cleanup

  • Remove any debug code
  • Ensure consistent formatting
  • Check for any TODO comments

Step 2: Final build and test

cd Tools/DbExporter && dotnet build
dotnet run -- --help

Step 3: Commit

git add -A
git commit -m "chore(DbExporter): final cleanup"