- Move file size read after streams are disposed to get accurate compressed size - Clean up definition files to use working example queries - Add .gitignore for output directory
19 KiB
DbExporter Implementation Plan
For Claude: REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
Goal: Build a CLI tool that exports database query results to compressed protobuf files.
Architecture: Single console app with modular components for definition parsing, database export, and verification.
Tech Stack: .NET 10, protobuf-net-data, ZstdSharp, Microsoft.Data.SqlClient, Oracle.ManagedDataAccess.Core
Task 1: Create Project Structure
Files:
- Create: Tools/DbExporter/DbExporter.csproj
- Create: Tools/DbExporter/ExportDefinition.cs
Step 1: Create project directory
mkdir -p Tools/DbExporter
Step 2: Create csproj file
<!-- Project file for the DbExporter tool: builds an executable targeting net10.0. -->
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net10.0</TargetFramework>
<!-- The source files rely on implicit usings (e.g. File, Path, Task, List<T> are used without explicit using directives). -->
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>
<!-- Serialization of IDataReader results (the code imports ProtoBuf.Data and calls DataSerializer). -->
<PackageReference Include="protobuf-net-data" Version="4.1.0" />
<!-- Zstandard streams (the code imports ZstdSharp for CompressionStream / DecompressionStream). -->
<PackageReference Include="ZstdSharp.Port" Version="0.8.1" />
<!-- ADO.NET providers selected by the definition's providerType ("SqlServer" / "Oracle"). -->
<PackageReference Include="Microsoft.Data.SqlClient" Version="5.2.2" />
<PackageReference Include="Oracle.ManagedDataAccess.Core" Version="23.7.0" />
</ItemGroup>
</Project>
Step 3: Create ExportDefinition model
using System.Text.Json.Serialization;
namespace DbExporter;
/// <summary>
/// One export job as read from a JSON definition file: where to connect,
/// what to query, and where to write the compressed result.
/// </summary>
public sealed class ExportDefinition
{
/// <summary>Database provider selector; the exporter accepts "SqlServer" or "Oracle" (compared case-insensitively).</summary>
[JsonPropertyName("providerType")]
public required string ProviderType { get; init; }
/// <summary>Connection string handed unchanged to the selected provider's connection type.</summary>
[JsonPropertyName("connectionString")]
public required string ConnectionString { get; init; }
/// <summary>SQL text executed as-is; its result set is what gets exported.</summary>
[JsonPropertyName("query")]
public required string Query { get; init; }
/// <summary>Path of the output file; a ".sha256" sidecar is written next to it by the exporter.</summary>
[JsonPropertyName("outputPath")]
public required string OutputPath { get; init; }
/// <summary>Compression level passed to the zstd compression stream; defaults to 10 when omitted from the JSON.</summary>
[JsonPropertyName("compressionLevel")]
public int CompressionLevel { get; init; } = 10;
}
Step 4: Verify build
cd Tools/DbExporter && dotnet build
Step 5: Commit
git add Tools/DbExporter
git commit -m "feat(DbExporter): create project structure and definition model"
Task 2: Implement DatabaseExporter
Files:
- Create:
Tools/DbExporter/DatabaseExporter.cs
Step 1: Create DatabaseExporter class
using System.Data;
using System.Data.Common;
using System.Security.Cryptography;
using Microsoft.Data.SqlClient;
using Oracle.ManagedDataAccess.Client;
using ProtoBuf.Data;
using ZstdSharp;
namespace DbExporter;
/// <summary>
/// Runs the query from an <see cref="ExportDefinition"/> and streams the result set
/// to disk as zstd-compressed protobuf, plus a ".sha256" sidecar file containing the
/// SHA-256 of the uncompressed protobuf bytes.
/// </summary>
public sealed class DatabaseExporter
{
    /// <summary>Outcome of one export.</summary>
    /// <param name="RowCount">Number of rows exported (currently always 0; see <see cref="ExportAsync"/>).</param>
    /// <param name="UncompressedSize">Number of protobuf bytes produced before compression.</param>
    /// <param name="CompressedSize">Size of the output file on disk after all streams were closed.</param>
    /// <param name="Sha256Hash">Lower-case hex SHA-256 of the uncompressed protobuf bytes.</param>
    public record ExportResult(int RowCount, long UncompressedSize, long CompressedSize, string Sha256Hash);

    /// <summary>
    /// Executes <paramref name="definition"/>'s query and writes the result to its output path.
    /// </summary>
    /// <param name="definition">Connection, query, output path and compression level.</param>
    /// <param name="cancellationToken">Cancels connection open, query execution and the sidecar write.</param>
    /// <returns>Sizes and hash of the exported data.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="definition"/> is null.</exception>
    /// <exception cref="ArgumentException">The provider type is not recognised.</exception>
    public async Task<ExportResult> ExportAsync(ExportDefinition definition, CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(definition);

        // Ensure output directory exists
        var outputDir = Path.GetDirectoryName(definition.OutputPath);
        if (!string.IsNullOrEmpty(outputDir))
        {
            Directory.CreateDirectory(outputDir);
        }

        await using var connection = CreateConnection(definition.ProviderType, definition.ConnectionString);
        await connection.OpenAsync(cancellationToken);

        await using var command = connection.CreateCommand();
        command.CommandText = definition.Query;
        command.CommandTimeout = 0; // No timeout for large exports

        await using var reader = await command.ExecuteReaderAsync(cancellationToken);

        long uncompressedSize;
        string hash;

        // Write chain: serializer -> byte counter -> SHA-256 -> zstd -> file.
        // The streams live in an explicit scope so that every one of them is flushed and
        // closed BEFORE the file size is read below; otherwise the size on disk would not
        // yet include the data still buffered in the compressor / file stream.
        using (var sha256 = SHA256.Create())
        await using (var outputFile = new FileStream(definition.OutputPath, FileMode.Create, FileAccess.Write, FileShare.None, 256 * 1024))
        await using (var compressStream = new CompressionStream(outputFile, definition.CompressionLevel))
        await using (var hashStream = new CryptoStream(compressStream, sha256, CryptoStreamMode.Write))
        await using (var countingStream = new ByteCountingStream(hashStream))
        {
            // Serialize to protobuf
            DataSerializer.Serialize(countingStream, reader);

            // Finalises the hash; CryptoStream is not seekable, so its Length cannot be
            // used for the byte count - the counting wrapper supplies it instead.
            hashStream.FlushFinalBlock();
            uncompressedSize = countingStream.BytesWritten;
            hash = Convert.ToHexString(sha256.Hash!).ToLowerInvariant();
        }

        // All streams are closed here, so the on-disk length is the final compressed size.
        var compressedSize = new FileInfo(definition.OutputPath).Length;

        // Write sidecar hash file
        var hashFilePath = definition.OutputPath + ".sha256";
        await File.WriteAllTextAsync(hashFilePath, hash, cancellationToken);

        // The serializer does not report how many rows it wrote, so 0 is returned here;
        // callers obtain the real count by verifying the output file.
        return new ExportResult(0, uncompressedSize, compressedSize, hash);
    }

    /// <summary>Creates an unopened connection for the given provider name ("SqlServer" or "Oracle", any casing).</summary>
    /// <exception cref="ArgumentException">The provider name is not one of the supported values.</exception>
    private static DbConnection CreateConnection(string providerType, string connectionString)
    {
        return providerType.ToLowerInvariant() switch
        {
            "sqlserver" => new SqlConnection(connectionString),
            "oracle" => new OracleConnection(connectionString),
            _ => throw new ArgumentException($"Unknown provider type: {providerType}. Use 'SqlServer' or 'Oracle'.")
        };
    }

    /// <summary>
    /// Write-only pass-through stream that counts the bytes written to it.
    /// It does not dispose the wrapped stream; the caller owns that.
    /// </summary>
    private sealed class ByteCountingStream : Stream
    {
        private readonly Stream _inner;

        public ByteCountingStream(Stream inner)
        {
            _inner = inner;
        }

        /// <summary>Total number of bytes written through this stream so far.</summary>
        public long BytesWritten { get; private set; }

        public override bool CanRead => false;
        public override bool CanSeek => false;
        public override bool CanWrite => true;
        public override long Length => throw new NotSupportedException();

        public override long Position
        {
            get => throw new NotSupportedException();
            set => throw new NotSupportedException();
        }

        public override void Flush() => _inner.Flush();

        public override int Read(byte[] buffer, int offset, int count) => throw new NotSupportedException();

        public override long Seek(long offset, SeekOrigin origin) => throw new NotSupportedException();

        public override void SetLength(long value) => throw new NotSupportedException();

        public override void Write(byte[] buffer, int offset, int count)
        {
            _inner.Write(buffer, offset, count);
            BytesWritten += count;
        }

        public override void Write(ReadOnlySpan<byte> buffer)
        {
            _inner.Write(buffer);
            BytesWritten += buffer.Length;
        }
    }
}
Step 2: Verify build
cd Tools/DbExporter && dotnet build
Step 3: Commit
git add Tools/DbExporter/DatabaseExporter.cs
git commit -m "feat(DbExporter): implement database export with protobuf+zstd"
Task 3: Implement Verifier
Files:
- Create:
Tools/DbExporter/Verifier.cs
Step 1: Create Verifier class
using System.Data;
using System.Security.Cryptography;
using System.Text;
using ProtoBuf.Data;
using ZstdSharp;
namespace DbExporter;
/// <summary>
/// Reads an exported file back (zstd decompression + protobuf deserialization) to report
/// its schema and row count, and optionally checks the SHA-256 of the decompressed bytes
/// against the ".sha256" sidecar file.
/// </summary>
public sealed class Verifier
{
    /// <summary>Outcome of one verification pass.</summary>
    /// <param name="RowCount">Number of rows read from the file.</param>
    /// <param name="Schema">Column names and CLR types, in column order.</param>
    /// <param name="ComputedHash">Lower-case hex SHA-256 of the decompressed bytes; null when hashing was not requested.</param>
    /// <param name="ExpectedHash">Hash read from the sidecar file; null when hashing was not requested or the sidecar is missing.</param>
    /// <param name="HashMatch">Whether the two hashes are equal; null when there was nothing to compare.</param>
    public record VerifyResult(int RowCount, List<ColumnInfo> Schema, string? ComputedHash, string? ExpectedHash, bool? HashMatch);

    /// <summary>Name and CLR type of one column.</summary>
    public record ColumnInfo(string Name, Type Type);

    /// <summary>
    /// Decodes <paramref name="filePath"/>, counting rows and collecting the column schema.
    /// </summary>
    /// <param name="filePath">Path of the compressed export file.</param>
    /// <param name="computeHash">When true, also hashes the decompressed bytes and compares with "<paramref name="filePath"/>.sha256" if that file exists.</param>
    /// <returns>Row count, schema and (optionally) hash information.</returns>
    /// <exception cref="InvalidOperationException">Hashing was requested but the hash could not be finalised.</exception>
    public VerifyResult Verify(string filePath, bool computeHash = false)
    {
        using var inputFile = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.Read, 256 * 1024);
        using var decompressStream = new DecompressionStream(inputFile);

        // Declared with `using` so they are released even when deserialization throws.
        using SHA256? sha256 = computeHash ? SHA256.Create() : null;
        using CryptoStream? hashStream = sha256 is null
            ? null
            : new CryptoStream(decompressStream, sha256, CryptoStreamMode.Read);

        Stream readStream = hashStream is null ? decompressStream : hashStream;

        using var reader = DataSerializer.Deserialize(readStream);

        // Extract schema
        var schema = new List<ColumnInfo>(reader.FieldCount);
        for (int i = 0; i < reader.FieldCount; i++)
        {
            schema.Add(new ColumnInfo(reader.GetName(i), reader.GetFieldType(i)));
        }

        // Count rows
        int rowCount = 0;
        while (reader.Read())
        {
            rowCount++;
        }

        string? computedHashStr = null;
        string? expectedHash = null;
        bool? hashMatch = null;

        if (sha256 is not null && hashStream is not null)
        {
            // A read-mode CryptoStream only finalises the hash once it has seen the end of
            // the underlying stream. Drain whatever the deserializer left unread so that the
            // hash covers every decompressed byte, as the export-side hash does.
            if (hashStream.CanRead)
            {
                hashStream.CopyTo(Stream.Null);
            }

            var hashBytes = sha256.Hash
                ?? throw new InvalidOperationException("SHA-256 hash was not finalised: the end of the data stream was never reached.");
            computedHashStr = Convert.ToHexString(hashBytes).ToLowerInvariant();

            // Read expected hash from sidecar
            var hashFilePath = filePath + ".sha256";
            if (File.Exists(hashFilePath))
            {
                expectedHash = File.ReadAllText(hashFilePath).Trim().ToLowerInvariant();
                hashMatch = string.Equals(computedHashStr, expectedHash, StringComparison.Ordinal);
            }
        }

        return new VerifyResult(rowCount, schema, computedHashStr, expectedHash, hashMatch);
    }

    /// <summary>Formats a schema as "Name (TypeName), Name (TypeName), ...".</summary>
    public string FormatSchema(List<ColumnInfo> schema)
    {
        return string.Join(", ", schema.Select(col => $"{col.Name} ({col.Type.Name})"));
    }
}
Step 2: Verify build
cd Tools/DbExporter && dotnet build
Step 3: Commit
git add Tools/DbExporter/Verifier.cs
git commit -m "feat(DbExporter): implement verify and verify-full"
Task 4: Implement CLI Entry Point
Files:
- Create:
Tools/DbExporter/Program.cs
Step 1: Create Program.cs with CLI parsing
using System.Text.Json;
using DbExporter;
// Entry point (top-level statements).
// Usage: DbExporter <definition-file> [--verify | --verify-full]
// Exit code 0 on success (or when help was requested), 1 on any error or checksum failure.
var helpRequested = args.Contains("--help") || args.Contains("-h");
if (args.Length < 1 || helpRequested)
{
    PrintUsage();
    return helpRequested ? 0 : 1;
}

var definitionPath = args[0];
var verify = args.Contains("--verify");
var verifyFull = args.Contains("--verify-full");

if (!File.Exists(definitionPath))
{
    Console.WriteLine($"Error: Definition file not found: {definitionPath}");
    return 1;
}

try
{
    var json = await File.ReadAllTextAsync(definitionPath);
    var definition = JsonSerializer.Deserialize<ExportDefinition>(json);
    if (definition is null)
    {
        Console.WriteLine("Error: Failed to parse definition file.");
        return 1;
    }

    // Validate required fields (present-but-blank values pass deserialization).
    if (string.IsNullOrWhiteSpace(definition.ProviderType))
    {
        Console.WriteLine("Error: providerType is required.");
        return 1;
    }
    if (string.IsNullOrWhiteSpace(definition.ConnectionString))
    {
        Console.WriteLine("Error: connectionString is required.");
        return 1;
    }
    if (string.IsNullOrWhiteSpace(definition.Query))
    {
        Console.WriteLine("Error: query is required.");
        return 1;
    }
    if (string.IsNullOrWhiteSpace(definition.OutputPath))
    {
        Console.WriteLine("Error: outputPath is required.");
        return 1;
    }

    var exporter = new DatabaseExporter();
    var verifier = new Verifier();

    Console.WriteLine($"Exporting from {definition.ProviderType}...");
    Console.WriteLine($"Query: {Truncate(definition.Query, 80)}");
    var result = await exporter.ExportAsync(definition);

    // Always do a quick verify to get row count
    var quickVerify = verifier.Verify(definition.OutputPath, computeHash: false);

    // Guard on the divisor (UncompressedSize), not the dividend, so the ratio is
    // never computed as a division by zero.
    var ratio = result.UncompressedSize > 0 && quickVerify.RowCount > 0
        ? $" ({(double)result.CompressedSize / result.UncompressedSize * 100:F1}%)"
        : "";
    Console.WriteLine($"✓ Exported: {quickVerify.RowCount:N0} rows, {result.UncompressedSize:N0} → {result.CompressedSize:N0} bytes{ratio}");

    if (verify || verifyFull)
    {
        Console.WriteLine();
        Console.WriteLine("Verifying...");

        // A plain --verify needs exactly what the quick pass already produced, so the
        // file is only decoded a second time when the checksum is requested.
        var verifyResult = verifyFull
            ? verifier.Verify(definition.OutputPath, computeHash: true)
            : quickVerify;

        Console.WriteLine($"✓ Verified: {verifyResult.RowCount:N0} rows");
        Console.WriteLine($"Schema: {verifier.FormatSchema(verifyResult.Schema)}");

        if (verifyFull)
        {
            if (verifyResult.HashMatch is null)
            {
                // No sidecar to compare against: a full verification that cannot
                // compare hashes must not be reported as a success.
                Console.WriteLine($"✗ Checksum: hash file not found: {definition.OutputPath}.sha256");
                return 1;
            }

            if (verifyResult.HashMatch.Value)
            {
                Console.WriteLine($"✓ Checksum: SHA256 match ({verifyResult.ComputedHash})");
            }
            else
            {
                Console.WriteLine($"✗ Checksum: SHA256 MISMATCH");
                Console.WriteLine($" Expected: {verifyResult.ExpectedHash}");
                Console.WriteLine($" Computed: {verifyResult.ComputedHash}");
                return 1;
            }
        }
    }

    return 0;
}
catch (Exception ex)
{
    // Top-level boundary: report the failure and signal it through the exit code.
    Console.WriteLine($"Error: {ex.Message}");
    return 1;
}
// Prints the command-line help (usage, options and a sample definition file) to standard output.
static void PrintUsage()
{
    // One entry per output line; an empty entry produces a blank line.
    var helpLines = new[]
    {
        "Usage: DbExporter <definition-file> [options]",
        "",
        "Arguments:",
        " definition-file Path to JSON definition file",
        "",
        "Options:",
        " --verify Verify output (row count + schema)",
        " --verify-full Verify output with SHA256 checksum",
        " --help Show this help",
        "",
        "Definition file format:",
        " {",
        " \"providerType\": \"SqlServer\",",
        " \"connectionString\": \"Server=...;Database=...;\",",
        " \"query\": \"SELECT * FROM MyTable\",",
        " \"outputPath\": \"./output/mytable.pb.zstd\",",
        " \"compressionLevel\": 10",
        " }",
    };

    foreach (var helpLine in helpLines)
    {
        Console.WriteLine(helpLine);
    }
}
// Collapses a string onto one line ("\r" removed, "\n" replaced by a space) and limits it to
// maxLength characters. When shortening is needed the result ends with "..." and is exactly
// maxLength long; if maxLength is too small to hold the ellipsis (< 3), the text is simply cut
// to maxLength characters (an empty string for maxLength <= 0) instead of throwing.
// Null or empty input is returned unchanged.
static string Truncate(string value, int maxLength)
{
    if (string.IsNullOrEmpty(value))
    {
        return value;
    }

    const string Ellipsis = "...";
    var singleLine = value.Replace("\r", "").Replace("\n", " ");

    if (singleLine.Length <= maxLength)
    {
        return singleLine;
    }

    if (maxLength < Ellipsis.Length)
    {
        // No room for the ellipsis: a negative slice end would throw, so hard-cut instead.
        return singleLine[..Math.Max(0, maxLength)];
    }

    return singleLine[..(maxLength - Ellipsis.Length)] + Ellipsis;
}
Step 2: Verify build
cd Tools/DbExporter && dotnet build
Step 3: Commit
git add Tools/DbExporter/Program.cs
git commit -m "feat(DbExporter): implement CLI entry point"
Task 5: Fix Export Row Count Issue
The current implementation computes row count during verify, but can't get it during export (protobuf-net-data streams without counting). Let's fix this by wrapping the IDataReader.
Files:
- Create: Tools/DbExporter/CountingDataReader.cs
- Modify: Tools/DbExporter/DatabaseExporter.cs
Step 1: Create CountingDataReader wrapper
using System.Data;
namespace DbExporter;
/// <summary>
/// Wraps an <see cref="IDataReader"/> and counts how many times <see cref="Read"/>
/// returned true. Every other member is forwarded unchanged to the wrapped reader.
/// </summary>
/// <remarks>
/// The count is never reset, so rows from all result sets visited via
/// <see cref="NextResult"/> accumulate in the same total. Disposing or closing
/// this wrapper disposes or closes the wrapped reader.
/// </remarks>
internal sealed class CountingDataReader : IDataReader
{
    private readonly IDataReader _inner;
    private int _rowCount;

    /// <summary>Creates a counting wrapper around <paramref name="inner"/>.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="inner"/> is null.</exception>
    public CountingDataReader(IDataReader inner)
    {
        // Fail here rather than with a NullReferenceException on first use.
        ArgumentNullException.ThrowIfNull(inner);
        _inner = inner;
    }

    /// <summary>Number of rows successfully read so far.</summary>
    public int RowCount => _rowCount;

    /// <summary>Advances the wrapped reader and increments <see cref="RowCount"/> when a row was available.</summary>
    public bool Read()
    {
        var advanced = _inner.Read();
        if (advanced)
        {
            _rowCount++;
        }
        return advanced;
    }

    // Delegate all other members to inner reader
    public object this[int i] => _inner[i];
    public object this[string name] => _inner[name];
    public int Depth => _inner.Depth;
    public bool IsClosed => _inner.IsClosed;
    public int RecordsAffected => _inner.RecordsAffected;
    public int FieldCount => _inner.FieldCount;
    public void Close() => _inner.Close();
    public void Dispose() => _inner.Dispose();
    public bool GetBoolean(int i) => _inner.GetBoolean(i);
    public byte GetByte(int i) => _inner.GetByte(i);
    public long GetBytes(int i, long fieldOffset, byte[]? buffer, int bufferoffset, int length) => _inner.GetBytes(i, fieldOffset, buffer, bufferoffset, length);
    public char GetChar(int i) => _inner.GetChar(i);
    public long GetChars(int i, long fieldoffset, char[]? buffer, int bufferoffset, int length) => _inner.GetChars(i, fieldoffset, buffer, bufferoffset, length);
    public IDataReader GetData(int i) => _inner.GetData(i);
    public string GetDataTypeName(int i) => _inner.GetDataTypeName(i);
    public DateTime GetDateTime(int i) => _inner.GetDateTime(i);
    public decimal GetDecimal(int i) => _inner.GetDecimal(i);
    public double GetDouble(int i) => _inner.GetDouble(i);
    public Type GetFieldType(int i) => _inner.GetFieldType(i);
    public float GetFloat(int i) => _inner.GetFloat(i);
    public Guid GetGuid(int i) => _inner.GetGuid(i);
    public short GetInt16(int i) => _inner.GetInt16(i);
    public int GetInt32(int i) => _inner.GetInt32(i);
    public long GetInt64(int i) => _inner.GetInt64(i);
    public string GetName(int i) => _inner.GetName(i);
    public int GetOrdinal(string name) => _inner.GetOrdinal(name);

    // Returned as nullable to pass through whatever the wrapped reader reports,
    // instead of asserting non-null on its behalf.
    public DataTable? GetSchemaTable() => _inner.GetSchemaTable();
    public string GetString(int i) => _inner.GetString(i);
    public object GetValue(int i) => _inner.GetValue(i);
    public int GetValues(object[] values) => _inner.GetValues(values);
    public bool IsDBNull(int i) => _inner.IsDBNull(i);
    public bool NextResult() => _inner.NextResult();
}
Step 2: Update DatabaseExporter to use CountingDataReader
Update the ExportAsync method to wrap the reader:
// Replace this line:
await using var reader = await command.ExecuteReaderAsync(cancellationToken);
// With:
await using var baseReader = await command.ExecuteReaderAsync(cancellationToken);
var reader = new CountingDataReader(baseReader);
// And update the return to use reader.RowCount instead of 0
Step 3: Verify build
cd Tools/DbExporter && dotnet build
Step 4: Commit
git add Tools/DbExporter/CountingDataReader.cs Tools/DbExporter/DatabaseExporter.cs
git commit -m "feat(DbExporter): add counting data reader for accurate row count"
Task 6: Create ScadaBridge Definition Files
Files:
- Create:
Tools/DbExporter/definitions/ directory with definition files
Step 1: Create definitions directory
mkdir -p Tools/DbExporter/definitions
Step 2: Query ScadaBridge to list tables
First, we need to discover what tables exist. Run a quick query to list tables:
-- Lists the schema and name of every INFORMATION_SCHEMA.TABLES entry whose
-- TABLE_TYPE is 'BASE TABLE', sorted by schema and then by table name.
SELECT TABLE_SCHEMA, TABLE_NAME
FROM INFORMATION_SCHEMA.TABLES
WHERE TABLE_TYPE = 'BASE TABLE'
ORDER BY TABLE_SCHEMA, TABLE_NAME
Step 3: Create definition files for key tables
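Create definition files based on discovered tables. Example for Config.OpcUaClients (the connection string below contains test credentials; do not commit real passwords to the repository):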
{
"providerType": "SqlServer",
"connectionString": "Server=10.100.0.35;Database=ScadaBridge_Test;User Id=sa;Password=ScadaBridge2024;TrustServerCertificate=true;",
"query": "SELECT * FROM Config.OpcUaClients",
"outputPath": "./output/opcua-clients.pb.zstd",
"compressionLevel": 10
}
Step 4: Commit
git add Tools/DbExporter/definitions/
git commit -m "feat(DbExporter): add ScadaBridge definition files"
Task 7: Test Export and Verify
Step 1: Build the tool
cd Tools/DbExporter && dotnet build
Step 2: Run export for a small table first
dotnet run -- definitions/opcua-clients.json --verify
Step 3: Run verify-full
dotnet run -- definitions/opcua-clients.json --verify-full
Step 4: Test with larger tables if available
Run exports on additional definition files and verify they work correctly.
Step 5: Commit any fixes needed
git add -A
git commit -m "fix(DbExporter): address issues found during testing"
Task 8: Final Cleanup and Documentation
Files:
- Update:
Tools/DbExporter/README.md (optional)
Step 1: Review all files for cleanup
- Remove any debug code
- Ensure consistent formatting
- Check for any TODO comments
Step 2: Final build and test
cd Tools/DbExporter && dotnet build
dotnet run -- --help
Step 3: Commit
git add -A
git commit -m "chore(DbExporter): final cleanup"