From bafebb27037b3a11ac872fed5ec1f8efeffb608a Mon Sep 17 00:00:00 2001
From: Joseph Doherty <dohertj2@Josephs-MacBook-Pro.local>
Date: Sat, 3 Jan 2026 15:41:21 -0500
Subject: [PATCH] docs: add ETL troubleshooting documentation

---
 DOCUMENTATION/DataSync/Troubleshooting.md | 233 ++++++++++++++++++++++
 1 file changed, 233 insertions(+)
 create mode 100644 DOCUMENTATION/DataSync/Troubleshooting.md

diff --git a/DOCUMENTATION/DataSync/Troubleshooting.md b/DOCUMENTATION/DataSync/Troubleshooting.md
new file mode 100644
index 0000000..d0ffbe6
--- /dev/null
+++ b/DOCUMENTATION/DataSync/Troubleshooting.md
@@ -0,0 +1,233 @@
+# Troubleshooting
+
+This document covers common errors, debugging patterns, and performance tuning for the ETL pipeline.
+
+## Common Errors
+
+### Column mapping errors
+
+| Error | Cause | Resolution |
+|-------|-------|------------|
+| "No columns from source exist in destination table" | Source column names don't match destination | Check source query column aliases match destination table columns exactly (case-insensitive) |
+| "Column name collision" | Transformer creates duplicate column names | Review rename mappings; ensure no two columns map to the same output name |
+| "Column '{name}' not found or was dropped" | Accessing a column that was dropped | Check transformer chain order; don't access dropped columns in later transformers |
+
+### Computed column errors
+
+| Error | Cause | Resolution |
+|-------|-------|------------|
+| "GetBytes not supported for computed column at ordinal N" | Binary access on transformed column | Use `GetValue()` instead; computed columns (like JDE dates) don't support binary access |
+| "GetChars not supported for computed column at ordinal N" | Same as above | Use `GetValue()` or `GetString()` |
+| "GetData not supported for computed column at ordinal N" | Same as above | Computed columns can't return nested readers |
+
+### Timeout errors
+
+| Error | Cause | Resolution |
+|-------|-------|------------|
+| `SqlException: Timeout expired` during bulk copy | Large dataset, slow network | Increase `commandTimeoutSeconds` on destination |
+| `SqlException: Timeout expired` during MERGE | Many rows to match | Increase timeout; consider smaller batches |
+| `SqlException: Timeout expired` during script | Index rebuild on large table | Increase script `timeoutSeconds` (default 3600s for rebuild) |
+
+### Validation errors
+
+| Error | Cause | Resolution |
+|-------|-------|------------|
+| "Source is required. Call WithSource() before Build()" | Missing source in pipeline | Add `.WithSource()` to builder chain |
+| "Destination is required. Call WithDestination() before Build()" | Missing destination in pipeline | Add `.WithDestination()` to builder chain |
+| "At least one match column is required" | Empty matchColumns array | Provide key columns for MERGE matching |
+| "Timeout must be between 0 and 24 hours" | Invalid timeout value | Use `TimeSpan` between 0 and 24 hours |
+
+## Debugging Patterns
+
+### Inspecting pipeline results
+
+Check `PipelineResult` after execution to understand what happened:
+
+```csharp
+var result = await pipeline.ExecuteAsync(cancellationToken);
+
+if (!result.Success)
+{
+    logger.LogError(result.Error, "Pipeline failed after {Rows} rows in {Elapsed}",
+        result.TotalRows, result.Elapsed);
+
+    // Find which step failed
+    var lastStep = result.Steps.LastOrDefault();
+    if (lastStep != null)
+    {
+        logger.LogError("Failed at step: {Step} ({Type})",
+            lastStep.StepName, lastStep.StepType);
+    }
+}
+```
+
+### Tracking step-by-step progress
+
+Each step records timing and row counts:
+
+```csharp
+foreach (var step in result.Steps)
+{
+    logger.LogInformation("Step {Name} ({Type}): {Rows} rows in {Elapsed}ms",
+        step.StepName,
+        step.StepType,
+        step.RowsAffected,
+        step.Elapsed.TotalMilliseconds);
+}
+```
+
+### Enabling detailed logging
+
+Inject a logger into the pipeline for execution-level logging:
+
+```csharp
+var pipeline = new EtlPipelineBuilder()
+    .WithName("DebugPipeline")
+    .WithSource(source)
+    .WithDestination(destination)
+    .WithLogger(loggerFactory.CreateLogger<EtlPipeline>())
+    .Build();
+```
+
+Pipeline logs include:
+- `Information`: Pipeline start/complete with row counts
+- `Debug`: Individual script execution
+- `Error`: Failure with exception and last step
+
+### Identifying the failure point
+
+When a pipeline fails, `PipelineResult.Steps` contains all completed steps:
+
+```csharp
+if (!result.Success)
+{
+    // Steps completed before failure
+    var completedSteps = result.Steps.Select(s => s.StepName);
+    logger.LogError("Completed steps: {Steps}", string.Join(" → ", completedSteps));
+
+    // The exception contains root cause
+    logger.LogError(result.Error, "Root cause");
+}
+```
+
+## Performance Tuning
+
+### Batch size optimization
+
+Default batch size is 10,000 rows. Adjust based on row width:
+
+| Row Size | Recommended Batch Size |
+|----------|------------------------|
+| Narrow (< 20 columns) | 10,000 - 50,000 |
+| Medium (20-50 columns) | 5,000 - 10,000 |
+| Wide (> 50 columns) | 1,000 - 5,000 |
+
+```csharp
+// Large batch for narrow rows
+new DbBulkImportDestination(factory, "LookupTable", batchSize: 50000)
+
+// Small batch for wide rows
+new DbBulkMergeDestination(factory, "DetailTable", matchColumns, batchSize: 2000)
+```
+
+### Index management for bulk loads
+
+Disable indexes before large imports, rebuild after:
+
+```csharp
+var pipeline = new EtlPipelineBuilder()
+    .WithName("FullTableRefresh")
+    .WithPreScript(CommonScripts.DisableIndexes(factory, "LargeTable"))
+    .WithSource(source)
+    .WithDestination(new DbBulkImportDestination(factory, "LargeTable"))
+    .WithPostScript(CommonScripts.RebuildIndexes(factory, "LargeTable"))
+    .WithPostScript(CommonScripts.UpdateStatistics(factory, "LargeTable"))
+    .Build();
+```
+
+**When to use:**
+- Full table refreshes (TRUNCATE + import)
+- Tables with 3+ non-clustered indexes
+- Import of 100,000+ rows
+
+**When to skip:**
+- Incremental merges with few rows
+- Tables with only a clustered index
+- Frequent small updates
+
+### Timeout sizing guidelines
+
+| Operation | Rows | Suggested Timeout |
+|-----------|------|-------------------|
+| Bulk import | < 100K | 600s (default) |
+| Bulk import | 100K - 1M | 1800s (30 min) |
+| Bulk import | > 1M | 3600s (1 hour) |
+| Bulk merge | < 50K | 600s (default) |
+| Bulk merge | 50K - 500K | 1800s (30 min) |
+| Index rebuild | Any | 3600s (default) |
+
+```csharp
+// Large table with extended timeout
+new DbBulkMergeDestination(factory, "HistoricalData",
+    matchColumns: new[] { "RecordId" },
+    commandTimeoutSeconds: 1800)
+```
+
+### Reducing network and memory usage
+
+Select only needed columns in the source query:
+
+```csharp
+// Good - select only needed columns
+var source = new DbQuerySource(factory,
+    "SELECT OrderNumber, Status, StartDate FROM JDE.WorkOrders");
+
+// Avoid - selecting all columns wastes bandwidth
+var source = new DbQuerySource(factory,
+    "SELECT * FROM JDE.WorkOrders");
+```
+
+Extra columns in the source are ignored by the destination column mapping, but they still consume network bandwidth and memory.
+
+### Monitoring baseline performance
+
+Track `PipelineResult.Elapsed` over time to detect degradation:
+
+```csharp
+var result = await pipeline.ExecuteAsync(ct);
+
+metrics.RecordPipeline(
+    pipelineName: pipeline.PipelineName,
+    success: result.Success,
+    rows: result.TotalRows,
+    durationMs: result.Elapsed.TotalMilliseconds);
+
+// Alert if duration exceeds baseline by 50%
+if (result.Elapsed > baselineDuration * 1.5)
+{
+    logger.LogWarning("Pipeline {Name} took {Elapsed} (baseline: {Baseline})",
+        pipeline.PipelineName, result.Elapsed, baselineDuration);
+}
+```
+
+### Step-level performance analysis
+
+Identify slow steps using `StepResult.Elapsed`:
+
+```csharp
+var slowSteps = result.Steps
+    .Where(s => s.Elapsed > TimeSpan.FromSeconds(30))
+    .OrderByDescending(s => s.Elapsed);
+
+foreach (var step in slowSteps)
+{
+    logger.LogWarning("Slow step: {Name} took {Elapsed}",
+        step.StepName, step.Elapsed);
+}
+```
+
+## Related Documentation
+
+- [Overview](./Overview.md) - Pipeline architecture
+- [Configuration](./Configuration.md) - Timeout and batch size options
+- [Destinations](./Destinations.md) - Script patterns for index management