From bafebb27037b3a11ac872fed5ec1f8efeffb608a Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Sat, 3 Jan 2026 15:41:21 -0500 Subject: [PATCH] docs: add ETL troubleshooting documentation --- DOCUMENTATION/DataSync/Troubleshooting.md | 233 ++++++++++++++++++++++ 1 file changed, 233 insertions(+) create mode 100644 DOCUMENTATION/DataSync/Troubleshooting.md diff --git a/DOCUMENTATION/DataSync/Troubleshooting.md b/DOCUMENTATION/DataSync/Troubleshooting.md new file mode 100644 index 0000000..d0ffbe6 --- /dev/null +++ b/DOCUMENTATION/DataSync/Troubleshooting.md @@ -0,0 +1,233 @@ +# Troubleshooting + +This document covers common errors, debugging patterns, and performance tuning for the ETL pipeline. + +## Common Errors + +### Column mapping errors + +| Error | Cause | Resolution | +|-------|-------|------------| +| "No columns from source exist in destination table" | Source column names don't match destination | Check source query column aliases match destination table columns exactly (case-insensitive) | +| "Column name collision" | Transformer creates duplicate column names | Review rename mappings; ensure no two columns map to the same output name | +| "Column '{name}' not found or was dropped" | Accessing a column that was dropped | Check transformer chain order; don't access dropped columns in later transformers | + +### Computed column errors + +| Error | Cause | Resolution | +|-------|-------|------------| +| "GetBytes not supported for computed column at ordinal N" | Binary access on transformed column | Use `GetValue()` instead; computed columns (like JDE dates) don't support binary access | +| "GetChars not supported for computed column at ordinal N" | Same as above | Use `GetValue()` or `GetString()` | +| "GetData not supported for computed column at ordinal N" | Same as above | Computed columns can't return nested readers | + +### Timeout errors + +| Error | Cause | Resolution | +|-------|-------|------------| +| `SqlException: Timeout expired` during bulk copy | Large dataset, slow network | Increase `commandTimeoutSeconds` on destination | +| `SqlException: Timeout expired` during MERGE | Many rows to match | Increase timeout; consider smaller batches | +| `SqlException: Timeout expired` during script | Index rebuild on large table | Increase script `timeoutSeconds` (default 3600s for rebuild) | + +### Validation errors + +| Error | Cause | Resolution | +|-------|-------|------------| +| "Source is required. Call WithSource() before Build()" | Missing source in pipeline | Add `.WithSource()` to builder chain | +| "Destination is required. Call WithDestination() before Build()" | Missing destination in pipeline | Add `.WithDestination()` to builder chain | +| "At least one match column is required" | Empty matchColumns array | Provide key columns for MERGE matching | +| "Timeout must be between 0 and 24 hours" | Invalid timeout value | Use `TimeSpan` between 0 and 24 hours | + +## Debugging Patterns + +### Inspecting pipeline results + +Check `PipelineResult` after execution to understand what happened: + +```csharp +var result = await pipeline.ExecuteAsync(cancellationToken); + +if (!result.Success) +{ + logger.LogError(result.Error, "Pipeline failed after {Rows} rows in {Elapsed}", + result.TotalRows, result.Elapsed); + + // Find which step failed + var lastStep = result.Steps.LastOrDefault(); + if (lastStep != null) + { + logger.LogError("Failed at step: {Step} ({Type})", + lastStep.StepName, lastStep.StepType); + } +} +``` + +### Tracking step-by-step progress + +Each step records timing and row counts: + +```csharp +foreach (var step in result.Steps) +{ + logger.LogInformation("Step {Name} ({Type}): {Rows} rows in {Elapsed}ms", + step.StepName, + step.StepType, + step.RowsAffected, + step.Elapsed.TotalMilliseconds); +} +``` + +### Enabling detailed logging + +Inject a logger into the pipeline for execution-level logging: + +```csharp +var pipeline = new EtlPipelineBuilder() + .WithName("DebugPipeline") + .WithSource(source) + .WithDestination(destination) + .WithLogger(loggerFactory.CreateLogger()) + .Build(); +``` + +Pipeline logs include: +- `Information`: Pipeline start/complete with row counts +- `Debug`: Individual script execution +- `Error`: Failure with exception and last step + +### Identifying the failure point + +When a pipeline fails, `PipelineResult.Steps` contains all completed steps: + +```csharp +if (!result.Success) +{ + // Steps completed before failure + var completedSteps = result.Steps.Select(s => s.StepName); + logger.LogError("Completed steps: {Steps}", string.Join(" → ", completedSteps)); + + // The exception contains root cause + logger.LogError(result.Error, "Root cause"); +} +``` + +## Performance Tuning + +### Batch size optimization + +Default batch size is 10,000 rows. Adjust based on row width: + +| Row Size | Recommended Batch Size | +|----------|------------------------| +| Narrow (< 20 columns) | 10,000 - 50,000 | +| Medium (20-50 columns) | 5,000 - 10,000 | +| Wide (> 50 columns) | 1,000 - 5,000 | + +```csharp +// Large batch for narrow rows +new DbBulkImportDestination(factory, "LookupTable", batchSize: 50000) + +// Small batch for wide rows +new DbBulkMergeDestination(factory, "DetailTable", matchColumns, batchSize: 2000) +``` + +### Index management for bulk loads + +Disable indexes before large imports, rebuild after: + +```csharp +var pipeline = new EtlPipelineBuilder() + .WithName("FullTableRefresh") + .WithPreScript(CommonScripts.DisableIndexes(factory, "LargeTable")) + .WithSource(source) + .WithDestination(new DbBulkImportDestination(factory, "LargeTable")) + .WithPostScript(CommonScripts.RebuildIndexes(factory, "LargeTable")) + .WithPostScript(CommonScripts.UpdateStatistics(factory, "LargeTable")) + .Build(); +``` + +**When to use:** +- Full table refreshes (TRUNCATE + import) +- Tables with 3+ non-clustered indexes +- Import of 100,000+ rows + +**When to skip:** +- Incremental merges with few rows +- Tables with only a clustered index +- Frequent small updates + +### Timeout sizing guidelines + +| Operation | Rows | Suggested Timeout | +|-----------|------|-------------------| +| Bulk import | < 100K | 600s (default) | +| Bulk import | 100K - 1M | 1800s (30 min) | +| Bulk import | > 1M | 3600s (1 hour) | +| Bulk merge | < 50K | 600s (default) | +| Bulk merge | 50K - 500K | 1800s (30 min) | +| Index rebuild | Any | 3600s (default) | + +```csharp +// Large table with extended timeout +new DbBulkMergeDestination(factory, "HistoricalData", + matchColumns: new[] { "RecordId" }, + commandTimeoutSeconds: 1800) +``` + +### Reducing network and memory usage + +Select only needed columns in the source query: + +```csharp +// Good - select only needed columns +var source = new DbQuerySource(factory, + "SELECT OrderNumber, Status, StartDate FROM JDE.WorkOrders"); + +// Avoid - selecting all columns wastes bandwidth +var source = new DbQuerySource(factory, + "SELECT * FROM JDE.WorkOrders"); +``` + +Extra columns in the source are ignored by the destination column mapping, but they still consume network bandwidth and memory. + +### Monitoring baseline performance + +Track `PipelineResult.Elapsed` over time to detect degradation: + +```csharp +var result = await pipeline.ExecuteAsync(ct); + +metrics.RecordPipeline( + pipelineName: pipeline.PipelineName, + success: result.Success, + rows: result.TotalRows, + durationMs: result.Elapsed.TotalMilliseconds); + +// Alert if duration exceeds baseline by 50% +if (result.Elapsed > baselineDuration * 1.5) +{ + logger.LogWarning("Pipeline {Name} took {Elapsed} (baseline: {Baseline})", + pipeline.PipelineName, result.Elapsed, baselineDuration); +} +``` + +### Step-level performance analysis + +Identify slow steps using `StepResult.Elapsed`: + +```csharp +var slowSteps = result.Steps + .Where(s => s.Elapsed > TimeSpan.FromSeconds(30)) + .OrderByDescending(s => s.Elapsed); + +foreach (var step in slowSteps) +{ + logger.LogWarning("Slow step: {Name} took {Elapsed}", + step.StepName, step.Elapsed); +} +``` + +## Related Documentation + +- [Overview](./Overview.md) - Pipeline architecture +- [Configuration](./Configuration.md) - Timeout and batch size options +- [Destinations](./Destinations.md) - Script patterns for index management