Ever had a bunch of source files with same column-types and column-order that needed to be consolidated together for an easy import? Chances are the files contain source-data for a scheduled ETL process, which got interrupted. Now you have a whole lot of csv’s piled up in your pipeline’s intake folder. If your ETL is configured to accept only one source-file at a time you certainly do not want to run it gazillion times. Wish there were a UNION ALL utility for csv’s? Well, there is one now: the PowerShell below will combine all csv files into a single one, which you can then ingest with no need to modify the ETL or run it repeatedly.

Easiest way to run it is to first copy the script into the directory where all your source files are (this way, when prompted, $inputDir parameter can be left blank to accept the default current directory location), on next prompt enter filename pattern matching your source files ($csvFileNameSearchPattern - or leave blank for default *.csv), choose a name for the combined-output file ($csvCombinedOutputFileName - or leave blank to accept the default Combined_Output_ yyyyMMdd_HHmm.csv) and tell the script weather your source files have headers or contain raw data only ($filesHaveHeaders).

After producing the combined output file in the same directory where your source files are the script will even run basic data-integrity checks by:

1️⃣ adding up all records of individual source-files (excluding headers if present), comparing that sum with the row-count of the output file and displaying status as either: PASS or FAIL

2️⃣ adding up individual source-file sizes, comparing that figure with the output file size and displaying the difference.

Below an example that looks for all files matching the name-pattern: dbo.FactResellerSales*.csv inside the directory D:\DOCKER_SHARE\Windows\BackupCommon\CombineCsvsTest\ and “fuses” them all together into dbo.FactResellerSalesXL.Combined.csv

PowerShell CombineManyCsvsIntoOne

Here is the script:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
################################################  Common Parameters: ################################################ 
$inputDir                   = $null # Change to for example: "C:\Path\ToDirectory\WhereCsvs\Are\"
$csvFileNameSearchPattern   = $null # Change to for example: "YourFileNamePattern*.csv"
$csvCombinedOutputFileName  = $null # Change to for example: Join-Path $inputDir "YourFileNamePattern.Combined.csv"
$filesHaveHeaders           = $null # Change to for example: $true  # Set to $false if files contain raw data only

################################################  Dynamic Input Logic: ################################################ 

# 1. Input Directory
$scriptDir = $PSScriptRoot
if (-not $scriptDir) { $scriptDir = Split-Path -Parent $PSCommandPath }
if (-not $scriptDir) { $scriptDir = Split-Path -Parent $MyInvocation.MyCommand.Path }
if (-not $scriptDir) { $scriptDir = (Get-Location).Path }

if ([string]::IsNullOrWhiteSpace($inputDir)) {
    $currentDirectory = $scriptDir
    $inputDir = Read-Host "Enter the Input Directory path: (Leave blank for default: $currentDirectory)"
}

if ([string]::IsNullOrWhiteSpace($inputDir)) {
    $inputDir = (Get-Location).Path
    Write-Host "InputDir not provided. Defaulting to current directory: $InputDir"
}

try {
    $InputDir = (Resolve-Path -LiteralPath $InputDir -ErrorAction Stop).Path
} catch {
    throw "ERROR: Path not found: $InputDir"
}
Write-Host "Using InputDir: '$InputDir'"

# 2. Search Pattern (e.g., *.csv or MyTable_*.csv):
if ([string]::IsNullOrWhiteSpace($csvFileNameSearchPattern)) {
    $defaultFileNameSearchPattern = "*.csv"
    $csvFileNameSearchPattern = Read-Host "Enter the File-Name Search Pattern: (Leave blank for default: $defaultFileNameSearchPattern)"
}

if ([string]::IsNullOrWhiteSpace($csvFileNameSearchPattern)) {
    $csvFileNameSearchPattern = $defaultFileNameSearchPattern
    Write-Host "File-Name Search Pattern not provided. Defaulting to: $defaultFileNameSearchPattern"
}

# 3. Combined Output Filename:
if ([string]::IsNullOrWhiteSpace($csvCombinedOutputFileName)) {
    $suggestedName = "Combined_Output_" + (Get-Date -Format "yyyyMMdd_HHmm") + ".csv"
    $csvCombinedOutputFileName = Read-Host "Enter the name for the combined file (Leave blank for default: $suggestedName)"
    
    if ([string]::IsNullOrWhiteSpace($csvCombinedOutputFileName)) {
        Write-Host "File-Name Search Pattern not provided. Defaulting to: $suggestedName"
        $csvCombinedOutputFileName = $suggestedName
    }
    $csvCombinedOutputFileName = Join-Path $inputDir $csvCombinedOutputFileName
}

# 4. Headers Flag:
if ($null -eq $filesHaveHeaders) {
    $response = Read-Host "Do these files have headers that need to be skipped? (Y/N)"
    $filesHaveHeaders = $response -match '^[Yy]'
}

Write-Host "`n--- Script Configuration Locked ---" -ForegroundColor Yellow
Write-Host "Target Folder: $inputDir"
Write-Host "Search Filter: $csvFileNameSearchPattern"
Write-Host "Output File:   $csvCombinedOutputFileName"
Write-Host "Skip Headers:  $filesHaveHeaders"
Write-Host ("-" * 40)

$files = Get-ChildItem -Path $inputDir -Filter $csvFileNameSearchPattern | 
         Where-Object { $_.FullName -ne $csvCombinedOutputFileName } | 
         Sort-Object Name

if ($null -eq $files) {
    Write-Warning "No files matching '$csvFileNameSearchPattern' were found in $inputDir"
    return
}

################################################  Driver Section: ####################################################

Write-Host "`n ========================================== Combining CSV Files: ============================================ "
$fileCount = $files.Count
$ExplicitEncoding = [System.Text.Encoding]::GetEncoding(1252)
$writer = [System.IO.StreamWriter]::new($csvCombinedOutputFileName, $false, $ExplicitEncoding)

if ($filesHaveHeaders) {

        $fileInfo    = $files[0]
        $currentFile = $fileInfo.FullName
        Write-Host "Saving Header from the first file: $($fileInfo.Name))"
        $reader = [System.IO.StreamReader]::new($currentFile, $ExplicitEncoding)
        $header = $reader.ReadLine()
        $writer.WriteLine($header)
}

try {
    $results = for ($i = 0; $i -lt $fileCount; $i++) {
        $fileInfo    = $files[$i]
        $currentFile = $fileInfo.FullName
        
        Write-Host "Processing File: $($fileInfo.Name) ($($i + 1) of $fileCount)"

        $reader = [System.IO.StreamReader]::new($currentFile, $ExplicitEncoding)
        try {
            $lineCount = 0
            while (-not $reader.EndOfStream) {
                $line = $reader.ReadLine()

                if ($filesHaveHeaders -and $lineCount -eq 0) {
                    # do not do anything
                } 
                else {
                    $writer.WriteLine($line)
                }
                $lineCount++
            }

            [pscustomobject]@{
                FileName   = $fileInfo.Name
                # Subtract 1 only if each file has a header
                LineCount  = if ($filesHaveHeaders) { $lineCount - 1 } else { $lineCount }
                SizeBytes  = $fileInfo.Length
            }

        } finally {
            $reader.Dispose()
        }
    }
} finally {
    $writer.Dispose()
}

Write-Host "`n ========================================== Verifying Totals: =============================================== "
# --- CALCULATE TOTALS: ---
$totalLinesExpected = ($results | Measure-Object LineCount -Sum).Sum
$totalBytesExpected = ($results | Measure-Object SizeBytes -Sum).Sum
$expectedCombinedLines = $totalLinesExpected

# --- GET ACTUAL COMBINED FILE STATS: ---
$actualLines = 0
[System.IO.File]::ReadLines($csvCombinedOutputFileName) | ForEach-Object { $actualLines++ }
$actualLines = $actualLines - $(if ($filesHaveHeaders) { 1 } else { 0 })
$actualBytes = (Get-Item $csvCombinedOutputFileName).Length

# --- DISPLAY SUMMARY: ---
$results | Format-Table -Property `
    @{Expression={$_.FileName}; Label="FileName"; Width = 40}, 
    @{Expression={"{0:N0}" -f $_.LineCount};  Label="LineCount";     Width = 15; Alignment = "Right"}, 
    @{Expression={"{0:N2}" -f ($_.SizeBytes / 1MB)}; Label="FileSize (MB)"; Width=15; Alignment = "Right"}

Write-Host ("-" * 111) 
Write-Host ("GRAND TOTALS FOR INPUT FILES:".PadRight(50) + ("{0:N0}" -f $totalLinesExpected).PadRight(15) + ("{0:N2} MB" -f ($totalBytesExpected / 1MB))) -ForegroundColor Cyan
Write-Host ("-" * 111)

# --- VERIFICATION SECTION: ---
Write-Host "`n ========================================== Verification Results: ============================================= "

# Line Count Verification:
[pscustomobject]@{
    Metric = "Line Count $(if ($filesHaveHeaders) { '(Adj for Headers)' })"
    Expected = "{0:N0}" -f $expectedCombinedLines
    Actual   = "{0:N0}" -f $actualLines
    Status   = if ($expectedCombinedLines -eq $actualLines) { "PASS" } else { "FAIL" }
} | Format-Table -AutoSize

# File Size Verification:
[pscustomobject]@{
    Metric     = "File Size"
    Expected   = "{0:N2} MB" -f ($totalBytesExpected / 1MB)
    Actual     = "{0:N2} MB" -f ($actualBytes / 1MB)
    Difference = "{0:N2} MB" -f [Math]::Abs(($totalBytesExpected - $actualBytes) / 1MB)
} | Format-Table -AutoSize

if ($expectedCombinedLines -eq $actualLines) {
    Write-Host "VERIFICATION SUCCESS: Data integrity confirmed." -ForegroundColor Green
} else {
    Write-Host "VERIFICATION FAILED: Mismatch detected!" -ForegroundColor Red
}