-
Notifications
You must be signed in to change notification settings - Fork 0
/
Func_Split-FileToStream.ps1
164 lines (131 loc) · 5.78 KB
/
Func_Split-FileToStream.ps1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
################################################################################
# Author : Antony Onipko
# Copyright : (c) 2016 Antony Onipko. All rights reserved.
################################################################################
# This work is licensed under the
# Creative Commons Attribution-ShareAlike 4.0 International License.
# To view a copy of this license, visit
# https://creativecommons.org/licenses/by-sa/4.0/
################################################################################
Function Split-FileToStream {
    <#
    .SYNOPSIS
        Reads a file and splits it into multiple memory streams. Can be used with a delimiter.
        The default split size is the number of logical CPUs.
    .DESCRIPTION
        The file is read sequentially in blocks of BufferSize bytes. Once the read position has
        passed the current split point, the data collected so far is cut after the last delimiter
        found in the current block and emitted as one object. The rest of the block starts the
        next part. Whatever remains when the end of the file is reached is emitted as the last part.
    .OUTPUTS
        PSCustomObject with the properties:
          File   - the path that was passed in
          Stream - a System.IO.MemoryStream positioned at 0 holding this part of the file
          Start  - byte offset in the file where this part begins
          End    - byte offset in the file where this part ends (exclusive)
          Length - script property returning the length of Stream
        The caller owns the returned memory streams.
    .EXAMPLE
        Split-FileToStream 'C:\path\to\file.txt'
    .EXAMPLE
        Split-FileToStream -Path 'C:\path\to\file.idx' -Delimiter 'somechars'
    #>
    [CmdletBinding()]
    [OutputType([PsObject])]
    Param(
        # Path to file to split
        [Parameter(Mandatory=$true,
                   Position=1,
                   ValueFromPipeline=$true,
                   ValueFromPipelineByPropertyName=$true)]
        [Alias('FullName')]
        [string]$Path,

        # The delimiter on which to split the file. Default is [System.Environment]::NewLine.
        [Parameter(Mandatory=$false,
                   Position=2,
                   ValueFromPipelineByPropertyName=$true)]
        [string]$Delimiter = [System.Environment]::NewLine,

        # Buffer size for the stream reader and writers. Default is 4KB.
        [Parameter(Mandatory=$false)]
        $BufferSize = 4KB,

        # Maximum number of resulting memory streams. Default is number of Logical Processors.
        [Parameter(Mandatory=$false)]
        $SplitNumber,

        # Encoding of the input stream / file. Default is UTF8.
        [Parameter(Mandatory=$false)]
        [System.Text.Encoding]$Encoding = [System.Text.Encoding]::UTF8
    )

    Begin {
        if (!$SplitNumber) {
            # Environment.ProcessorCount needs no WMI, so the default also works on
            # PowerShell editions that no longer ship Get-WmiObject.
            $SplitNumber = [System.Environment]::ProcessorCount
        }
    }

    Process {
        # Process runs once per pipeline item, so the file stream is opened and released here;
        # the finally block also releases it when a downstream command stops the pipeline early.
        $stream = $null
        try {
            $stream = New-Object System.IO.FileStream(
                $Path,
                [System.IO.FileMode]::Open,
                [System.IO.FileAccess]::Read,
                [System.IO.FileShare]::Read,
                $BufferSize,
                [System.IO.FileOptions]::SequentialScan
            )
            if (-not $stream) {
                # Opening failed and the error was not terminating: nothing to split.
                return
            }

            [long]$splitSize = $stream.Length / $SplitNumber

            ############################################################################
            # Get rid of the BOM if there is one
            $preamble = $Encoding.GetPreamble()
            if ($preamble.Length -gt 0) {
                $bomLen = $preamble.Length
                $bom = New-Object byte[] $bomLen
                $bomRead = $stream.Read($bom, 0, $bomLen)

                # Ordered, byte-by-byte comparison: a byte-swapped BOM or a file shorter
                # than the preamble must not be mistaken for a match.
                $hasBom = ($bomRead -eq $bomLen)
                for ($i = 0; $hasBom -and $i -lt $bomLen; $i++) {
                    if ($bom[$i] -ne $preamble[$i]) {
                        $hasBom = $false
                    }
                }

                if ($hasBom) {
                    Write-Warning "BOM found ($($bomLen) bytes). It will be ignored."
                } else {
                    # No BOM: the bytes just read are data, so rewind.
                    $stream.Seek(0, [System.IO.SeekOrigin]::Begin) | Out-Null
                }
            }
            ############################################################################

            $buf = New-Object byte[] $BufferSize
            $mem = New-Object System.IO.MemoryStream
            $splitPointer = $splitSize
            $start = $stream.Position

            # $lt is the number of bytes the current read actually delivered.
            while (($lt = $stream.Read($buf, 0, $buf.Length)) -gt 0) {
                $atEnd = ($stream.Position -eq $stream.Length)

                # Only look for a delimiter once the split point has been passed.
                $text = $null
                $lastDelim = -1
                if (-not $atEnd -and $stream.Position -ge $splitPointer) {
                    $text = $Encoding.GetString($buf, 0, $lt)
                    # Ordinal: the delimiter is matched literally, independent of culture.
                    $lastDelim = $text.LastIndexOf($Delimiter, [System.StringComparison]::Ordinal)
                }

                if ($atEnd -or $lastDelim -ne -1) {
                    if ($atEnd) {
                        # End of file: everything that was read belongs to the last part.
                        $endPosition = $lt
                        $end = $stream.Position
                    } else {
                        # Byte count of the text up to and including the last delimiter.
                        # NOTE(review): this re-encodes decoded text; if a read starts in the
                        # middle of a multi-byte character the count may not match the raw
                        # bytes - confirm for non-ASCII input.
                        $endPosition = $Encoding.GetBytes(
                            $text.Substring(0, $lastDelim + $Delimiter.Length)
                        ).Length

                        # Grab the carriage return and/or new line if there is one,
                        # without looking past the bytes that were actually read.
                        if ($endPosition -lt $lt -and $Encoding.GetString($buf, $endPosition, 1) -eq "`r") {
                            $endPosition++
                        }
                        if ($endPosition -lt $lt -and $Encoding.GetString($buf, $endPosition, 1) -eq "`n") {
                            $endPosition++
                        }

                        # The current block started at Position - $lt (not Position - BufferSize).
                        $end = $stream.Position - $lt + $endPosition
                    }

                    $mem.Write($buf, 0, $endPosition)
                    $mem.Seek(0, [System.IO.SeekOrigin]::Begin) | Out-Null

                    [pscustomobject]@{
                        File   = $Path
                        Stream = $mem
                        Start  = $start
                        End    = $end
                    } | Add-Member -MemberType ScriptProperty `
                                   -Name Length `
                                   -Value { $this.Stream.Length } `
                                   -PassThru

                    if (-not $atEnd) {
                        # The bytes after the cut open the next part.
                        $mem = New-Object System.IO.MemoryStream
                        $mem.Write($buf, $endPosition, $lt - $endPosition)

                        $splitPointer += $splitSize
                        if ($splitPointer -gt $stream.Length) {
                            $splitPointer = $stream.Length
                        }
                        $start = $end
                    }
                } else {
                    $mem.Write($buf, 0, $lt)
                }
            }
        } finally {
            if ($stream) {
                # Close first: writing to a stopping pipeline can throw.
                $stream.Close()
                Write-Verbose "Closing stream."
            }
        }
    }
}
# Publish Split-FileToStream as an exported function of the enclosing module.
Export-ModuleMember -Function @('Split-FileToStream')