1 | #Region "Microsoft.VisualBasic::c30b2917413894c537cfb17fe771b98e, Microsoft.VisualBasic.Core\ComponentModel\File\BufferedStream.vb" |
2 | |
3 | ' Author: |
4 | ' |
5 | ' asuka (amethyst.asuka@gcmodeller.org) |
6 | ' xie (genetics@smrucc.org) |
7 | ' xieguigang (xie.guigang@live.com) |
8 | ' |
9 | ' Copyright (c) 2018 GPL3 Licensed |
10 | ' |
11 | ' |
12 | ' GNU GENERAL PUBLIC LICENSE (GPL3) |
13 | ' |
14 | ' |
15 | ' This program is free software: you can redistribute it and/or modify |
16 | ' it under the terms of the GNU General Public License as published by |
17 | ' the Free Software Foundation, either version 3 of the License, or |
18 | ' (at your option) any later version. |
19 | ' |
20 | ' This program is distributed in the hope that it will be useful, |
21 | ' but WITHOUT ANY WARRANTY; without even the implied warranty of |
22 | ' MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
23 | ' GNU General Public License for more details. |
24 | ' |
25 | ' You should have received a copy of the GNU General Public License |
26 | ' along with this program. If not, see <http://www.gnu.org/licenses/>. |
27 | |
28 | |
29 | |
30 | ' /********************************************************************************/ |
31 | |
32 | ' Summaries: |
33 | |
34 | ' Class BufferedStream |
35 | ' |
36 | ' Properties: EndRead, FileName |
37 | ' |
38 | ' Constructor: (+3 Overloads) Sub New |
39 | ' |
40 | ' Function: BufferProvider, LinesIterator, ReadAllLines, ToString |
41 | ' |
42 | ' Sub: (+2 Overloads) Dispose, Reset |
43 | ' |
44 | ' |
45 | ' /********************************************************************************/ |
46 | |
47 | #End Region |
48 | |
49 | Imports System.IO |
50 | Imports System.Text |
51 | Imports System.Web.Script.Serialization |
52 | Imports System.Xml.Serialization |
53 | Imports Microsoft.VisualBasic.Serialization.JSON |
54 | Imports Microsoft.VisualBasic.Text |
55 | |
Namespace ComponentModel

    ''' <summary>
    ''' Buffered reader for large text datasets.
    ''' 
    ''' Small files (not larger than the configured buffer size) are loaded into memory as
    ''' a whole; larger files are read from the underlying <see cref="FileStream"/> chunk by
    ''' chunk, and every call of <see cref="BufferProvider"/> returns the complete lines that
    ''' were found in the next chunk.
    ''' </summary>
    Public Class BufferedStream
        Implements IDisposable

        ''' <summary>
        ''' The file location of this text file.
        ''' </summary>
        ''' <returns></returns>
        <XmlIgnore> <ScriptIgnore> Public Property FileName As String
            Get
                Return __fileName
            End Get
            Protected Set(value As String)
                __fileName = value
            End Set
        End Property

        Protected __fileName As String
        ''' <summary>
        ''' All lines of the file, only populated when the file was small enough to be loaded at once.
        ''' </summary>
        Protected __innerBuffer As String()
        ''' <summary>
        ''' The source stream for chunked reading, only populated when the file is read chunk by chunk.
        ''' </summary>
        Protected __innerStream As FileStream

        ''' <summary>
        ''' Upper limit (and default value) of the chunk size in bytes: 512MB.
        ''' </summary>
        Const maxBufferSize As Integer = 512 * 1024 * 1024

        Protected __bufferSize As Integer
        Protected __encoding As Encoding

        ''' <summary>
        ''' Text of the trailing, possibly incomplete, line of the previous chunk. It is
        ''' prepended to the text of the next chunk before the line splitting.
        ''' </summary>
        Dim lefts As String = ""

        ''' <summary>
        ''' Stateful decoder of the chunked reader. A decoder (instead of
        ''' <see cref="Encoding.GetString(Byte())"/>) is required because a multi-byte
        ''' character may be cut in half by the chunk boundary: the decoder keeps the
        ''' incomplete byte sequence and finishes it with the bytes of the next chunk.
        ''' </summary>
        Dim chunkDecoder As Decoder

        ''' <summary>
        ''' Dump the reader state (encoding, end flag, size of the carried text, chunk size and file name) as json.
        ''' </summary>
        ''' <returns></returns>
        Public Overrides Function ToString() As String
            ' the encoding is not assigned by every constructor, so it may be nothing here
            Dim encodes As String = __encoding?.ToString
            Dim x As New With {encodes, EndRead, .lefts = Me.lefts.Length, __bufferSize, FileName}
            Return x.GetJson
        End Function

        ''' <summary>
        ''' Open a text file for buffered reading.
        ''' </summary>
        ''' <param name="path">The location of the text file, the file must exist.</param>
        ''' <param name="encoding"><see cref="System.Text.Encoding.Default"/>, if null</param>
        ''' <param name="maxBufferSize">
        ''' A file that is larger than this number of bytes is read chunk by chunk, each chunk
        ''' has this size; any other file is loaded into memory at once. This value can not
        ''' exceed the 512MB class limit.
        ''' </param>
        ''' <exception cref="FileNotFoundException">The file at <paramref name="path"/> does not exist.</exception>
        ''' <exception cref="InternalBufferOverflowException"><paramref name="maxBufferSize"/> is greater than the class limit.</exception>
        Sub New(path$, Optional encoding As Encoding = Nothing, Optional maxBufferSize As Integer = BufferedStream.maxBufferSize)
            If Not path.FileExists Then
                Throw New FileNotFoundException("Buffer file is not found!", path.GetFullPath)
            ElseIf maxBufferSize > BufferedStream.maxBufferSize Then
                Throw New InternalBufferOverflowException($"String reader buffer(size={maxBufferSize} bytes) is too large!")
            Else
                FileName = path
                encoding = encoding Or DefaultEncoding
                __encoding = encoding
            End If

            Dim file As FileInfo = FileIO.FileSystem.GetFileInfo(path)

            If file.Length > maxBufferSize Then
                ' too large for a single read: keep the stream open and read it chunk by chunk
                __innerStream = IO.File.Open(path, FileMode.Open, FileAccess.Read, FileShare.Read)
                __bufferSize = maxBufferSize
            Else
                __innerBuffer = IO.File.ReadAllLines(path, encoding)
            End If
        End Sub

        ''' <summary>
        ''' Read the lines from an already opened file stream, chunk by chunk.
        ''' </summary>
        ''' <param name="stream">
        ''' The source stream. This reader disposes the stream when the reader itself is disposed.
        ''' </param>
        ''' <param name="readSize">The chunk size in bytes.</param>
        ''' <remarks>
        ''' This constructor does not assign a text encoding, so the chunks are decoded
        ''' with <see cref="System.Text.Encoding.Default"/> unless a derived class assigns one.
        ''' </remarks>
        Sub New(stream As FileStream, Optional readSize As Integer = BufferedStream.maxBufferSize)
            __innerStream = stream
            __bufferSize = readSize
        End Sub

        ''' <summary>
        ''' Create an empty reader, no data source is attached by this constructor.
        ''' </summary>
        Sub New()
        End Sub

        ''' <summary>
        ''' End of buffer read?
        ''' </summary>
        ''' <returns></returns>
        Public ReadOnly Property EndRead As Boolean = False

        ''' <summary>
        ''' Reset the stream buffer reader to its initial state.
        ''' </summary>
        Public Overridable Sub Reset()
            _EndRead = False
            lefts = ""
            ' drop any half decoded character that belongs to the old stream position
            chunkDecoder = Nothing

            If Not __innerStream Is Nothing Then
                __innerStream.Position = Scan0
            End If
        End Sub

        ''' <summary>
        ''' Get the next block of text lines. Once <see cref="EndRead"/> is true, this
        ''' function does not return any data (nothing is returned).
        ''' </summary>
        ''' <returns>
        ''' A copy of all lines if the file was loaded into memory, otherwise the complete
        ''' lines of the next chunk. The block may be empty when the chunk does not contain
        ''' the end of any line.
        ''' </returns>
        ''' <exception cref="InvalidOperationException">Neither a line buffer nor a stream is attached to this reader.</exception>
        Public Overridable Function BufferProvider() As String()
            If EndRead Then
                Return Nothing
            End If

            If Not __innerBuffer Is Nothing Then
                ' the whole file is in memory: a single call returns everything
                _EndRead = True
                Return DirectCast(__innerBuffer.Clone, String())
            End If

            If __innerStream Is Nothing Then
                Throw New InvalidOperationException("No data source is attached to this buffered reader!")
            End If

            ' this call also updates the EndRead flag
            Dim s As String = lefts & ReadChunkText()
            Dim sbuf As String() = s.LineTokens()

            If sbuf Is Nothing Then
                sbuf = New String() {}
            End If

            If EndRead Then
                ' last chunk: the final line is complete even without a line break
                lefts = ""
                Return sbuf
            End If

            If sbuf.Length = 0 Then
                ' no line could be extracted yet, keep all of the text for the next call
                lefts = s
                Return sbuf
            End If

            ' The last token may be just the head of a line that continues in the next
            ' chunk, so it is held back and returned by the next call. The line tokenizer
            ' removes the line breaks, so the break at the very end of the text is restored
            ' here; the original characters are restored (not always CR+LF), otherwise a
            ' CR/LF pair that is split by the chunk boundary would become two line breaks.
            ' NOTE(review): this assumes that LineTokens does not emit an empty token for
            ' a trailing line break - confirm against the tokenizer implementation.
            Dim lastIndex As Integer = sbuf.Length - 1
            Dim complete As String() = New String(lastIndex - 1) {}

            lefts = sbuf(lastIndex) & TrailingLineBreak(s)
            Array.Copy(sbuf, complete, lastIndex)

            Return complete
        End Function

        ''' <summary>
        ''' Read the next chunk from the inner stream and decode it, <see cref="EndRead"/>
        ''' is set to true when the end of the stream has been reached.
        ''' </summary>
        ''' <returns>The decoded text of the chunk, without the text carried from the previous chunk.</returns>
        Private Function ReadChunkText() As String
            ' a non-positive chunk size could never make progress, use the class limit instead
            Dim size As Integer = If(__bufferSize > 0, __bufferSize, BufferedStream.maxBufferSize)
            Dim remaining As Long = __innerStream.Length - __innerStream.Position
            Dim want As Integer = CInt(System.Math.Max(0L, System.Math.Min(CLng(size), remaining)))
            Dim chunk As Byte() = New Byte(want - 1) {}
            Dim filled As Integer = 0

            ' Stream.Read is allowed to return less data than requested, so loop until
            ' the chunk is complete or the stream is exhausted
            Do While filled < want
                Dim n As Integer = __innerStream.Read(chunk, filled, want - filled)

                If n <= 0 Then
                    Exit Do
                End If

                filled += n
            Loop

            _EndRead = filled < want OrElse __innerStream.Position >= __innerStream.Length

            If chunkDecoder Is Nothing Then
                chunkDecoder = If(__encoding, Encoding.Default).GetDecoder()
            End If

            ' only flush the decoder at the end of the stream, an incomplete multi-byte
            ' character at the end of the chunk is completed by the next chunk
            Dim charCount As Integer = chunkDecoder.GetCharCount(chunk, 0, filled, EndRead)
            Dim chars As Char() = New Char(charCount - 1) {}

            Call chunkDecoder.GetChars(chunk, 0, filled, chars, 0, EndRead)

            Return New String(chars)
        End Function

        ''' <summary>
        ''' Get the line break (CR+LF, LF or CR) at the very end of the text, or an empty
        ''' string if the text does not end with a line break.
        ''' </summary>
        Private Shared Function TrailingLineBreak(s As String) As String
            If s.EndsWith(vbCrLf, StringComparison.Ordinal) Then
                Return vbCrLf
            ElseIf s.EndsWith(vbLf, StringComparison.Ordinal) Then
                Return vbLf
            ElseIf s.EndsWith(vbCr, StringComparison.Ordinal) Then
                Return vbCr
            Else
                Return ""
            End If
        End Function

        ''' <summary>
        ''' Reset this reader and then iterate through all of the text lines.
        ''' </summary>
        ''' <returns></returns>
        Public Iterator Function ReadAllLines() As IEnumerable(Of String)
            Call Reset()

            Do While Not EndRead
                Dim block As String() = BufferProvider()

                ' an overridden provider may signal the end of the data by nothing
                If block Is Nothing Then
                    Exit Do
                End If

                For Each line As String In block
                    Yield line
                Next
            Loop
        End Function

        ''' <summary>
        ''' Iterate through all text lines of the given file, the file is closed when the iteration ends.
        ''' </summary>
        ''' <param name="path">The location of the text file.</param>
        ''' <param name="encoding">The text encoding of the file.</param>
        ''' <returns></returns>
        Public Shared Iterator Function LinesIterator(path As String, Optional encoding As Encodings = Encodings.Default) As IEnumerable(Of String)
            Using read As New BufferedStream(path, encoding.CodePage)
                Do While Not read.EndRead
                    Dim block As String() = read.BufferProvider

                    If block Is Nothing Then
                        Exit Do
                    End If

                    For Each line As String In block
                        Yield line
                    Next
                Loop
            End Using
        End Function

#Region "IDisposable Support"
        Private disposedValue As Boolean ' To detect redundant calls

        ''' <summary>
        ''' Release the inner file stream, if there is one.
        ''' </summary>
        ''' <param name="disposing">True when called from <see cref="Dispose()"/>.</param>
        Protected Overridable Sub Dispose(disposing As Boolean)
            If Not Me.disposedValue Then
                If disposing Then
                    ' the stream is the only disposable managed state of this reader
                    If Not __innerStream Is Nothing Then
                        Call __innerStream.Dispose()
                    End If
                End If
            End If
            Me.disposedValue = True
        End Sub

        ' No finalizer is declared: this class does not own unmanaged resources directly.

        ''' <summary>
        ''' Close this reader and the inner file stream.
        ''' </summary>
        Public Sub Dispose() Implements IDisposable.Dispose
            ' Do not change this code. Put cleanup code in Dispose(disposing As Boolean) above.
            Dispose(True)
        End Sub
#End Region
    End Class
End Namespace