1 | #Region "Microsoft.VisualBasic::16c09e6ed8764347ede7cc93905e0cc5, Microsoft.VisualBasic.Core\Extensions\Doc\Text.vb" |
2 | |
3 | ' Author: |
4 | ' |
5 | ' asuka (amethyst.asuka@gcmodeller.org) |
6 | ' xie (genetics@smrucc.org) |
7 | ' xieguigang (xie.guigang@live.com) |
8 | ' |
9 | ' Copyright (c) 2018 GPL3 Licensed |
10 | ' |
11 | ' |
12 | ' GNU GENERAL PUBLIC LICENSE (GPL3) |
13 | ' |
14 | ' |
15 | ' This program is free software: you can redistribute it and/or modify |
16 | ' it under the terms of the GNU General Public License as published by |
17 | ' the Free Software Foundation, either version 3 of the License, or |
18 | ' (at your option) any later version. |
19 | ' |
20 | ' This program is distributed in the hope that it will be useful, |
21 | ' but WITHOUT ANY WARRANTY; without even the implied warranty of |
22 | ' MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
23 | ' GNU General Public License for more details. |
24 | ' |
25 | ' You should have received a copy of the GNU General Public License |
26 | ' along with this program. If not, see <http://www.gnu.org/licenses/>. |
27 | |
28 | |
29 | |
30 | ' /********************************************************************************/ |
31 | |
32 | ' Summaries: |
33 | |
34 | ' Module TextDoc |
35 | ' |
36 | ' Function: ForEachChar, IsTextFile, IterateAllLines, LineIterators, LoadTextDoc |
37 | ' OpenWriter, ReadAllLines, ReadAllText, ReadFirstLine, SaveHTML |
38 | ' SaveJson, (+4 Overloads) SaveTo, SaveTSV, SaveWithHTMLEncoding, SolveStream |
39 | ' TsvHeaders |
40 | ' |
41 | ' /********************************************************************************/ |
42 | |
43 | #End Region |
44 | |
45 | Imports System.IO |
46 | Imports System.Runtime.CompilerServices |
47 | Imports System.Text |
48 | Imports Microsoft.VisualBasic.CommandLine.Reflection |
49 | Imports Microsoft.VisualBasic.ComponentModel |
50 | Imports Microsoft.VisualBasic.ComponentModel.Collection |
51 | Imports Microsoft.VisualBasic.Linq |
52 | Imports Microsoft.VisualBasic.Scripting.MetaData |
53 | Imports Microsoft.VisualBasic.Serialization.JSON |
54 | Imports Microsoft.VisualBasic.Text |
55 | Imports fs = Microsoft.VisualBasic.FileIO.FileSystem |
56 | |
57 | <Package("Doc.TextFile", Category:=APICategories.UtilityTools, Publisher:="xie.guigang@gmail.com")> |
58 | Public Module TextDoc |
59 | |
''' <summary>
''' Loads a text document of type <typeparamref name="T"/> by running
''' <paramref name="parser"/> on the file and then storing the file path
''' on the loaded object. The XML loader is used when no parser is given.
''' </summary>
''' <typeparam name="T">The document type, which must implement <see cref="ITextFile"/>.</typeparam>
''' <param name="file">Path of the file to load.</param>
''' <param name="encoding">Text encoding that is handed to the parser unchanged.</param>
''' <param name="parser">Loader callback; <c>LoadXml</c> is used when this is Nothing.</param>
''' <param name="ThrowEx">
''' When True a loading failure is rethrown to the caller; when False the
''' failure is only logged and Nothing is returned.
''' </param>
''' <returns>
''' The loaded document, or Nothing (the default value of <typeparamref name="T"/>)
''' when loading failed and <paramref name="ThrowEx"/> is False.
''' </returns>
<Extension>
Public Function LoadTextDoc(Of T As ITextFile)(file$,
                                               Optional encoding As Encoding = Nothing,
                                               Optional parser As Func(Of String, Encoding, T) = Nothing,
                                               Optional ThrowEx As Boolean = True) As T
    If parser Is Nothing Then
        parser = AddressOf LoadXml
    End If

    Dim FileObj As T

    Try
        FileObj = parser(file, encoding)
        FileObj.FilePath = file
    Catch ex As Exception
        ' The failure is always logged, tagged with the file URL for context
        Call App.LogException(New Exception(file.ToFileURL, ex))

        If ThrowEx Then
            ' A bare Throw rethrows the caught exception and keeps its
            ' original stack trace (Throw ex would reset it)
            Throw
        Else
#If DEBUG Then
            Call ex.PrintException
#End If
            Return Nothing
        End If
    End Try

    Return FileObj
End Function
98 | |
''' <summary>
''' Enumerates text lines from either a file or an in-memory string.
''' </summary>
''' <param name="handle$">
''' + When this value is the path of an existing file, the result of <see cref="IterateAllLines"/> is returned
''' + Otherwise the value is treated as the text itself and the result of <see cref="LineTokens"/> is returned
''' </param>
''' <returns>The lines of the file, or the lines of the given text.</returns>
<Extension>
Public Function LineIterators(handle$) As IEnumerable(Of String)
    ' Not an existing file: treat the handle as the text content
    If Not handle.FileExists Then
        Return handle.LineTokens
    End If

    Return handle.IterateAllLines
End Function
115 | |
''' <summary>
''' Parses the header row of a TSV file and builds an index over its column names.
''' </summary>
''' <param name="path$">Path of the ``*.tsv`` file</param>
''' <returns>
''' An index built from the tab-separated tokens of the first line;
''' an index over zero names when the file has no first line.
''' </returns>
<Extension>
Public Function TsvHeaders(path$) As Index(Of String)
    Dim firstLine$ = path.ReadFirstLine
    Dim header$()

    If firstLine Is Nothing Then
        ' An empty file has no first line; avoid calling Split on Nothing
        header = New String() {}
    Else
        header = firstLine.Split(ASCII.TAB)
    End If

    Dim index As New Index(Of String)(header)
    Return index
End Function
127 | |
''' <summary>
''' Saves a sequence of ID mapping records as a TSV file, one record per line.
''' </summary>
''' <param name="tsv">The records; each one contributes the value of its <c>TSV</c> member as a line.</param>
''' <param name="path$">Path of the output file.</param>
''' <param name="encoding">Encoding selector of the output file, ASCII by default.</param>
''' <returns>The result of the line-collection <c>SaveTo</c> call.</returns>
<Extension>
Public Function SaveTSV(tsv As IEnumerable(Of IDMap), path$, Optional encoding As Encodings = Encodings.ASCII) As Boolean
    ' Lazily project every record to its text row
    Dim rows = From map In tsv Select map.TSV
    Dim textEncoding = encoding.CodePage

    Return rows.SaveTo(path, textEncoding)
End Function
140 | |
''' <summary>
''' Serializes an object to JSON text through <c>GetJson</c> and writes that text to a file.
''' </summary>
''' <typeparam name="T">Type of the object to serialize.</typeparam>
''' <param name="obj">The object to serialize.</param>
''' <param name="path$">Path of the output file.</param>
''' <param name="encoding">Text encoding, passed through to <c>SaveTo</c>.</param>
''' <param name="indent">Passed through to <c>GetJson</c> as its <c>indent</c> argument.</param>
''' <returns>The result of the <c>SaveTo</c> call.</returns>
<MethodImpl(MethodImplOptions.AggressiveInlining)>
<Extension>
Public Function SaveJson(Of T)(obj As T, path$,
                               Optional encoding As Encoding = Nothing,
                               Optional indent As Boolean = False) As Boolean

    Dim json = obj.GetJson(indent:=indent)
    Return json.SaveTo(path, encoding)
End Function
149 | |
''' <summary>
''' Enumerates all of the chars in the target text file, decoding them lazily
''' one at a time with a <see cref="BinaryReader"/>.
''' </summary>
''' <param name="path">Path of the file to read.</param>
''' <param name="encoding">Encoding selector used for decoding the chars.</param>
''' <returns>A lazy sequence of the decoded chars; the file stays open while it is being enumerated.</returns>
<Extension>
Public Iterator Function ForEachChar(path$, Optional encoding As Encodings = Encodings.Default) As IEnumerable(Of Char)
    ' Open read-only: the two-argument FileStream constructor requests
    ' read/write access, which fails on write-protected files
    Using file As New FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read)
        Using reader As New BinaryReader(file, encoding.CodePage)
            Dim bs As Stream = reader.BaseStream
            Dim l As Long = bs.Length

            ' Keep decoding until the stream position reaches the file length
            Do While bs.Position < l
                Yield reader.ReadChar
            Loop
        End Using
    End Using
End Function
169 | |
''' <summary>
''' Opens a text file writer by delegating to <c>FileIO.OpenWriter</c>.
''' </summary>
''' <param name="path">Path of the file to write.</param>
''' <param name="encoding">Encoding selector, UTF8 by default; it is translated through <c>CodePage</c>.</param>
''' <param name="newLine">Passed through to <c>FileIO.OpenWriter</c>; presumably the line terminator of the writer.</param>
''' <param name="append">Passed through to <c>FileIO.OpenWriter</c>; presumably selects appending over overwriting.</param>
''' <returns>The writer created by <c>FileIO.OpenWriter</c>.</returns>
<MethodImpl(MethodImplOptions.AggressiveInlining)>
<Extension>
Public Function OpenWriter(path$,
                           Optional encoding As Encodings = Encodings.UTF8,
                           Optional newLine$ = ASCII.LF,
                           Optional append As Boolean = False) As StreamWriter
    Dim textEncoding = encoding.CodePage
    Return FileIO.OpenWriter(path, textEncoding, newLine, append)
End Function
184 | |
''' <summary>
''' Reads a text file line by line through a stream reader and yields the lines lazily,
''' so the whole file is never held in memory. Recommended for very large text files.
''' </summary>
''' <param name="path">Path of the file to read.</param>
''' <param name="encoding">Encoding selector used for decoding the file.</param>
''' <returns>A lazy sequence of the lines; an empty sequence when the file does not exist.</returns>
<Extension>
Public Iterator Function IterateAllLines(path$, Optional encoding As Encodings = Encodings.Default) As IEnumerable(Of String)
    If path.FileExists Then
        Using stream As New FileStream(path, FileMode.Open, access:=FileAccess.Read, share:=FileShare.Read),
              reader As New StreamReader(stream, encoding.CodePage)

            Do Until reader.EndOfStream
                Yield reader.ReadLine
            Loop
        End Using
    End If
End Function
206 | |
''' <summary>
''' Reads only the first line of text in the file.
''' </summary>
''' <param name="path">Path of the file to read; it is opened read-only with shared read access.</param>
''' <param name="encoding">
''' Text encoding of the file; <see cref="DefaultEncoding"/> is used when this parameter is not specified.
''' </param>
''' <returns>The value returned by the first <c>ReadLine</c> call on the file.</returns>
<Extension> Public Function ReadFirstLine(path$, Optional encoding As Encoding = Nothing) As String
    Using stream As New FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read),
          reader As New StreamReader(stream, encoding Or DefaultEncoding)

        Return reader.ReadLine
    End Using
End Function
223 | |
''' <summary>
''' Resolves the data source automatically: when <paramref name="handle"/> is the path of an
''' existing file the file content is read and returned, otherwise <paramref name="handle"/>
''' itself is returned as the content.
''' </summary>
''' <param name="handle$">Either the text content or a file path.</param>
''' <param name="encoding">Encoding selector used when the handle is read as a file.</param>
''' <returns>The file content, or the handle unchanged.</returns>
<Extension> Public Function SolveStream(handle$, Optional encoding As Encodings = Encodings.UTF8) As String
    If Not handle.FileExists(True) Then
        ' Not a file on disk: the handle already is the text
        Return handle
    End If

    Return handle.ReadAllText(encoding.CodePage)
End Function
236 | |
''' <summary>
''' Reads the whole file into a single string. Only recommended for small text files.
''' </summary>
''' <param name="path">Path of the file to read.</param>
''' <param name="encoding">Text encoding of the file; the <c>UTF8</c> default is used when omitted.</param>
''' <param name="throwEx">
''' When True a read failure is thrown to the caller, wrapped in an exception whose
''' message is the file URL; when False the failure is logged and Nothing is returned.
''' </param>
''' <param name="suppress">
''' Only used when <paramref name="throwEx"/> is False: when True the failure is not printed, only logged.
''' </param>
''' <returns>The file content, or Nothing when reading failed and <paramref name="throwEx"/> is False.</returns>
''' <remarks></remarks>
'''
<ExportAPI("Read.TXT")>
<Extension>
Public Function ReadAllText(path$,
                            Optional encoding As Encoding = Nothing,
                            Optional throwEx As Boolean = True,
                            Optional suppress As Boolean = False) As String
    Try
        Return fs.ReadAllText(path, encoding:=encoding Or UTF8)
    Catch ex As Exception
        ' Wrap the failure so that the message carries the file location
        Dim wrapped As New Exception(path.ToFileURL, ex)

        If throwEx Then
            Throw wrapped
        End If

        Call App.LogException(wrapped)

        If Not suppress Then
            Call wrapped.PrintException
        End If
    End Try

    Return Nothing
End Function
270 | |
''' <summary>
''' Reads all lines of a text file into an array. Recommended only for small
''' (probably smaller than 300MB) text files.
''' </summary>
''' <param name="path">Path of the file to read.</param>
''' <param name="Encoding">Text encoding of the file; <see cref="DefaultEncoding"/> is used when omitted.</param>
''' <returns>The lines of the file, or an empty array when the file does not exist.</returns>
''' <remarks></remarks>
<MethodImpl(MethodImplOptions.AggressiveInlining)>
<ExportAPI("Read.Lines")>
<Extension>
Public Function ReadAllLines(path$, Optional Encoding As Encoding = Nothing) As String()
    ' A missing file is reported as "no lines" instead of an error
    If Not path.FileExists Then
        Return New String() {}
    End If

    Return File.ReadAllLines(path, encoding:=Encoding Or DefaultEncoding)
End Function
289 | |
''' <summary>
''' Saves the text with <see cref="Encoding.UTF8"/>, the default encoding of html text.
''' </summary>
''' <param name="html$">The html text to write.</param>
''' <param name="path$">Path of the output file.</param>
''' <returns>The result of the <c>SaveTo</c> call.</returns>
<Extension>
<MethodImpl(MethodImplOptions.AggressiveInlining)>
Public Function SaveWithHTMLEncoding(html$, path$) As Boolean
    Dim htmlEncoding As Encoding = Encoding.UTF8
    Return html.SaveTo(path, htmlEncoding)
End Function
301 | |
''' <summary>
''' Writes the text into the file specified by <paramref name="path"></paramref>.
''' The parent directory is created when needed. Unless <paramref name="append"/>
''' is True the new data is not appended onto the end of the target file.
''' </summary>
''' <param name="text">The text to write; the <c>EmptyString</c> default is applied when it is omitted.</param>
''' <param name="path">Path of the output file. An empty or Nothing path makes the function return False.</param>
''' <param name="encoding">Text encoding of the output; the <c>UTF8</c> default is applied when it is omitted.</param>
''' <param name="append">Passed to <c>WriteAllText</c> as its append flag.</param>
''' <param name="throwEx">
''' When True failures are thrown to the caller; when False they are logged
''' and reported through a False return value.
''' </param>
''' <returns>True when the text was written; False for an empty path or a suppressed failure.</returns>
''' <remarks></remarks>
'''
<ExportAPI("Write.Text")>
<Extension> Public Function SaveTo(<Parameter("Text")> text As String,
                                   <Parameter("Path")> path As String,
                                   <Parameter("Text.Encoding")>
                                   Optional encoding As Encoding = Nothing,
                                   Optional append As Boolean = False,
                                   Optional throwEx As Boolean = True) As Boolean

    If String.IsNullOrEmpty(path) Then
        Return False
    End If

    Dim DIR As String

#If UNIX Then
    ' GetParent returns Nothing for a root path; fall through to the
    ' empty-directory handling below instead of failing on .FullName
    DIR = System.IO.Directory.GetParent(path)?.FullName
#Else
    Try
        path = PathExtensions.Long2Short(path)
        DIR = fs.GetParentPath(path)
    Catch ex As Exception
        Dim msg As String = $" **** Directory string is illegal or string is too long: [{NameOf(path)}:={path}] > 260"

        ex = New Exception(msg, ex)

        ' Honor throwEx here as well, the same way as the write step below
        If throwEx Then
            Throw ex
        Else
            Call App.LogException(ex)
            Return False
        End If
    End Try
#End If

    If String.IsNullOrEmpty(DIR) Then
        DIR = App.CurrentDirectory
    Else
        ' Best effort: a failure to create the directory surfaces in the write below
        DIR.MkDIR(throwEx:=False)
    End If

    Try
        Call fs.WriteAllText(path, text Or EmptyString, append, encoding Or UTF8)
    Catch ex As Exception
        ' Attach both the directory and the file path to the failure
        ex = New Exception("[DIR] " & DIR, ex)
        ex = New Exception("[Path] " & path, ex)

        If throwEx Then
            Throw ex
        Else
            Call App.LogException(ex)
            Return False
        End If
    End Try

    Return True
End Function
361 | |
''' <summary>
''' Saves the inner text value of an xml element into a text file.
''' </summary>
''' <param name="value">The xml element whose <c>Value</c> text is written.</param>
''' <param name="path">Path of the output file.</param>
''' <param name="encoding">Text encoding, passed through to the string <c>SaveTo</c> overload.</param>
''' <returns>The result of the string <c>SaveTo</c> call.</returns>
<ExportAPI("Write.Text")>
<MethodImpl(MethodImplOptions.AggressiveInlining)>
<Extension> Public Function SaveTo(value As XElement, path$, Optional encoding As Encoding = Nothing) As Boolean
    Dim innerText As String = value.Value
    Return innerText.SaveTo(path, encoding)
End Function
374 | |
''' <summary>
''' Writes the markup text of an xml element (its <c>ToString</c> output) into a file.
''' </summary>
''' <param name="html">The element to write.</param>
''' <param name="path$">Path of the output file.</param>
''' <param name="encoding">Encoding selector of the output file, <c>UTF8WithoutBOM</c> by default.</param>
''' <returns>The result of the string <c>SaveTo</c> call.</returns>
<MethodImpl(MethodImplOptions.AggressiveInlining)>
<Extension>
Public Function SaveHTML(html As XElement, path$, Optional encoding As Encodings = Encodings.UTF8WithoutBOM) As Boolean
    Dim markup As String = html.ToString
    Return markup.SaveTo(path, encoding.CodePage)
End Function
380 | |
''' <summary>
''' Guesses whether the target file is a text file or a binary file by sampling
''' the start of the file and counting null bytes.
''' </summary>
''' <param name="path">Full path of the file to inspect.</param>
''' <param name="chunkSize">
''' Limit on the number of samples taken from the start of the file, 4KB by default.
''' When the null count stays within the threshold over this region the file is
''' treated as a text file.
''' </param>
''' <returns>True when at most 10% of the samples were null bytes, False otherwise.</returns>
''' <remarks>
''' NOTE(review): the sample buffer holds two bytes (upper bound 1) and only the
''' first byte of every read is tested, so every other byte is skipped and the
''' inspected region is about twice <paramref name="chunkSize"/> - confirm that this is intended.
''' </remarks>
'''
<ExportAPI("IsTextFile")>
<Extension> Public Function IsTextFile(path$, Optional chunkSize% = 4 * 1024) As Boolean
    Using stream As New FileStream(path, FileMode.Open, FileAccess.Read)
        Dim sample(1) As Byte
        Dim zeros As Integer = 0
        Dim reads As Integer = 0

        Do While stream.Read(sample, 0, sample.Length) > 0
            If sample(0) = 0 Then
                zeros += 1
            End If

            ' Stop sampling once the read counter has passed the chunk limit
            If reads > chunkSize Then
                Exit Do
            End If

            reads += 1
        Loop

        Return zeros <= 0.1 * reads
    End Using
End Function
410 | |
''' <summary>
''' Writes every string of the collection into the file as one line each.
''' The parent folder of the target file is created automatically when it does not exist.
''' </summary>
''' <param name="array">The lines to write; it is enumerated through <c>SafeQuery</c>.</param>
''' <param name="path">Path of the output file. An empty or Nothing path makes the function return False.</param>
''' <param name="encoding">Text encoding of the output; <see cref="DefaultEncoding"/> is used when omitted.</param>
''' <returns>True when the lines were written; False for an empty path.</returns>
''' <remarks></remarks>
'''
<ExportAPI("Write.Text")>
<Extension> Public Function SaveTo(array As IEnumerable(Of String), path$, Optional encoding As Encoding = Nothing) As Boolean
    If String.IsNullOrEmpty(path) Then
        Return False
    End If

    ' Writing an empty text first reuses the string overload to prepare
    ' the parent directory of the target file
    Call "".SaveTo(path)

    ' FileMode.Create truncates the file, so no bytes left by the preparatory
    ' write above (or by an older file) can remain behind the new content
    Using fs As New FileStream(path, FileMode.Create),
          file As New StreamWriter(fs, encoding Or DefaultEncoding)

        For Each line$ In array.SafeQuery
            Call file.WriteLine(line)
        Next
    End Using

    Return True
End Function
438 | |
''' <summary>
''' Writes the text accumulated in a <see cref="StringBuilder"/> object into a text file.
''' </summary>
''' <param name="sb">The builder whose <c>ToString</c> content is written.</param>
''' <param name="path">Path of the output file.</param>
''' <param name="encoding">Text encoding, passed through to the string <c>SaveTo</c> overload.</param>
''' <returns>The result of the string <c>SaveTo</c> call.</returns>
<ExportAPI("Write.Text")>
<MethodImpl(MethodImplOptions.AggressiveInlining)>
<Extension> Public Function SaveTo(sb As StringBuilder, path$, Optional encoding As Encoding = Nothing) As Boolean
    Dim content As String = sb.ToString
    Return content.SaveTo(path, encoding)
End Function
451 | End Module |