| 1 | #Region "Microsoft.VisualBasic::16c09e6ed8764347ede7cc93905e0cc5, Microsoft.VisualBasic.Core\Extensions\Doc\Text.vb" |
| 2 | |
| 3 | ' Author: |
| 4 | ' |
| 5 | ' asuka (amethyst.asuka@gcmodeller.org) |
| 6 | ' xie (genetics@smrucc.org) |
| 7 | ' xieguigang (xie.guigang@live.com) |
| 8 | ' |
| 9 | ' Copyright (c) 2018 GPL3 Licensed |
| 10 | ' |
| 11 | ' |
| 12 | ' GNU GENERAL PUBLIC LICENSE (GPL3) |
| 13 | ' |
| 14 | ' |
| 15 | ' This program is free software: you can redistribute it and/or modify |
| 16 | ' it under the terms of the GNU General Public License as published by |
| 17 | ' the Free Software Foundation, either version 3 of the License, or |
| 18 | ' (at your option) any later version. |
| 19 | ' |
| 20 | ' This program is distributed in the hope that it will be useful, |
| 21 | ' but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 22 | ' MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 23 | ' GNU General Public License for more details. |
| 24 | ' |
| 25 | ' You should have received a copy of the GNU General Public License |
| 26 | ' along with this program. If not, see <http://www.gnu.org/licenses/>. |
| 27 | |
| 28 | |
| 29 | |
| 30 | ' /********************************************************************************/ |
| 31 | |
| 32 | ' Summaries: |
| 33 | |
| 34 | ' Module TextDoc |
| 35 | ' |
| 36 | ' Function: ForEachChar, IsTextFile, IterateAllLines, LineIterators, LoadTextDoc |
| 37 | ' OpenWriter, ReadAllLines, ReadAllText, ReadFirstLine, SaveHTML |
| 38 | ' SaveJson, (+4 Overloads) SaveTo, SaveTSV, SaveWithHTMLEncoding, SolveStream |
| 39 | ' TsvHeaders |
| 40 | ' |
| 41 | ' /********************************************************************************/ |
| 42 | |
| 43 | #End Region |
| 44 | |
| 45 | Imports System.IO |
| 46 | Imports System.Runtime.CompilerServices |
| 47 | Imports System.Text |
| 48 | Imports Microsoft.VisualBasic.CommandLine.Reflection |
| 49 | Imports Microsoft.VisualBasic.ComponentModel |
| 50 | Imports Microsoft.VisualBasic.ComponentModel.Collection |
| 51 | Imports Microsoft.VisualBasic.Linq |
| 52 | Imports Microsoft.VisualBasic.Scripting.MetaData |
| 53 | Imports Microsoft.VisualBasic.Serialization.JSON |
| 54 | Imports Microsoft.VisualBasic.Text |
| 55 | Imports fs = Microsoft.VisualBasic.FileIO.FileSystem |
| 56 | |
| 57 | <Package("Doc.TextFile", Category:=APICategories.UtilityTools, Publisher:="xie.guigang@gmail.com")> |
| 58 | Public Module TextDoc |
| 59 | |
''' <summary>
''' Loads a text document of type <typeparamref name="T"/> from a file.
''' When no <paramref name="parser"/> is given, the file is loaded with <c>LoadXml</c>.
''' </summary>
''' <typeparam name="T">The document type, which must implement <see cref="ITextFile"/>.</typeparam>
''' <param name="file">Path of the file to load; it is also stored in the <c>FilePath</c> of the result.</param>
''' <param name="encoding">Text encoding that is handed to the parser unchanged (may be Nothing).</param>
''' <param name="parser">Custom loader function; default is the Xml parser.</param>
''' <param name="ThrowEx">
''' When True (default) a load failure is logged and then rethrown; when False the failure is
''' logged and Nothing is returned.
''' </param>
''' <returns>The loaded document, or Nothing if loading failed and <paramref name="ThrowEx"/> is False.</returns>
<Extension>
Public Function LoadTextDoc(Of T As ITextFile)(file$,
                                               Optional encoding As Encoding = Nothing,
                                               Optional parser As Func(Of String, Encoding, T) = Nothing,
                                               Optional ThrowEx As Boolean = True) As T
    If parser Is Nothing Then
        parser = AddressOf LoadXml
    End If

    Dim FileObj As T

    Try
        FileObj = parser(file, encoding)
        FileObj.FilePath = file
    Catch ex As Exception
        ' The logged exception carries the file URL as its message and the real error as inner exception.
        Call App.LogException(New Exception(file.ToFileURL, ex))

        If ThrowEx Then
            ' A bare Throw keeps the original stack trace; "Throw ex" would reset it to this line.
            Throw
        Else
#If DEBUG Then
            Call ex.PrintException
#End If
            Return Nothing
        End If
    End Try

    Return FileObj
End Function
| 98 | |
''' <summary>
''' Gets the text lines behind a handle that is either a file path or literal text.
''' </summary>
''' <param name="handle$">
''' + If this is the path of an existing file, the result of <see cref="IterateAllLines"/> is returned.
''' + Otherwise the value is treated as text content and the result of <see cref="LineTokens"/> is returned.
''' </param>
''' <returns>The lines of the file, or the lines of the text itself.</returns>
<Extension>
Public Function LineIterators(handle$) As IEnumerable(Of String)
    ' Not an existing file: the handle itself is the text to split.
    If Not handle.FileExists Then
        Return handle.LineTokens
    End If

    Return handle.IterateAllLines
End Function
| 115 | |
''' <summary>
''' Parses the header row of a TSV file and builds an index over its column names.
''' </summary>
''' <param name="path$">Path of the ``*.tsv`` file.</param>
''' <returns>An <see cref="Index(Of String)"/> built from the tab-separated fields of the first line.</returns>
<Extension>
Public Function TsvHeaders(path$) As Index(Of String)
    Dim firstLine$ = path.ReadFirstLine
    Dim columns$() = firstLine.Split(ASCII.TAB)

    Return New Index(Of String)(columns)
End Function
| 127 | |
''' <summary>
''' Saves a collection of ID mapping records as a tsv file, one record per line.
''' </summary>
''' <param name="tsv">The ID mapping records; each one contributes its <c>TSV</c> text as a line.</param>
''' <param name="path$">Path of the output file.</param>
''' <param name="encoding">Text encoding of the output file, ASCII by default.</param>
''' <returns>The result of the line-collection <c>SaveTo</c> call.</returns>
<Extension>
Public Function SaveTSV(tsv As IEnumerable(Of IDMap), path$, Optional encoding As Encodings = Encodings.ASCII) As Boolean
    Dim textEncoding = encoding.CodePage

    Return tsv _
        .Select(Function(map) map.TSV) _
        .SaveTo(path, textEncoding)
End Function
| 140 | |
''' <summary>
''' Serializes an object to json text and writes that text to a file.
''' </summary>
''' <typeparam name="T">Type of the object to serialize.</typeparam>
''' <param name="obj">The object to serialize.</param>
''' <param name="path$">Path of the output file.</param>
''' <param name="encoding">Text encoding handed to <c>SaveTo</c> (may be Nothing).</param>
''' <param name="indent">Passed to <c>GetJson</c> as its <c>indent</c> argument.</param>
''' <returns>The result of the text <c>SaveTo</c> call.</returns>
<MethodImpl(MethodImplOptions.AggressiveInlining)>
<Extension>
Public Function SaveJson(Of T)(obj As T, path$,
                               Optional encoding As Encoding = Nothing,
                               Optional indent As Boolean = False) As Boolean

    Dim json$ = obj.GetJson(indent:=indent)
    Return json.SaveTo(path, encoding)
End Function
| 149 | |
''' <summary>
''' Enumerates all of the chars in the target text file.
''' </summary>
''' <param name="path">Path of the file to read.</param>
''' <param name="encoding">Encoding used to decode the bytes of the file into chars.</param>
''' <returns>A lazy sequence of chars; the file stays open while the sequence is being enumerated.</returns>
<Extension>
Public Iterator Function ForEachChar(path$, Optional encoding As Encodings = Encodings.Default) As IEnumerable(Of Char)
    ' Open read-only and allow other readers: this function never writes, and asking for
    ' write access (the FileStream default) makes read-only files fail to open.
    Using file As New FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read)
        Using reader As New BinaryReader(file, encoding.CodePage)
            Dim total As Long = file.Length

            ' Stop on the byte position, because ReadChar throws once the stream is exhausted.
            Do While file.Position < total
                Yield reader.ReadChar
            Loop
        End Using
    End Using
End Function
| 169 | |
''' <summary>
''' Opens a text writer on the given file by delegating to <c>FileIO.OpenWriter</c>.
''' </summary>
''' <param name="path">Path of the file to write.</param>
''' <param name="encoding">Text encoding of the writer, UTF8 by default.</param>
''' <param name="newLine">New line value handed to <c>FileIO.OpenWriter</c>, LF by default.</param>
''' <param name="append">Append flag handed to <c>FileIO.OpenWriter</c>, False by default.</param>
''' <returns>The <see cref="StreamWriter"/> created by <c>FileIO.OpenWriter</c>.</returns>
<MethodImpl(MethodImplOptions.AggressiveInlining)>
<Extension>
Public Function OpenWriter(path$,
                           Optional encoding As Encodings = Encodings.UTF8,
                           Optional newLine$ = ASCII.LF,
                           Optional append As Boolean = False) As StreamWriter
    Dim textEncoding = encoding.CodePage
    Return FileIO.OpenWriter(path, textEncoding, newLine, append)
End Function
| 184 | |
''' <summary>
''' Reads a text file line by line through a stream reader, yielding each line lazily.
''' Recommended for very large text files, as the content is never loaded as a whole.
''' </summary>
''' <param name="path">Path of the file to read.</param>
''' <param name="encoding">Text encoding used to decode the file.</param>
''' <returns>The lines of the file; a file that does not exist yields an empty sequence.</returns>
<Extension>
Public Iterator Function IterateAllLines(path$, Optional encoding As Encodings = Encodings.Default) As IEnumerable(Of String)
    If path.FileExists Then
        Using stream As New FileStream(path, FileMode.Open, access:=FileAccess.Read, share:=FileShare.Read),
            reader As New StreamReader(stream, encoding.CodePage)

            Do Until reader.EndOfStream
                Yield reader.ReadLine
            Loop
        End Using
    End If
End Function
| 206 | |
''' <summary>
''' Reads only the first line of text in the file.
''' </summary>
''' <param name="path">Path of the file to read.</param>
''' <param name="encoding">
''' Text encoding of the file; <see cref="DefaultEncoding"/> is used when this parameter is not specified.
''' </param>
''' <returns>The value of the first <c>ReadLine</c> call on the file.</returns>
<Extension> Public Function ReadFirstLine(path$, Optional encoding As Encoding = Nothing) As String
    Using stream As New FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read),
        reader As New StreamReader(stream, encoding Or DefaultEncoding)

        Return reader.ReadLine
    End Using
End Function
| 223 | |
''' <summary>
''' Resolves the data source automatically: when <paramref name="handle"/> is the path of an
''' existing file the content of that file is read, otherwise <paramref name="handle"/> itself
''' is returned as the content.
''' </summary>
''' <param name="handle$">Text content or a file path.</param>
''' <param name="encoding">Text encoding used when the file is read, UTF8 by default.</param>
''' <returns>The file content, or <paramref name="handle"/> unchanged.</returns>
<Extension> Public Function SolveStream(handle$, Optional encoding As Encodings = Encodings.UTF8) As String
    ' Not a file on disk: the handle already is the text.
    If Not handle.FileExists(True) Then
        Return handle
    End If

    Return handle.ReadAllText(encoding.CodePage)
End Function
| 236 | |
''' <summary>
''' Reads the whole text content of a file at once; only suitable for small text files.
''' </summary>
''' <param name="path">Path of the file to read.</param>
''' <param name="encoding">Text encoding of the file; <c>UTF8</c> is used when this is Nothing.</param>
''' <param name="throwEx">
''' When True (default) a read failure is thrown, wrapped in an exception whose message is the file URL.
''' When False the failure is logged and Nothing is returned.
''' </param>
''' <param name="suppress">When True the logged failure is not printed as well.</param>
''' <returns>The file content, or Nothing if reading failed and <paramref name="throwEx"/> is False.</returns>
''' <remarks></remarks>
'''
<ExportAPI("Read.TXT")>
<Extension>
Public Function ReadAllText(path$,
                            Optional encoding As Encoding = Nothing,
                            Optional throwEx As Boolean = True,
                            Optional suppress As Boolean = False) As String
    Try
        Return fs.ReadAllText(path, encoding:=encoding Or UTF8)
    Catch ex As Exception
        ' Wrap the failure so that the offending file is named in the message.
        Dim wrapped As New Exception(path.ToFileURL, ex)

        If throwEx Then
            Throw wrapped
        End If

        Call App.LogException(wrapped)

        If Not suppress Then
            Call wrapped.PrintException
        End If

        Return Nothing
    End Try
End Function
| 270 | |
''' <summary>
''' Reads all lines of a text file into an array. Recommended only for small
''' (probably smaller than 300MB) text files.
''' </summary>
''' <param name="path">Path of the file to read.</param>
''' <param name="Encoding">Text encoding of the file; <c>DefaultEncoding</c> is used when this is Nothing.</param>
''' <returns>The lines of the file, or an empty array when the file does not exist.</returns>
''' <remarks></remarks>
<MethodImpl(MethodImplOptions.AggressiveInlining)>
<ExportAPI("Read.Lines")>
<Extension>
Public Function ReadAllLines(path$, Optional Encoding As Encoding = Nothing) As String()
    If Not path.FileExists Then
        Return New String() {}
    End If

    Return File.ReadAllLines(path, encoding:=Encoding Or DefaultEncoding)
End Function
| 289 | |
''' <summary>
''' Saves the text to a file with <see cref="Encoding.UTF8"/>, the encoding used here for html text.
''' </summary>
''' <param name="html$">The html text to write.</param>
''' <param name="path$">Path of the output file.</param>
''' <returns>The result of the text <c>SaveTo</c> call.</returns>
<Extension>
<MethodImpl(MethodImplOptions.AggressiveInlining)>
Public Function SaveWithHTMLEncoding(html$, path$) As Boolean
    Dim htmlEncoding As Encoding = Encoding.UTF8
    Return html.SaveTo(path, htmlEncoding)
End Function
| 301 | |
''' <summary>
''' Writes the text into the file specified by <paramref name="path"></paramref>. The parent
''' directory is created when needed. By default the file content is replaced, not appended to.
''' </summary>
''' <param name="text">The text to write; Nothing is replaced through <c>text Or EmptyString</c>.</param>
''' <param name="path">Path of the output file. An empty path makes the function return False.</param>
''' <param name="encoding">Text encoding of the output; <c>UTF8</c> is used when this is Nothing.</param>
''' <param name="append">When True the text is appended to the file instead of replacing its content.</param>
''' <param name="throwEx">
''' When True (default) failures are thrown. When False they are logged and False is returned;
''' this covers both an illegal path and a failed write.
''' </param>
''' <returns>True if the text was written; False for an empty path or a logged failure.</returns>
''' <remarks></remarks>
'''
<ExportAPI("Write.Text")>
<Extension> Public Function SaveTo(<Parameter("Text")> text As String,
                                   <Parameter("Path")> path As String,
                                   <Parameter("Text.Encoding")>
                                   Optional encoding As Encoding = Nothing,
                                   Optional append As Boolean = False,
                                   Optional throwEx As Boolean = True) As Boolean

    If String.IsNullOrEmpty(path) Then
        Return False
    End If

    Dim DIR As String

#If UNIX Then
    ' Directory.GetParent returns Nothing for a root path; the null-conditional access
    ' avoids a NullReferenceException and lets the empty-DIR fallback below take over.
    DIR = System.IO.Directory.GetParent(path)?.FullName
#Else
    Try
        path = PathExtensions.Long2Short(path)
        DIR = fs.GetParentPath(path)
    Catch ex As Exception
        Dim msg As String = $" **** Directory string is illegal or string is too long: [{NameOf(path)}:={path}] > 260"
        Dim pathError As New Exception(msg, ex)

        ' Honor throwEx here as well, so a caller that opted out of exceptions gets False.
        If throwEx Then
            Throw pathError
        Else
            Call App.LogException(pathError)
            Return False
        End If
    End Try
#End If

    If String.IsNullOrEmpty(DIR) Then
        ' DIR is only used for the error report below in this case.
        DIR = App.CurrentDirectory
    Else
        DIR.MkDIR(throwEx:=False)
    End If

    Try
        Call fs.WriteAllText(path, text Or EmptyString, append, encoding Or UTF8)
    Catch ex As Exception
        ' Chain both the directory and the full path into the reported error.
        ex = New Exception("[DIR] " & DIR, ex)
        ex = New Exception("[Path] " & path, ex)

        If throwEx Then
            Throw ex
        Else
            Call App.LogException(ex)
            Return False
        End If
    End Try

    Return True
End Function
| 361 | |
''' <summary>
''' Saves the inner text value of a xml element to a file.
''' </summary>
''' <param name="value">The xml element whose <c>Value</c> is written.</param>
''' <param name="path">Path of the output file.</param>
''' <param name="encoding">Text encoding handed to the text <c>SaveTo</c> (may be Nothing).</param>
''' <returns>The result of the text <c>SaveTo</c> call.</returns>
<ExportAPI("Write.Text")>
<MethodImpl(MethodImplOptions.AggressiveInlining)>
<Extension> Public Function SaveTo(value As XElement, path$, Optional encoding As Encoding = Nothing) As Boolean
    Dim innerText$ = value.Value
    Return innerText.SaveTo(path, encoding)
End Function
| 374 | |
''' <summary>
''' Saves the markup text of a xml element (its <c>ToString</c> result) to a file.
''' </summary>
''' <param name="html">The element to write.</param>
''' <param name="path$">Path of the output file.</param>
''' <param name="encoding">Text encoding of the output file, <c>UTF8WithoutBOM</c> by default.</param>
''' <returns>The result of the text <c>SaveTo</c> call.</returns>
<MethodImpl(MethodImplOptions.AggressiveInlining)>
<Extension>
Public Function SaveHTML(html As XElement, path$, Optional encoding As Encodings = Encodings.UTF8WithoutBOM) As Boolean
    Dim markup$ = html.ToString
    Dim textEncoding = encoding.CodePage

    Return markup.SaveTo(path, textEncoding)
End Function
| 380 | |
''' <summary>
''' Determines whether the target file is a text file or a binary file, by counting the
''' null bytes at the start of the file.
''' </summary>
''' <param name="path">Full path of the file to check.</param>
''' <param name="chunkSize">
''' Maximum number of leading bytes to inspect, 4KB by default. A value below 1 is treated as 1.
''' </param>
''' <returns>
''' True (text file) when the null bytes make up at most 10% of the inspected bytes, which
''' includes an empty file; otherwise False.
''' </returns>
''' <remarks>Dated 2012-12-05.</remarks>
'''
<ExportAPI("IsTextFile")>
<Extension> Public Function IsTextFile(path$, Optional chunkSize% = 4 * 1024) As Boolean
    Dim remaining As Integer = If(chunkSize > 0, chunkSize, 1)
    ' Upper bound 4095 gives 4096 elements; the file is scanned in blocks of this size.
    Dim block(4095) As Byte
    Dim inspected As Integer = 0
    Dim nulls As Integer = 0

    Using file As New FileStream(path, FileMode.Open, FileAccess.Read)
        Do While remaining > 0
            Dim want As Integer = If(remaining < block.Length, remaining, block.Length)
            ' Read may return fewer bytes than requested, so only the bytes it reports are counted.
            Dim got As Integer = file.Read(block, 0, want)

            If got <= 0 Then
                Exit Do
            End If

            For k As Integer = 0 To got - 1
                If block(k) = 0 Then
                    nulls += 1
                End If
            Next

            inspected += got
            remaining -= got
        Loop
    End Using

    Return nulls <= 0.1 * inspected
End Function
| 410 | |
''' <summary>
''' Writes all strings of the collection into a file, one per line. The parent folder of the
''' file is created automatically when it does not exist.
''' </summary>
''' <param name="array">The lines to write; enumerated through <c>SafeQuery</c>.</param>
''' <param name="path">Path of the output file. An empty path makes the function return False.</param>
''' <param name="encoding">Text encoding of the output; <c>DefaultEncoding</c> is used when this is Nothing.</param>
''' <returns>True after the lines were written; False for an empty path.</returns>
''' <remarks></remarks>
'''
<ExportAPI("Write.Text")>
<Extension> Public Function SaveTo(array As IEnumerable(Of String), path$, Optional encoding As Encoding = Nothing) As Boolean
    If String.IsNullOrEmpty(path) Then
        Return False
    End If

    ' Saving an empty string first reuses the parent-directory creation of the String overload.
    Call "".SaveTo(path)

    ' FileMode.Create truncates on open, so bytes left by the call above (for example an
    ' encoding preamble) cannot remain behind the newly written content.
    Using output As New FileStream(path, FileMode.Create),
        writer As New StreamWriter(output, encoding Or DefaultEncoding)

        For Each line$ In array.SafeQuery
            Call writer.WriteLine(line)
        Next
    End Using

    Return True
End Function
| 438 | |
''' <summary>
''' Saves the text content held by the <see cref="StringBuilder"/> object into a text file.
''' </summary>
''' <param name="sb">The builder whose <c>ToString</c> result is written.</param>
''' <param name="path">Path of the output file.</param>
''' <param name="encoding">Text encoding handed to the text <c>SaveTo</c> (may be Nothing).</param>
''' <returns>The result of the text <c>SaveTo</c> call.</returns>
<ExportAPI("Write.Text")>
<MethodImpl(MethodImplOptions.AggressiveInlining)>
<Extension> Public Function SaveTo(sb As StringBuilder, path$, Optional encoding As Encoding = Nothing) As Boolean
    Dim content$ = sb.ToString
    Return content.SaveTo(path, encoding)
End Function
| 451 | End Module |