| 1 | #Region "Microsoft.VisualBasic::bef434b26946e143966850cc00512dec, Microsoft.VisualBasic.Core\Text\Parser\FormattedParser.vb" |
| 2 | |
| 3 | ' Author: |
| 4 | ' |
| 5 | ' asuka (amethyst.asuka@gcmodeller.org) |
| 6 | ' xie (genetics@smrucc.org) |
| 7 | ' xieguigang (xie.guigang@live.com) |
| 8 | ' |
| 9 | ' Copyright (c) 2018 GPL3 Licensed |
| 10 | ' |
| 11 | ' |
| 12 | ' GNU GENERAL PUBLIC LICENSE (GPL3) |
| 13 | ' |
| 14 | ' |
| 15 | ' This program is free software: you can redistribute it and/or modify |
| 16 | ' it under the terms of the GNU General Public License as published by |
| 17 | ' the Free Software Foundation, either version 3 of the License, or |
| 18 | ' (at your option) any later version. |
| 19 | ' |
| 20 | ' This program is distributed in the hope that it will be useful, |
| 21 | ' but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 22 | ' MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 23 | ' GNU General Public License for more details. |
| 24 | ' |
| 25 | ' You should have received a copy of the GNU General Public License |
| 26 | ' along with this program. If not, see <http://www.gnu.org/licenses/>. |
| 27 | |
| 28 | |
| 29 | |
| 30 | ' /********************************************************************************/ |
| 31 | |
| 32 | ' Summaries: |
| 33 | |
| 34 | ' Module FormattedParser |
| 35 | ' |
| 36 | ' Function: CrossFields, FieldParser, FlagSplit |
| 37 | ' Delegate Function |
| 38 | ' |
| 39 | ' Function: ReadHead, UntilBlank |
| 40 | ' |
| 41 | ' |
| 42 | ' |
| 43 | ' /********************************************************************************/ |
| 44 | |
| 45 | #End Region |
| 46 | |
| 47 | Imports System.Runtime.CompilerServices |
| 48 | Imports System.Text.RegularExpressions |
| 49 | Imports Microsoft.VisualBasic.Linq |
| 50 | Imports Microsoft.VisualBasic.Language |
| 51 | |
| 52 | Namespace Text |
| 53 | |
| 54 | ''' <summary> |
| 55 | ''' Parser API for the well formatted documents. |
| 56 | ''' </summary> |
| 57 | Public Module FormattedParser |
| 58 | |
''' <summary>
''' Partitions a sequence of text lines into blocks, where every line accepted by
''' <paramref name="isFlag"/> opens a new block.
''' </summary>
''' <param name="source">The text lines to group; they are consumed in order.</param>
''' <param name="isFlag">
''' Test that is evaluated once for every line. A line for which it answers <c>True</c>
''' becomes the first line of a new block.
''' </param>
''' <returns>
''' The blocks in input order. Lines located in front of the first flag line are returned
''' as a block of their own, and an empty block is never produced.
''' </returns>
<Extension>
Public Iterator Function FlagSplit(source As IEnumerable(Of String), isFlag As Func(Of String, Boolean)) As IEnumerable(Of String())
    ' Given the text below, with ">>" marking the start of a section:
    '
    '   >> .........
    '   ............
    '   ............
    '   >> .........
    '   >> .........
    '
    ' the result consists of three blocks: the first three lines together,
    ' followed by each of the two remaining ">>" lines as a block of its own.
    Dim block As New List(Of String)

    For Each current As String In source
        ' The predicate is evaluated first so that it sees every single line.
        ' A flag line closes whatever has been collected so far.
        If isFlag(current) AndAlso block.Count > 0 Then
            Yield block.ToArray()
            block.Clear()
        End If

        block.Add(current)
    Next

    ' Hand out the trailing block, which no later flag line has closed.
    If block.Count <> 0 Then
        Yield block.ToArray()
    End If
End Function
| 101 | |
''' <summary>
''' Measures the column layout described by a ruler line such as
''' <c>  ------- ------ ----- ------- ------ ----- ---- -- -------- -----------</c>
''' </summary>
''' <param name="s">The ruler line: runs of <c>-</c> characters separated by gaps.</param>
''' <returns>
''' Two lengths for every dash run, in left-to-right order: first the width of the gap in
''' front of the run (0 when the run starts the line), then the width of the run itself.
''' Text behind the last dash run is not reported. A null or empty line gives an empty array.
''' </returns>
<Extension>
Public Function CrossFields(s As String) As Integer()
    Dim fieldLens As New List(Of Integer)

    If String.IsNullOrEmpty(s) Then
        Return fieldLens.ToArray()
    End If

    ' End position of the previous dash run; the gap in front of the next run is
    ' measured from here. Working with match positions keeps every gap attached to
    ' the run that follows it, also when the line does not begin with whitespace.
    Dim cursor As Integer = 0

    For Each run As Match In Regex.Matches(s, "-+")
        fieldLens.Add(run.Index - cursor)
        fieldLens.Add(run.Length)
        cursor = run.Index + run.Length
    Next

    Return fieldLens.ToArray()
End Function
| 120 | |
''' <summary>
''' Cuts a text line into consecutive fragments according to a list of field lengths.
''' </summary>
''' <param name="s">The input text line.</param>
''' <param name="pos">
''' The text length of each field. Every entry except the last one is used as a fragment
''' width; the last fragment always takes the remainder of the line, so the value of the
''' last entry is not read.
''' </param>
''' <returns>
''' One fragment per entry of <paramref name="pos"/> (a single fragment holding the whole
''' line when <paramref name="pos"/> is empty). When the line is shorter than the declared
''' widths, the fragments that reach beyond its end are cut short or empty.
''' </returns>
<Extension>
Public Function FieldParser(s As String, pos As Integer()) As String()
    Dim fields As New List(Of String)
    Dim offset As Integer = 0

    For i As Integer = 0 To pos.Length - 2
        ' The start position accumulates from the lengths of the preceding fields.
        ' Both the start and the width are clamped to the available text so that a
        ' short line (for example one whose trailing blanks were trimmed) still parses.
        Dim start As Integer = System.Math.Min(offset, s.Length)
        Dim width As Integer = System.Math.Min(pos(i), s.Length - start)

        fields.Add(s.Substring(start, width))
        offset += pos(i)
    Next

    ' The last field is whatever is left of the line.
    fields.Add(s.Substring(System.Math.Min(offset, s.Length)))

    Return fields.ToArray()
End Function
| 140 | |
''' <summary>
''' Predicate that decides whether the parser pointer should keep moving forward.
''' </summary>
''' <param name="s">The text line that the parser has just read.</param>
''' <returns><c>True</c> to continue with the next line, <c>False</c> to stop.</returns>
Public Delegate Function DoContinute(s As String) As Boolean
| 147 | |
''' <summary>
''' Cuts the head section out of a document that is given as an array of text lines.
''' </summary>
''' <param name="buf">The lines of the document.</param>
''' <param name="offset">
''' The parser pointer. It is handed by reference to <c>Read</c> for every line that is
''' tested, and its final value is both reported back to the caller and used as the
''' number of lines copied into the result.
''' </param>
''' <param name="__isHead">
''' Condition evaluated on each line that is read; reading goes on for as long as it
''' answers <c>True</c>.
''' </param>
''' <returns>
''' A new array holding <paramref name="offset"/> lines (its value on return) copied from
''' <paramref name="buf"/>, starting at <c>Scan0</c>.
''' </returns>
''' <remarks>
''' NOTE(review): presumably <c>Read</c> returns the line at <paramref name="offset"/> and then
''' advances it by one. If so, the line that stopped the scan is part of the result, and a
''' document in which every line satisfies the condition is read past its end - confirm.
''' NOTE(review): the copy always starts at <c>Scan0</c> (presumably index 0), whatever the
''' incoming value of <paramref name="offset"/> was - confirm this is intended.
''' </remarks>
<Extension>
Public Function ReadHead(buf As String(), ByRef offset As Integer, __isHead As DoContinute) As String()
    ' The loop has no body on purpose: the pointer is moved by Read inside the condition.
    While __isHead(buf.Read(offset))
    End While

    Dim head(offset - 1) As String

    Array.ConstrainedCopy(buf, Scan0, head, Scan0, offset)

    Return head
End Function
| 167 | |
''' <summary>
''' Tells whether a text line still carries content, as judged by the <c>StringEmpty</c> check.
''' The signature matches the <see cref="DoContinute"/> delegate.
''' </summary>
''' <param name="s">The text line to examine.</param>
''' <returns>
''' <c>False</c> when <c>StringEmpty</c> reports the line as empty, otherwise <c>True</c>.
''' </returns>
<MethodImpl(MethodImplOptions.AggressiveInlining)>
Public Function UntilBlank(s As String) As Boolean
    Dim blank As Boolean = s.StringEmpty

    Return Not blank
End Function
| 172 | End Module |
| 173 | End Namespace |