1 | #Region "Microsoft.VisualBasic::bef434b26946e143966850cc00512dec, Microsoft.VisualBasic.Core\Text\Parser\FormattedParser.vb" |
2 | |
3 | ' Author: |
4 | ' |
5 | ' asuka (amethyst.asuka@gcmodeller.org) |
6 | ' xie (genetics@smrucc.org) |
7 | ' xieguigang (xie.guigang@live.com) |
8 | ' |
9 | ' Copyright (c) 2018 GPL3 Licensed |
10 | ' |
11 | ' |
12 | ' GNU GENERAL PUBLIC LICENSE (GPL3) |
13 | ' |
14 | ' |
15 | ' This program is free software: you can redistribute it and/or modify |
16 | ' it under the terms of the GNU General Public License as published by |
17 | ' the Free Software Foundation, either version 3 of the License, or |
18 | ' (at your option) any later version. |
19 | ' |
20 | ' This program is distributed in the hope that it will be useful, |
21 | ' but WITHOUT ANY WARRANTY; without even the implied warranty of |
22 | ' MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
23 | ' GNU General Public License for more details. |
24 | ' |
25 | ' You should have received a copy of the GNU General Public License |
26 | ' along with this program. If not, see <http://www.gnu.org/licenses/>. |
27 | |
28 | |
29 | |
30 | ' /********************************************************************************/ |
31 | |
32 | ' Summaries: |
33 | |
34 | ' Module FormattedParser |
35 | ' |
36 | ' Function: CrossFields, FieldParser, FlagSplit |
37 | ' Delegate Function |
38 | ' |
39 | ' Function: ReadHead, UntilBlank |
40 | ' |
41 | ' |
42 | ' |
43 | ' /********************************************************************************/ |
44 | |
45 | #End Region |
46 | |
47 | Imports System.Runtime.CompilerServices |
48 | Imports System.Text.RegularExpressions |
49 | Imports Microsoft.VisualBasic.Linq |
50 | Imports Microsoft.VisualBasic.Language |
51 | |
52 | Namespace Text |
53 | |
54 | ''' <summary> |
''' Parser API for well-formatted text documents.
56 | ''' </summary> |
57 | Public Module FormattedParser |
58 | |
''' <summary>
''' Groups a sequence of text lines into blocks, opening a new block at every
''' line for which <paramref name="isFlag"/> returns True.
''' </summary>
''' <param name="source">The text lines to group, consumed in order.</param>
''' <param name="isFlag">
''' Test that recognizes the first line of a block. It is invoked exactly once per line.
''' </param>
''' <returns>
''' A lazily produced sequence of line arrays. A flag line is kept as the first
''' element of the block it opens; lines that appear before the first flag line
''' are returned as a block of their own. Nothing is produced for an empty input.
''' </returns>
<Extension> Public Iterator Function FlagSplit(source As IEnumerable(Of String), isFlag As Func(Of String, Boolean)) As IEnumerable(Of String())
    ' Example, using ">>" as the block delimiter:
    '
    '   >> .........
    '   ............
    '   ............
    '   >> .........
    '   >> .........
    '
    ' is split into three blocks:
    '
    '   >> .........
    '   ............
    '   ............
    ' and
    '   >> .........
    ' and
    '   >> .........

    Dim block As New List(Of String)

    For Each row As String In source
        ' Evaluate the predicate first so that it runs for every line,
        ' even while the pending block is still empty.
        Dim opensBlock As Boolean = isFlag(row)

        If opensBlock AndAlso block.Count > 0 Then
            ' A new block starts here: hand out a snapshot of the previous one.
            Yield block.ToArray()
            block.Clear()
        End If

        block.Add(row)
    Next

    ' Flush whatever is left after the last flag line.
    If block.Count <> 0 Then
        Yield block.ToArray()
    End If
End Function
101 | |
''' <summary>
''' Measures a dash "ruler" line, for example
''' <c>------- ------ ----- ------- ------ ----- ---- -- -------- -----------</c>,
''' and returns the widths it describes.
''' </summary>
''' <param name="s">The ruler line: runs of <c>-</c> separated by whitespace.</param>
''' <returns>
''' Alternating lengths, two entries per dash run: first the length of the
''' whitespace gap paired with that run, then the length of the dash run itself.
''' When the ruler starts directly with a dash run, the leading gap is reported as 0.
''' Whitespace after the last dash run is not reported. An input without any
''' dash run gives an empty array.
''' </returns>
<Extension> Public Function CrossFields(s As String) As Integer()
    Dim gaps As MatchCollection = Regex.Matches(s, "\s+")
    Dim dashes As MatchCollection = Regex.Matches(s, "-+")
    Dim fieldLens As New List(Of Integer)

    ' The i-th gap is paired with the i-th dash run. That pairing only lines up
    ' when the text begins with whitespace; if the first dash run comes before
    ' the first gap (or there is no gap at all), the gaps are shifted by one so
    ' that the first dash run gets a zero-length gap instead of running past
    ' the end of the gap list.
    Dim shift As Integer = 0

    If dashes.Count > 0 Then
        If gaps.Count = 0 OrElse gaps(0).Index > dashes(0).Index Then
            shift = 1
        End If
    End If

    For i As Integer = 0 To dashes.Count - 1
        Dim g As Integer = i - shift
        Dim gapLen As Integer = 0

        ' Guard both ends: a missing gap counts as zero characters.
        If g >= 0 AndAlso g < gaps.Count Then
            gapLen = gaps(g).Length
        End If

        fieldLens.Add(gapLen)
        fieldLens.Add(dashes(i).Length)
    Next

    Return fieldLens.ToArray
End Function
120 | |
''' <summary>
''' Cuts a text line into consecutive fragments according to a list of field lengths.
''' </summary>
''' <param name="s">The input text line.</param>
''' <param name="pos">
''' The text length of each field, in order. The last entry's value is not used
''' as a length: the last field always takes the rest of the line.
''' </param>
''' <returns>
''' One string per entry of <paramref name="pos"/> (a single string holding the
''' whole line when <paramref name="pos"/> is empty). When the line is shorter
''' than the declared lengths, the affected fields are truncated or empty
''' instead of raising an <see cref="ArgumentOutOfRangeException"/>.
''' </returns>
<Extension> Public Function FieldParser(s As String, pos As Integer()) As String()
    Dim fields As New List(Of String)
    Dim offset As Integer = 0
    Dim total As Integer = s.Length

    ' Every field except the last one has an explicit width.
    For i As Integer = 0 To pos.Length - 2
        ' The start position accumulates the declared widths; both the start
        ' and the width are clamped to the text that is actually available,
        ' so lines with trimmed trailing columns still parse.
        Dim start As Integer = If(offset < total, offset, total)
        Dim width As Integer = pos(i)

        If width > total - start Then
            width = total - start
        End If

        fields.Add(s.Substring(start, width))
        offset += pos(i)
    Next

    ' The last field is everything that remains (possibly nothing).
    fields.Add(s.Substring(If(offset < total, offset, total)))

    Return fields.ToArray
End Function
140 | |
''' <summary>
''' Condition that tells the parser whether to keep moving its line pointer forward.
''' </summary>
''' <param name="s">The text line that is being tested.</param>
''' <returns>True to continue with the next line; False to stop.</returns>
Public Delegate Function DoContinute(s As String) As Boolean
147 | |
''' <summary>
''' Reads the head section of a document: walks the line buffer for as long as
''' <paramref name="__isHead"/> accepts the line that was read, then returns the
''' leading part of the buffer.
''' </summary>
''' <param name="buf">The document, one element per text line.</param>
''' <param name="offset">
''' Read position in <paramref name="buf"/>. It is passed by reference, so the
''' caller receives the new offset value when the function returns.
''' </param>
''' <param name="__isHead">Condition for moving the parser pointer on to the next line.</param>
''' <returns>
''' A new array holding the first <paramref name="offset"/> elements of
''' <paramref name="buf"/>, using the value of <paramref name="offset"/> after the scan.
''' </returns>
<Extension>
Public Function ReadHead(buf As String(), ByRef offset As Integer, __isHead As DoContinute) As String()
    ' All of the work happens in the loop condition. Read is defined elsewhere;
    ' presumably it returns the element at offset and advances offset - confirm.
    ' NOTE(review): if Read advances offset even for the line that fails the
    ' test, that line ends up in the returned head section - confirm intent.
    While __isHead(buf.Read(offset))
    End While

    ' Upper bound offset - 1 gives an array of exactly offset elements.
    Dim head(offset - 1) As String

    ' Scan0 is declared elsewhere (presumably index 0 - verify).
    Array.ConstrainedCopy(buf, Scan0, head, Scan0, offset)

    Return head
End Function
167 | |
''' <summary>
''' A condition with the same signature as <see cref="DoContinute"/>: keep going
''' until an empty line is met.
''' </summary>
''' <param name="s">The current text line.</param>
''' <returns>
''' False when <c>s.StringEmpty</c> reports the line as empty, True otherwise.
''' (StringEmpty is defined elsewhere; what exactly it treats as empty is not
''' visible here - verify.)
''' </returns>
<MethodImpl(MethodImplOptions.AggressiveInlining)>
Public Function UntilBlank(s As String) As Boolean
    If s.StringEmpty Then
        Return False
    End If

    Return True
End Function
172 | End Module |
173 | End Namespace |