1 | #Region "Microsoft.VisualBasic::c878a4f80f35c6b74634e73312d5657c, Microsoft.VisualBasic.Core\Scripting\TokenIcer\TokenParser.vb" |
2 | |
3 | ' Author: |
4 | ' |
5 | ' asuka (amethyst.asuka@gcmodeller.org) |
6 | ' xie (genetics@smrucc.org) |
7 | ' xieguigang (xie.guigang@live.com) |
8 | ' |
9 | ' Copyright (c) 2018 GPL3 Licensed |
10 | ' |
11 | ' |
12 | ' GNU GENERAL PUBLIC LICENSE (GPL3) |
13 | ' |
14 | ' |
15 | ' This program is free software: you can redistribute it and/or modify |
16 | ' it under the terms of the GNU General Public License as published by |
17 | ' the Free Software Foundation, either version 3 of the License, or |
18 | ' (at your option) any later version. |
19 | ' |
20 | ' This program is distributed in the hope that it will be useful, |
21 | ' but WITHOUT ANY WARRANTY; without even the implied warranty of |
22 | ' MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
23 | ' GNU General Public License for more details. |
24 | ' |
25 | ' You should have received a copy of the GNU General Public License |
26 | ' along with this program. If not, see <http://www.gnu.org/licenses/>. |
27 | |
28 | |
29 | |
30 | ' /********************************************************************************/ |
31 | |
32 | ' Summaries: |
33 | |
34 | ' Class TokenParser |
35 | ' |
36 | ' Properties: InputString, UNDEFINED |
37 | ' |
38 | ' Constructor: (+2 Overloads) Sub New |
39 | ' |
40 | ' Function: GetToken, (+2 Overloads) Peek |
41 | ' |
42 | ' Sub: PrepareRegex, ResetParser |
43 | ' |
44 | ' |
45 | ' /********************************************************************************/ |
46 | |
47 | #End Region |
48 | |
49 | ' This file was Auto Generated with TokenIcer |
50 | Imports System.Collections.Generic |
51 | Imports System.Text.RegularExpressions |
52 | |
53 | Namespace Scripting.TokenIcer |
54 | |
''' <summary>
''' TokenParser
''' </summary>
''' <typeparam name="Tokens">The type used to identify the kind of a token.</typeparam>
''' <remarks>
''' TokenParser is the main parser engine for converting input into lexical tokens.
'''
''' Every rule is pre-matched over the whole input string when <see cref="InputString"/>
''' is assigned. A rule therefore only yields a token at a position where one of its
''' pre-computed matches begins. Rules are probed in the enumeration order of the
''' internal rule dictionary, and the first rule with a non-empty match at the
''' current position wins.
'''
''' Auto Generated from
''' http://www.codeproject.com/Articles/220042/Easily-Create-Your-Own-Parser
''' </remarks>
Public Class TokenParser(Of Tokens As IComparable)

    ' The regex rule (pattern text) of each token type
    Private ReadOnly _tokens As Dictionary(Of Tokens, String)
    ' The pre-computed matches of each rule against the current input string
    Private ReadOnly _regExMatchCollection As Dictionary(Of Tokens, MatchCollection)
    ' The string that is being parsed
    Private _inputString As String
    ' The position in the input string where the next GetToken() call starts
    Private _index As Integer

    ''' <summary>
    ''' InputString Property
    ''' </summary>
    ''' <value>
    ''' The string value that holds the input string. Assigning it resets the
    ''' parser and pre-matches every rule against the new input. A value of
    ''' <c>Nothing</c> is treated as an empty string.
    ''' </value>
    Public WriteOnly Property InputString() As String
        Set
            Call ResetParser()
            ' Never store Nothing: GetToken() and Peek() read _inputString.Length
            _inputString = If(Value, String.Empty)
            Call PrepareRegex()
        End Set
    End Property

    ''' <summary>
    ''' The token type that is reported when no rule matches at the current position.
    ''' </summary>
    Public ReadOnly Property UNDEFINED As Tokens

    ''' <summary>
    ''' Creates a parser from a sequence of (token type, regex pattern) pairs.
    ''' </summary>
    ''' <param name="tokens">Values is the regex expression</param>
    ''' <param name="UNDEFINED">The token type that is used for unmatched input</param>
    ''' <remarks>
    ''' The pairs are converted into a dictionary and forwarded to the dictionary based constructor.
    ''' </remarks>
    Public Sub New(tokens As IEnumerable(Of KeyValuePair(Of Tokens, String)), UNDEFINED As Tokens)
        Call Me.New(tokens.ToDictionary, UNDEFINED)
    End Sub

    ''' <summary>
    ''' Creates a parser from a dictionary of regex rules.
    ''' </summary>
    ''' <param name="tokens">Values is the regex expression</param>
    ''' <param name="UNDEFINED">The token type that is used for unmatched input</param>
    ''' <remarks>
    ''' The constructor initializes memory and copies all of the tokens into the token dictionary.
    ''' The input string starts out empty, so <see cref="GetToken"/> returns nothing until
    ''' <see cref="InputString"/> has been assigned.
    ''' </remarks>
    Public Sub New(tokens As Dictionary(Of Tokens, String), UNDEFINED As Tokens)
        _tokens = New Dictionary(Of Tokens, String)(tokens)
        _regExMatchCollection = New Dictionary(Of Tokens, MatchCollection)()
        _index = 0
        _inputString = String.Empty
        _UNDEFINED = UNDEFINED
    End Sub

    ''' <summary>
    ''' PrepareRegex prepares the regex for parsing by pre-matching the Regex tokens.
    ''' </summary>
    Private Sub PrepareRegex()
        _regExMatchCollection.Clear()

        For Each rule As KeyValuePair(Of Tokens, String) In _tokens
            _regExMatchCollection.Add(rule.Key, Regex.Matches(_inputString, rule.Value))
        Next
    End Sub

    ''' <summary>
    ''' ResetParser resets the parser to its initial state. Reloading InputString is required.
    ''' </summary>
    ''' <seealso cref="TokenParser(Of Tokens).InputString" />
    Public Sub ResetParser()
        _index = 0
        _inputString = String.Empty
        _regExMatchCollection.Clear()
    End Sub

    ''' <summary>
    ''' Looks up the first rule that owns a pre-computed, non-empty match which
    ''' begins exactly at <paramref name="position"/>.
    ''' </summary>
    ''' <param name="position">The index in the input string where the match must begin</param>
    ''' <param name="kind">Receives the token type of the matching rule</param>
    ''' <returns>The match, or nothing if no rule has a match at that position</returns>
    Private Function FindMatchAt(position As Integer, ByRef kind As Tokens) As Match
        For Each rule As KeyValuePair(Of Tokens, MatchCollection) In _regExMatchCollection
            For Each candidate As Match In rule.Value
                ' The matches are probed in collection order; once one begins
                ' past the requested position this rule is given up.
                If candidate.Index > position Then
                    Exit For
                End If

                ' An empty match would produce a token that does not consume any
                ' input, which makes the caller's parse loop spin forever.
                If candidate.Index = position AndAlso candidate.Length > 0 Then
                    kind = rule.Key
                    Return candidate
                End If
            Next
        Next

        Return Nothing
    End Function

    ''' <summary>
    ''' GetToken gets the next token in queue
    ''' </summary>
    ''' <returns>
    ''' The next token, or nothing when the end of the input string has been reached.
    ''' </returns>
    ''' <remarks>
    ''' GetToken attempts to the match the next character(s) using the
    ''' Regex rules defined in the dictionary. If a match can not be
    ''' located, then an Undefined token will be created with an empty
    ''' string value. In addition, the token pointer will be incremented
    ''' by one so that this token doesn't attempt to get identified again by
    ''' GetToken()
    ''' </remarks>
    Public Function GetToken() As Token(Of Tokens)
        ' Returning nothing signals the end of the input string, a parse
        ' loop can use this as its termination condition.
        If _index >= _inputString.Length Then
            Return Nothing
        End If

        Dim kind As Tokens = Nothing
        Dim hit As Match = FindMatchAt(_index, kind)

        If hit Is Nothing Then
            ' No rule matches here: skip one character and report it as undefined
            _index += 1
            Return New Token(Of Tokens)(UNDEFINED, "")
        End If

        _index += hit.Length
        Return New Token(Of Tokens)(kind, hit.Value)
    End Function

    ''' <summary>
    ''' Returns the next token that GetToken() will return.
    ''' </summary>
    ''' <seealso cref="TokenParser(Of Tokens).Peek(PeekToken(Of Tokens))" />
    Public Function Peek() As PeekToken(Of Tokens)
        Return Peek(New PeekToken(Of Tokens)(_index, New Token(Of Tokens)(UNDEFINED, "")))
    End Function

    ''' <summary>
    ''' Returns the next token after the Token passed here
    ''' </summary>
    ''' <param name="peekToken">The PeekToken token returned from a previous Peek() call</param>
    ''' <returns>
    ''' The token that begins at the index stored in <paramref name="peekToken"/> together
    ''' with the index that follows it, or nothing if that index is at or behind the end of
    ''' the input string.
    ''' </returns>
    ''' <remarks>
    ''' Peeking uses the same lookup as <see cref="GetToken"/> and never changes the
    ''' current position of the parser.
    ''' </remarks>
    ''' <seealso cref="Peek()" />
    Public Function Peek(peekToken As PeekToken(Of Tokens)) As PeekToken(Of Tokens)
        Dim start As Integer = peekToken.TokenIndex

        If start < 0 Then
            Throw New ArgumentOutOfRangeException(NameOf(peekToken), "TokenIndex must not be negative.")
        End If
        If start >= _inputString.Length Then
            Return Nothing
        End If

        Dim kind As Tokens = Nothing
        Dim hit As Match = FindMatchAt(start, kind)

        If hit Is Nothing Then
            ' Mirrors GetToken(): an undefined token skips exactly one character
            Return New PeekToken(Of Tokens)(start + 1, New Token(Of Tokens)(UNDEFINED, ""))
        End If

        Return New PeekToken(Of Tokens)(start + hit.Length, New Token(Of Tokens)(kind, hit.Value))
    End Function
End Class
227 | End Namespace |