| 1 | #Region "Microsoft.VisualBasic::c878a4f80f35c6b74634e73312d5657c, Microsoft.VisualBasic.Core\Scripting\TokenIcer\TokenParser.vb" |
| 2 | |
| 3 | ' Author: |
| 4 | ' |
| 5 | ' asuka (amethyst.asuka@gcmodeller.org) |
| 6 | ' xie (genetics@smrucc.org) |
| 7 | ' xieguigang (xie.guigang@live.com) |
| 8 | ' |
| 9 | ' Copyright (c) 2018 GPL3 Licensed |
| 10 | ' |
| 11 | ' |
| 12 | ' GNU GENERAL PUBLIC LICENSE (GPL3) |
| 13 | ' |
| 14 | ' |
| 15 | ' This program is free software: you can redistribute it and/or modify |
| 16 | ' it under the terms of the GNU General Public License as published by |
| 17 | ' the Free Software Foundation, either version 3 of the License, or |
| 18 | ' (at your option) any later version. |
| 19 | ' |
| 20 | ' This program is distributed in the hope that it will be useful, |
| 21 | ' but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 22 | ' MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 23 | ' GNU General Public License for more details. |
| 24 | ' |
| 25 | ' You should have received a copy of the GNU General Public License |
| 26 | ' along with this program. If not, see <http://www.gnu.org/licenses/>. |
| 27 | |
| 28 | |
| 29 | |
| 30 | ' /********************************************************************************/ |
| 31 | |
| 32 | ' Summaries: |
| 33 | |
| 34 | ' Class TokenParser |
| 35 | ' |
| 36 | ' Properties: InputString, UNDEFINED |
| 37 | ' |
| 38 | ' Constructor: (+2 Overloads) Sub New |
| 39 | ' |
| 40 | ' Function: GetToken, (+2 Overloads) Peek |
| 41 | ' |
| 42 | ' Sub: PrepareRegex, ResetParser |
| 43 | ' |
| 44 | ' |
| 45 | ' /********************************************************************************/ |
| 46 | |
| 47 | #End Region |
| 48 | |
| 49 | ' This file was Auto Generated with TokenIcer |
| 50 | Imports System.Collections.Generic |
| 51 | Imports System.Text.RegularExpressions |
| 52 | |
| 53 | Namespace Scripting.TokenIcer |
| 54 | |
''' <summary>
''' TokenParser
''' </summary>
''' <typeparam name="Tokens">
''' The type used to identify each token rule; it is the key of the rule dictionary.
''' </typeparam>
''' <remarks>
''' TokenParser is the main parser engine for converting input into lexical tokens.
''' Every rule is a regular expression; all rules are matched against the whole input
''' once, when <see cref="TokenParser(Of Tokens).InputString" /> is assigned, and the
''' parser then walks the input by looking up the pre-computed matches.
'''
''' Auto Generated from
''' http://www.codeproject.com/Articles/220042/Easily-Create-Your-Own-Parser
''' </remarks>
Public Class TokenParser(Of Tokens As IComparable)

    ' Regex rules: token key -> regular expression pattern
    Private ReadOnly _tokens As Dictionary(Of Tokens, String)
    ' Pre-computed matches of every rule against the current input string
    Private ReadOnly _regExMatchCollection As Dictionary(Of Tokens, MatchCollection)
    ' The string that is being parsed (never Nothing)
    Private _inputString As String
    ' Position in the input string where the next GetToken() call starts
    Private _index As Integer

    ''' <summary>
    ''' InputString Property
    ''' </summary>
    ''' <value>
    ''' The string value that holds the input string.
    ''' </value>
    ''' <remarks>
    ''' Assigning a value resets the parser and pre-matches every rule against the
    ''' new input. A value of <c>Nothing</c> is treated as an empty string, so the
    ''' parser is never left holding a null input.
    ''' </remarks>
    Public WriteOnly Property InputString() As String
        Set
            Call ResetParser()
            ' Coerce Nothing to "" - otherwise PrepareRegex throws half way through and
            ' every later GetToken()/Peek() call fails on _inputString.Length.
            _inputString = If(Value, String.Empty)
            Call PrepareRegex()
        End Set
    End Property

    ''' <summary>
    ''' The token key that is reported when no rule matches at the current position.
    ''' </summary>
    Public ReadOnly Property UNDEFINED As Tokens

    ''' <summary>
    ''' Creates a parser from a sequence of token rules.
    ''' </summary>
    ''' <param name="tokens">The rules: the key identifies the token, the value is the regex expression</param>
    ''' <param name="UNDEFINED">The token key to report when no rule matches</param>
    ''' <remarks>
    ''' The sequence is converted into a dictionary and handed over to the dictionary based constructor.
    ''' </remarks>
    Public Sub New(tokens As IEnumerable(Of KeyValuePair(Of Tokens, String)), UNDEFINED As Tokens)
        Call Me.New(tokens.ToDictionary, UNDEFINED)
    End Sub

    ''' <summary>
    ''' Creates a parser from a dictionary of token rules.
    ''' </summary>
    ''' <param name="tokens">The rules: the key identifies the token, the value is the regex expression</param>
    ''' <param name="UNDEFINED">The token key to report when no rule matches</param>
    ''' <remarks>
    ''' The constructor initializes memory and copies all of the tokens into the token dictionary,
    ''' so later changes to <paramref name="tokens"/> do not affect this parser.
    ''' </remarks>
    Public Sub New(tokens As Dictionary(Of Tokens, String), UNDEFINED As Tokens)
        _tokens = New Dictionary(Of Tokens, String)(tokens)
        _regExMatchCollection = New Dictionary(Of Tokens, MatchCollection)()
        _index = 0
        _inputString = String.Empty
        _UNDEFINED = UNDEFINED
    End Sub

    ''' <summary>
    ''' PrepareRegex prepares the regex for parsing by pre-matching the Regex tokens.
    ''' </summary>
    Private Sub PrepareRegex()
        _regExMatchCollection.Clear()

        For Each rule As KeyValuePair(Of Tokens, String) In _tokens
            _regExMatchCollection.Add(rule.Key, Regex.Matches(_inputString, rule.Value))
        Next
    End Sub

    ''' <summary>
    ''' ResetParser resets the parser to its initial state. Reloading InputString is required.
    ''' </summary>
    ''' <seealso cref="TokenParser(Of Tokens).InputString" />
    Public Sub ResetParser()
        _index = 0
        _inputString = String.Empty
        _regExMatchCollection.Clear()
    End Sub

    ''' <summary>
    ''' Looks up the first rule that has a pre-computed, non-empty match starting exactly
    ''' at <paramref name="position"/>. This is the single lookup shared by
    ''' <see cref="GetToken()"/> and both Peek overloads, so they always agree.
    ''' </summary>
    ''' <param name="position">The index in the input string where the token must start</param>
    ''' <param name="rule">Receives the key of the matching rule; left untouched when nothing matches</param>
    ''' <returns>The match, or <c>Nothing</c> when no rule matches at that position</returns>
    Private Function FindMatchAt(position As Integer, ByRef rule As Tokens) As Match
        For Each pair As KeyValuePair(Of Tokens, MatchCollection) In _regExMatchCollection
            For Each found As Match In pair.Value
                If found.Index > position Then
                    ' Matches are ordered by index: nothing further can start at position
                    Exit For
                End If

                ' A zero-length match would never advance the parser, so a caller
                ' looping on GetToken() would spin forever; skip it.
                If found.Index = position AndAlso found.Length > 0 Then
                    rule = pair.Key
                    Return found
                End If
            Next
        Next

        Return Nothing
    End Function

    ''' <summary>
    ''' GetToken gets the next token in queue
    ''' </summary>
    ''' <returns>
    ''' The next token, or <c>Nothing</c> when the end of the input string has been reached.
    ''' </returns>
    ''' <remarks>
    ''' GetToken attempts to match the next character(s) using the
    ''' Regex rules defined in the dictionary. If a match can not be
    ''' located, then an Undefined token will be created with an empty
    ''' string value. In addition, the token pointer will be incremented
    ''' by one so that this token doesn't attempt to get identified again by
    ''' GetToken()
    ''' </remarks>
    Public Function GetToken() As Token(Of Tokens)
        ' At the end of the input we return Nothing; a parsing loop can
        ' use that as its termination condition.
        If _index >= _inputString.Length Then
            Return Nothing
        End If

        Dim rule As Tokens = Nothing
        Dim hit As Match = FindMatchAt(_index, rule)

        If hit Is Nothing Then
            ' No rule matches here: skip one character and report an Undefined token
            _index += 1
            Return New Token(Of Tokens)(UNDEFINED, "")
        End If

        _index += hit.Length
        Return New Token(Of Tokens)(rule, hit.Value)
    End Function

    ''' <summary>
    ''' Returns the next token that GetToken() will return, without consuming it.
    ''' </summary>
    ''' <returns>
    ''' The peeked token, or <c>Nothing</c> when the end of the input string has been reached.
    ''' </returns>
    ''' <seealso cref="TokenParser(Of Tokens).Peek(PeekToken(Of Tokens))" />
    Public Function Peek() As PeekToken(Of Tokens)
        Return Peek(New PeekToken(Of Tokens)(_index, New Token(Of Tokens)(UNDEFINED, "")))
    End Function

    ''' <summary>
    ''' Returns the next token after the Token passed here
    ''' </summary>
    ''' <param name="peekToken">The PeekToken token returned from a previous Peek() call</param>
    ''' <returns>
    ''' The token that starts at <c>peekToken.TokenIndex</c>, paired with the index that
    ''' follows it, or <c>Nothing</c> when that index is at or past the end of the input.
    ''' </returns>
    ''' <exception cref="ArgumentOutOfRangeException">The token index of <paramref name="peekToken"/> is negative.</exception>
    ''' <remarks>
    ''' Peeking uses the same pre-computed matches as GetToken() and never modifies the
    ''' parser position, so a failed peek cannot leave the parser in a different state.
    ''' </remarks>
    ''' <seealso cref="Peek()" />
    Public Function Peek(peekToken As PeekToken(Of Tokens)) As PeekToken(Of Tokens)
        Dim position As Integer = peekToken.TokenIndex

        If position < 0 Then
            Throw New ArgumentOutOfRangeException("peekToken", "The token index can not be negative.")
        End If
        If position >= _inputString.Length Then
            Return Nothing
        End If

        Dim rule As Tokens = Nothing
        Dim hit As Match = FindMatchAt(position, rule)

        If hit Is Nothing Then
            ' Mirrors GetToken(): an Undefined token consumes exactly one character
            Return New PeekToken(Of Tokens)(position + 1, New Token(Of Tokens)(UNDEFINED, ""))
        End If

        Return New PeekToken(Of Tokens)(position + hit.Length, New Token(Of Tokens)(rule, hit.Value))
    End Function
End Class
| 227 | End Namespace |