| 1 | #Region "Microsoft.VisualBasic::a0fcd8f9dc61e9692270663703b1ba91, Microsoft.VisualBasic.Core\Language\Language\Python\Regexp.vb" |
| 2 | |
| 3 | ' Author: |
| 4 | ' |
| 5 | ' asuka (amethyst.asuka@gcmodeller.org) |
| 6 | ' xie (genetics@smrucc.org) |
| 7 | ' xieguigang (xie.guigang@live.com) |
| 8 | ' |
| 9 | ' Copyright (c) 2018 GPL3 Licensed |
| 10 | ' |
| 11 | ' |
| 12 | ' GNU GENERAL PUBLIC LICENSE (GPL3) |
| 13 | ' |
| 14 | ' |
| 15 | ' This program is free software: you can redistribute it and/or modify |
| 16 | ' it under the terms of the GNU General Public License as published by |
| 17 | ' the Free Software Foundation, either version 3 of the License, or |
| 18 | ' (at your option) any later version. |
| 19 | ' |
| 20 | ' This program is distributed in the hope that it will be useful, |
| 21 | ' but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 22 | ' MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 23 | ' GNU General Public License for more details. |
| 24 | ' |
| 25 | ' You should have received a copy of the GNU General Public License |
| 26 | ' along with this program. If not, see <http://www.gnu.org/licenses/>. |
| 27 | |
| 28 | |
| 29 | |
| 30 | ' /********************************************************************************/ |
| 31 | |
| 32 | ' Summaries: |
| 33 | |
| 34 | ' Module re |
| 35 | ' |
| 36 | ' Function: __trimComment, FindAll |
| 37 | ' Structure Match |
| 38 | ' |
| 39 | ' Constructor: (+1 Overloads) Sub New |
| 40 | ' Function: ToString |
| 41 | ' |
| 42 | ' |
| 43 | ' |
| 44 | ' |
| 45 | ' /********************************************************************************/ |
| 46 | |
| 47 | #End Region |
| 48 | |
| 49 | Imports System.Text.RegularExpressions |
| 50 | Imports Microsoft.VisualBasic.Linq |
| 51 | |
| 52 | Namespace Language.Python |
| 53 | |
| 54 | ''' <summary> |
| 55 | ''' This module provides regular expression matching operations similar to those found in Perl, modeled on the ``re`` module of the Python language. |
| 56 | ''' </summary> |
| 57 | Public Module re |
| 58 | |
''' <summary>
''' Thin wrapper around a .NET regular expression match that exposes the
''' captured groups through an indexer accepting negative, Python-style
''' indices (counted back from the last group).
''' </summary>
Public Structure Match

    ''' <summary>
    ''' The wrapped .NET match. It is Nothing in a default-initialized
    ''' structure value, because the only constructor is Friend.
    ''' </summary>
    Private __raw As System.Text.RegularExpressions.Match

    ''' <summary>
    ''' Wraps an existing .NET match.
    ''' </summary>
    ''' <param name="m">The match to wrap.</param>
    Friend Sub New(m As System.Text.RegularExpressions.Match)
        __raw = m
    End Sub

    ''' <summary>
    ''' Gets the text captured by the group at <paramref name="index"/>.
    ''' </summary>
    ''' <param name="index">
    ''' Group number. A negative value is offset by the group count, so
    ''' <c>-1</c> addresses the last group.
    ''' </param>
    ''' <returns>
    ''' The group's <c>Value</c>; an empty string when this structure wraps
    ''' no match.
    ''' </returns>
    Default Public ReadOnly Property Value(index%) As String
        Get
            ' A default structure value has nothing to index into.
            If __raw Is Nothing Then
                Return String.Empty
            End If

            If index < 0 Then
                index = __raw.Groups.Count + index
            End If

            Return __raw.Groups.Item(index).Value
        End Get
    End Property

    ''' <summary>
    ''' Converts the match to its matched text, same result as <see cref="ToString"/>.
    ''' </summary>
    Public Shared Narrowing Operator CType(m As Match) As String
        Return m.ToString()
    End Operator

    ''' <summary>
    ''' <see cref="System.Text.RegularExpressions.Match.Value"/> of the wrapped match.
    ''' </summary>
    ''' <returns>
    ''' The matched text; an empty string when this structure wraps no match.
    ''' </returns>
    Public Overrides Function ToString() As String
        If __raw Is Nothing Then
            Return String.Empty
        End If

        Return __raw.Value
    End Function
End Structure
| 88 | |
''' <summary>
''' Runs <paramref name="pattern"/> against <paramref name="input"/> and returns
''' one <see cref="Match"/> wrapper per entry of the resulting match collection,
''' in the spirit of Python's ``re.findall``.
''' </summary>
''' <param name="pattern">
''' Regular expression source, possibly spread over several lines. It is trimmed,
''' split with ``LineTokens`` (presumably on line breaks; defined elsewhere),
''' each piece is passed through ``__trimComment``, empty pieces are dropped and
''' the remaining pieces are concatenated without a separator before matching.
''' </param>
''' <param name="input">The text to search.</param>
''' <param name="options">Options forwarded unchanged to ``Regex.Matches``.</param>
''' <returns>
''' An ``Array(Of Match)`` built from the wrapped matches. Captured groups are
''' reachable through the indexer of each <see cref="Match"/>.
''' </returns>
Public Function FindAll(pattern$, input$, Optional options As RegexOptions = RegexOptions.None) As Array(Of Match)
    ' Strip the comment part of every pattern line and keep what is left.
    Dim fragments As String() = (
        From line In pattern.Trim.LineTokens
        Let code = __trimComment(line)
        Where Not String.IsNullOrEmpty(code)
        Select code
    ).ToArray

    Dim expression As String = String.Join("", fragments)
    Dim found As MatchCollection = Regex.Matches(input, expression, options)

    ' Sequence (project helper) supplies the indices used to read the collection.
    Dim wrapped As IEnumerable(Of Match) =
        From i In found.Count.Sequence
        Select New Match(found(i))

    Return New Array(Of Match)(wrapped)
End Function
| 115 | |
''' <summary>
''' Removes a Python-style comment from one line of a multi-line pattern.
''' Every comment is assumed to start with a hash sign followed by one space
''' (``# ``); a hash sign not followed by a space is kept as pattern text.
''' </summary>
''' <param name="s">One line of the pattern. Nothing is treated as an empty line.</param>
''' <returns>
''' The text in front of the first ``# `` marker (or the whole line when there
''' is no marker), with surrounding white space removed. The result is empty
''' when the whole line is a comment.
''' </returns>
Private Function __trimComment(s As String) As String
    If s Is Nothing Then
        Return ""
    End If

    ' Search before trimming: trimming first would eat the space of a marker
    ' that is followed only by white space, leaving a stray "#" in the pattern.
    ' Ordinal comparison keeps the search independent of the current culture.
    Dim i As Integer = s.IndexOf("# ", System.StringComparison.Ordinal)

    If i > -1 Then
        ' Index 0 (or only white space before the marker) means the entire
        ' line is a comment; the final Trim reduces it to an empty string.
        s = s.Substring(0, i)
    End If

    Return s.Trim
End Function
| 134 | End Module |
| 135 | End Namespace |