| 1 | #Region "Microsoft.VisualBasic::702658a0f3035fd8221dbcb9eb5490c9, Microsoft.VisualBasic.Core\Net\DomainParser.vb" |
| 2 | |
| 3 | ' Author: |
| 4 | ' |
| 5 | ' asuka (amethyst.asuka@gcmodeller.org) |
| 6 | ' xie (genetics@smrucc.org) |
| 7 | ' xieguigang (xie.guigang@live.com) |
| 8 | ' |
| 9 | ' Copyright (c) 2018 GPL3 Licensed |
| 10 | ' |
| 11 | ' |
| 12 | ' GNU GENERAL PUBLIC LICENSE (GPL3) |
| 13 | ' |
| 14 | ' |
| 15 | ' This program is free software: you can redistribute it and/or modify |
| 16 | ' it under the terms of the GNU General Public License as published by |
| 17 | ' the Free Software Foundation, either version 3 of the License, or |
| 18 | ' (at your option) any later version. |
| 19 | ' |
| 20 | ' This program is distributed in the hope that it will be useful, |
| 21 | ' but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 22 | ' MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 23 | ' GNU General Public License for more details. |
| 24 | ' |
| 25 | ' You should have received a copy of the GNU General Public License |
| 26 | ' along with this program. If not, see <http://www.gnu.org/licenses/>. |
| 27 | |
| 28 | |
| 29 | |
| 30 | ' /********************************************************************************/ |
| 31 | |
| 32 | ' Summaries: |
| 33 | |
| 34 | ' Module DomainParser |
| 35 | ' |
| 36 | ' Function: Trim, TrimPathAndQuery, (+2 Overloads) TryParse |
| 37 | ' |
| 38 | ' |
| 39 | ' /********************************************************************************/ |
| 40 | |
| 41 | #End Region |
| 42 | |
| 43 | Imports Microsoft.VisualBasic.Language |
| 44 | |
| 45 | Namespace Net |
| 46 | |
''' <summary>
''' Extracts the domain name from a URL-like string, for example:
''' 
''' + http://sub.domain.com/somefolder/index.html -> domain.com
''' + somedomain.info -> somedomain.info
''' + http://anotherdomain.org/home -> anotherdomain.org
''' + www.subdomain.anothersubdomain.maindomain.com/something/ -> maindomain.com
''' </summary>
Public Module DomainParser

    ''' <summary>
    ''' Scheme prefixes that are removed from the head of the input.
    ''' </summary>
    Private ReadOnly protocols As String() = {"http://", "file://", "https://", "ftp://"}

    ''' <summary>
    ''' Second-level labels that are kept together with the last label,
    ''' for example <c>.com.cn</c>, <c>.co.uk</c> and <c>.ac.cn</c>.
    ''' </summary>
    Private ReadOnly secondLevelLabels As String() = {"co", "ac", "com", "org", "net", "edu"}

    ''' <summary>
    ''' Characters that terminate the host part of a URL: the start of the
    ''' path, of the query string, or of the fragment.
    ''' </summary>
    Private ReadOnly hostDelimiters As Char() = {"/"c, "?"c, "#"c}

    ''' <summary>
    ''' Parses the domain name out of <paramref name="url"/>. An empty string
    ''' is returned when the input cannot be parsed (null/empty input, or a
    ''' host that contains no dot while subdomains are not preserved).
    ''' </summary>
    ''' <param name="url">The URL, mail address URL or bare host name to parse.</param>
    ''' <param name="preserveSubdomain">
    ''' When true, the whole host (everything before the first "/", "?" or "#")
    ''' is returned; otherwise only its last two labels, or its last three
    ''' labels when the second-to-last label is one of co/ac/com/org/net/edu.
    ''' </param>
    ''' <returns>The domain name, or an empty string on failure.</returns>
    Public Function TryParse(url As String, Optional preserveSubdomain As Boolean = False) As String
        Return TrimPathAndQuery(Trim(url), preserveSubdomain)
    End Function

    ''' <summary>
    ''' Parses the domain name held by <paramref name="url"/> and wraps it
    ''' in a <see cref="DomainName"/> object.
    ''' </summary>
    ''' <param name="url">Wrapper around the URL text to parse.</param>
    ''' <param name="DomainName">
    ''' Assigned only when the parse succeeds; left untouched otherwise.
    ''' </param>
    ''' <returns>True when a non-empty domain name was parsed.</returns>
    Public Function TryParse(url As Value(Of String), ByRef DomainName As DomainName) As Boolean
        ' NOTE(review): this relies on the "=" and unary "+" operators declared
        ' by Value(Of T), which is defined outside of this file. Presumably
        ' "url = x" stores x in the wrapper and yields it, and "+url" reads the
        ' wrapped string - confirm against the Value(Of T) definition.
        If String.IsNullOrEmpty(url = TryParse(+url)) Then
            Return False
        End If

        DomainName = New DomainName(+url)
        Return True
    End Function

    ''' <summary>
    ''' Cuts the path, query string and fragment off a scheme-less URL and,
    ''' unless <paramref name="preserveSubdomain"/> is set, reduces the host
    ''' to its domain name.
    ''' </summary>
    ''' <param name="url">URL text whose scheme prefix has already been removed.</param>
    ''' <param name="preserveSubdomain">When true the complete host is returned.</param>
    ''' <returns>
    ''' The host or domain name; an empty string for null/empty input, or for
    ''' a host without any dot when subdomains are not preserved.
    ''' </returns>
    Private Function TrimPathAndQuery(url As String, preserveSubdomain As Boolean) As String
        If String.IsNullOrEmpty(url) Then
            Return ""
        End If

        ' The host ends at the first path, query or fragment delimiter.
        Dim host As String = url.Split(hostDelimiters)(0)

        If preserveSubdomain Then
            Return host
        End If

        Dim tokens As String() = host.Split("."c)

        If tokens.Length = 1 Then
            ' A single label is not treated as a domain name.
            Return ""
        ElseIf tokens.Length = 2 Then
            Return host
        End If

        ' From here on the host has three or more labels. Keep the last three
        ' of them for hosts like xxx.com.cn or xxx.co.uk, the last two otherwise.
        Dim tld2 As String = tokens(tokens.Length - 2)
        Dim keep As Integer = If(IsSecondLevelLabel(tld2), 3, 2)

        Return String.Join(".", tokens, tokens.Length - keep, keep)
    End Function

    ''' <summary>
    ''' Tests whether <paramref name="label"/> is exactly one of the known
    ''' second-level labels, ignoring the letter case.
    ''' </summary>
    Private Function IsSecondLevelLabel(label As String) As Boolean
        For Each known As String In secondLevelLabels
            ' Whole-label comparison: a substring search would also accept
            ' fragments such as "t", "m" or an empty label.
            If StrComp(known, label, CompareMethod.Text) = 0 Then
                Return True
            End If
        Next

        Return False
    End Function

    ''' <summary>
    ''' Removes the scheme prefix from the head of <paramref name="url"/>.
    ''' For a mailto URL the text that follows the last "@" is returned.
    ''' </summary>
    ''' <param name="url">The raw URL text.</param>
    ''' <returns>
    ''' The URL without its scheme prefix; an empty string for null/empty
    ''' input; the input itself when no known prefix is found.
    ''' </returns>
    Private Function Trim(url As String) As String
        If String.IsNullOrEmpty(url) Then
            Return ""
        End If

        For Each protocol As String In protocols
            ' InStr(...) = 1 is a case-insensitive "starts with" test.
            If InStr(url, protocol, CompareMethod.Text) = 1 Then
                Return Mid(url, Len(protocol) + 1)
            End If
        Next

        ' Accept the standard "mailto:user@host" form as well as "mailto://user@host".
        If InStr(url, "mailto:", CompareMethod.Text) = 1 Then
            Dim address As String = Mid(url, Len("mailto:") + 1)

            If InStr(address, "//") = 1 Then
                address = Mid(address, 3)
            End If

            ' LastIndexOf returns -1 when there is no "@", so the whole
            ' address is kept in that case.
            Return address.Substring(address.LastIndexOf("@"c) + 1)
        End If

        Return url
    End Function
End Module
| 125 | End Namespace |