1 | #Region "Microsoft.VisualBasic::702658a0f3035fd8221dbcb9eb5490c9, Microsoft.VisualBasic.Core\Net\DomainParser.vb" |
2 | |
3 | ' Author: |
4 | ' |
5 | ' asuka (amethyst.asuka@gcmodeller.org) |
6 | ' xie (genetics@smrucc.org) |
7 | ' xieguigang (xie.guigang@live.com) |
8 | ' |
9 | ' Copyright (c) 2018 GPL3 Licensed |
10 | ' |
11 | ' |
12 | ' GNU GENERAL PUBLIC LICENSE (GPL3) |
13 | ' |
14 | ' |
15 | ' This program is free software: you can redistribute it and/or modify |
16 | ' it under the terms of the GNU General Public License as published by |
17 | ' the Free Software Foundation, either version 3 of the License, or |
18 | ' (at your option) any later version. |
19 | ' |
20 | ' This program is distributed in the hope that it will be useful, |
21 | ' but WITHOUT ANY WARRANTY; without even the implied warranty of |
22 | ' MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
23 | ' GNU General Public License for more details. |
24 | ' |
25 | ' You should have received a copy of the GNU General Public License |
26 | ' along with this program. If not, see <http://www.gnu.org/licenses/>. |
27 | |
28 | |
29 | |
30 | ' /********************************************************************************/ |
31 | |
32 | ' Summaries: |
33 | |
34 | ' Module DomainParser |
35 | ' |
36 | ' Function: Trim, TrimPathAndQuery, (+2 Overloads) TryParse |
37 | ' |
38 | ' |
39 | ' /********************************************************************************/ |
40 | |
41 | #End Region |
42 | |
43 | Imports Microsoft.VisualBasic.Language |
44 | |
45 | Namespace Net |
46 | |
47 | ''' <summary> |
48 | ''' http://sub.domain.com/somefolder/index.html -> domain.com |
49 | ''' somedomain.info -> somedomain.info |
50 | ''' http://anotherdomain.org/home -> anotherdomain.org |
51 | ''' www.subdomain.anothersubdomain.maindomain.com/something/ -> maindomain.com |
52 | ''' </summary> |
53 | Public Module DomainParser |
54 | |
55 | ''' <summary> |
56 | ''' 解析错误会返回空字符串 |
57 | ''' </summary> |
58 | ''' <param name="url"></param> |
59 | ''' <returns></returns> |
60 | Public Function TryParse(url As String, Optional preserveSubdomain As Boolean = False) As String |
61 | url = Trim(url) |
62 | url = TrimPathAndQuery(url, preserveSubdomain) |
63 | Return url |
64 | End Function |
65 | |
66 | Public Function TryParse(url As Value(Of String), ByRef DomainName As DomainName) As Boolean |
67 | If String.IsNullOrEmpty(url = TryParse(+url)) Then |
68 | Return False |
69 | End If |
70 | |
71 | DomainName = New DomainName(+url) |
72 | Return True |
73 | End Function |
74 | |
75 | Private Function TrimPathAndQuery(url As String, preserveSubdomain As Boolean) As String |
76 | url = url.Split(CChar("/")).First |
77 | |
78 | If preserveSubdomain Then |
79 | Return url |
80 | End If |
81 | |
82 | Dim tokens As New List(Of String)(url.Split(CChar("."))) |
83 | |
84 | If tokens.Count = 2 Then |
85 | Return url |
86 | ElseIf tokens.Count = 1 Then |
87 | Return "" |
88 | End If |
89 | |
90 | ' 剩下的这些事token数量大于等于3的情况 |
91 | Dim tld2 As String = tokens(tokens.Count - 2) ' 处理类似于.com.cn这种情况 |
92 | |
93 | ' .com.cn |
94 | ' .co.uk |
95 | ' .ac.cn |
96 | If InStr("co|ac|com|org|net|edu", tld2, CompareMethod.Text) > 0 Then ' .com.cn,,..co.uk的情况,则直接返回 |
97 | ' 取最后的三个token |
98 | If tokens.Count > 3 Then |
99 | tokens = New List(Of String)(tokens.GetRange(tokens.Count - 3, 3)) |
100 | End If |
101 | url = String.Join(".", tokens.ToArray) |
102 | Else |
103 | url = $"{tokens(tokens.Count - 2)}.{tokens(tokens.Count - 1)}" |
104 | End If |
105 | |
106 | Return url |
107 | End Function |
108 | |
109 | Private Function Trim(url As String) As String |
110 | |
111 | For Each protocol As String In {"http://", "file://", "https://", "ftp://"} |
112 | If InStr(url, protocol, CompareMethod.Text) = 1 Then |
113 | url = Mid(url, Len(protocol) + 1) |
114 | Return url |
115 | End If |
116 | Next |
117 | |
118 | If InStr(url, "mailto://", CompareMethod.Text) = 1 Then |
119 | url = url.Split("@"c).Last |
120 | End If |
121 | |
122 | Return url |
123 | End Function |
124 | End Module |
125 | End Namespace |