| 1 |
#Region "Microsoft.VisualBasic::e5059b152be1ca568fa19373babcb314, Microsoft.VisualBasic.Core\Text\TextEncodings.vb"
|
| 2 |
|
| 3 |
|
| 4 |
|
| 5 |
|
| 6 |
|
| 7 |
|
| 8 |
|
| 9 |
|
| 10 |
|
| 11 |
|
| 12 |
|
| 13 |
|
| 14 |
|
| 15 |
|
| 16 |
|
| 17 |
|
| 18 |
|
| 19 |
|
| 20 |
|
| 21 |
|
| 22 |
|
| 23 |
|
| 24 |
|
| 25 |
|
| 26 |
|
| 27 |
|
| 28 |
|
| 29 |
|
| 30 |
|
| 31 |
|
| 32 |
|
| 33 |
|
| 34 |
|
| 35 |
|
| 36 |
|
| 37 |
|
| 38 |
|
| 39 |
|
| 40 |
|
| 41 |
|
| 42 |
|
| 43 |
|
| 44 |
|
| 45 |
|
| 46 |
|
| 47 |
|
| 48 |
|
| 49 |
|
| 50 |
|
| 51 |
|
| 52 |
|
| 53 |
|
| 54 |
#End Region
|
| 55 |
|
| 56 |
Imports System.Runtime.CompilerServices
|
| 57 |
Imports System.Text
|
| 58 |
Imports defaultEncoding = Microsoft.VisualBasic.Language.Default.DefaultValue(Of System.Text.Encoding)
|
| 59 |
|
| 60 |
Namespace Text
|
| 61 |
|
| 62 |
|
| 63 |
|
| 64 |
|
| 65 |
''' <summary>
''' Identifiers of the text encodings that the ``TextEncodings`` module can
''' resolve to a <see cref="System.Text.Encoding"/> instance.
''' </summary>
''' <remarks>
''' The numeric values are written out explicitly; they are the same values the
''' compiler assigned when only ``Default`` and ``ASCII`` carried an initializer.
''' </remarks>
Public Enum Encodings As Byte

    ''' <summary>Mapped to <see cref="System.Text.Encoding.Default"/> by the module lookup table.</summary>
    [Default] = 0

    ''' <summary>Mapped to <see cref="System.Text.Encoding.ASCII"/>.</summary>
    ASCII = 10

    ''' <summary>Mapped to <see cref="System.Text.Encoding.Unicode"/>.</summary>
    Unicode = 11

    ''' <summary>Mapped to <see cref="System.Text.Encoding.UTF7"/>.</summary>
    UTF7 = 12

    ''' <summary>
    ''' Mapped to <see cref="System.Text.Encoding.UTF8"/> on Microsoft platforms and to the
    ''' BOM-less UTF8 instance elsewhere.
    ''' </summary>
    UTF8 = 13

    ''' <summary>Mapped to the UTF8 instance that does not emit a byte order mark.</summary>
    UTF8WithoutBOM = 14

    ''' <summary>Mapped to <see cref="System.Text.Encoding.Unicode"/>, same instance as <see cref="Unicode"/>.</summary>
    UTF16 = 15

    ''' <summary>Mapped to <see cref="System.Text.Encoding.UTF32"/>.</summary>
    UTF32 = 16

    ''' <summary>Mapped to the GB2312 code page; the module falls back to UTF8 when it can not be loaded.</summary>
    GB2312 = 17
End Enum
|
| 95 |
|
| 96 |
|
| 97 |
|
| 98 |
|
| 99 |
Public Module TextEncodings
|
| 100 |
|
| 101 |
''' <summary>
''' A UTF8 encoding instance that is configured not to emit the UTF8 identifier
''' (byte order mark).
''' </summary>
Public ReadOnly Property UTF8WithoutBOM As UTF8Encoding = New UTF8Encoding(encoderShouldEmitUTF8Identifier:=False)

''' <summary>
''' The default text encoding. It is initialised with <see cref="Encoding.Default"/>
''' and is assigned again by the module constructor, which reads the
''' ``default_encoding`` variable.
''' </summary>
Public ReadOnly Property DefaultEncoding As defaultEncoding = Encoding.Default

''' <summary>
''' UTF8 default value; it is initialised from <see cref="UTF8WithoutBOM"/>.
''' </summary>
Public ReadOnly Property UTF8 As defaultEncoding = UTF8WithoutBOM

''' <summary>
''' Lookup table from an <see cref="Encodings"/> member to its <see cref="Encoding"/>
''' instance. It is built once by ``codePageTable``.
''' </summary>
''' <remarks>
''' NOTE(review): the opening part of the original remarks is not visible in this copy
''' of the file; only the shell commands below survived. They match the GB2312
''' troubleshooting steps documented on the module constructor.
'''
''' ```bash
''' locale -a
'''
''' yum install -y mono-locale-extras
''' ```
''' </remarks>
Public ReadOnly Property TextEncodings As IReadOnlyDictionary(Of Encodings, Encoding) = codePageTable()
|
| 128 |
|
| 129 |
|
| 130 |
''' <summary>
''' Builds the <see cref="Encodings"/> to <see cref="Encoding"/> lookup table.
''' The UTF8 entry receives some extra handling that depends on the platform
''' the program is currently running on.
''' </summary>
''' <returns>A new dictionary with one entry for every <see cref="Encodings"/> member.</returns>
<MethodImpl(MethodImplOptions.AggressiveInlining)>
Private Function codePageTable() As Dictionary(Of Encodings, Encoding)
    ' Microsoft platforms use the framework UTF8 instance,
    ' every other platform uses the instance without the byte order mark.
    Dim platformUtf8 As Encoding = If(App.IsMicrosoftPlatform, Encoding.UTF8, UTF8WithoutBOM)
    Dim table As New Dictionary(Of Encodings, Encoding)

    ' The insertion order is kept identical to the previous collection initializer.
    table.Add(Encodings.ASCII, Encoding.ASCII)
    table.Add(Encodings.GB2312, __gbk2312_encoding())
    table.Add(Encodings.Unicode, Encoding.Unicode)
    table.Add(Encodings.UTF7, Encoding.UTF7)
    table.Add(Encodings.UTF32, Encoding.UTF32)
    table.Add(Encodings.UTF8, platformUtf8)
    table.Add(Encodings.UTF8WithoutBOM, UTF8WithoutBOM)
    table.Add(Encodings.Default, Encoding.Default)
    ' UTF16 is an alias of the Unicode entry
    table.Add(Encodings.UTF16, Encoding.Unicode)

    Return table
End Function
|
| 158 |
|
| 159 |
|
| 160 |
''' <summary>
''' The constructor automatically sets the default encoding from the command line
''' configuration (the ``default_encoding`` variable read through ``App.GetVariable``).
''' </summary>
''' <remarks>
''' ###### Linux reports: Encoding 936 data could not be found.
'''
''' How to deal with it:
'''
''' 1. First run ``locale -a`` to see whether ``gbk`` is installed
''' 2. If it is not installed, the gbk encoding has to be installed first
''' 3. Then install ``mono-locale-extras``
'''
''' ```bash
''' locale -a
''' yum install -y mono-locale-extras
''' ```
''' </remarks>
Sub New()
    Dim configured$ = App.GetVariable("default_encoding")

    If Not configured.StringEmpty Then
        ' an unrecognised name resolves to Encodings.Default
        Dim selected As Encodings = Text.ParseEncodingsName(configured, Encodings.Default)

        DefaultEncoding = selected.CodePage

        Call $"*default_encoding* have been changed to {DefaultEncoding.DefaultValue.ToString}".__INFO_ECHO
    Else
        DefaultEncoding = Encoding.Default
    End If

    ' __gbk2312_encoding returns Encoding.UTF8 when the GB2312 code page could not
    ' be loaded, so a reference comparison detects that fallback.
    Dim gb2312Missing As Boolean = TextEncodings(Encodings.GB2312) Is Encoding.UTF8

    If gb2312Missing Then
        Dim hints As String() = {
            "You can just ignore this warning, or fix this warning by enable the gb2312 encoding on your server.",
            "For enable the gb2312 encoding, you can run commands:",
            "",
            "   yum install -y mono-locale-extras",
            ""
        }

        Call hints.JoinBy(ASCII.LF).Warning
    End If
End Sub
|
| 203 |
|
| 204 |
' Warning text that __gbk2312_encoding writes on non-Microsoft platforms when the
' GB2312 encoding can not be loaded and UTF8 is returned in its place.
' NOTE(review): the literal was cut off right after "didn" in this copy of the file,
' leaving an unterminated string. The tail of the sentence is reconstructed from the
' fallback behaviour of __gbk2312_encoding - confirm against the upstream wording.
Const gb2312_not_enable$ = "It seems that your Linux server didn't enable the gb2312 encoding, the UTF8 encoding will be used instead."
|
| 205 |
|
| 206 |
|
| 207 |
''' <summary>
''' On linux an error happens when gb2312 is not installed; in that case this
''' function uses the UTF8 encoding by default and gives a warning message.
''' </summary>
''' <returns>
''' The ``GB2312`` encoding, or <see cref="Encoding.UTF8"/> when it can not be loaded.
''' </returns>
''' <remarks>
''' NOTE(review): the sentence that introduced the trace below ("If the linux server
''' didn...") and the first trace line are cut off in this copy of the file. The
''' assembly identifiers that followed each frame are omitted here.
'''
''' ```bash
''' [ERROR] FATAL UNHANDLED EXCEPTION: System.Exception: [Path] /home/software/cytonetwork.test/cytonetwork ---> System.Exception: [DIR] /home/software/cytonetwork.test ---> System.TypeInitializationException: The type initializer for ...
'''   at System.Text.Encoding.GetEncoding (System.Int32 codepage) [0x0023f]
'''   at System.Text.Encoding.GetEncoding (System.String name) [0x00012]
'''   at Microsoft.VisualBasic.Text.TextEncodings..cctor () [0x00030]
'''   --- End of inner exception stack trace ---
'''   at Microsoft.VisualBasic.TextDoc.SaveTo (System.String text, System.String path, System.Text.Encoding encoding, System.Boolean append, System.Boolean throwEx) [0x00063]
'''   --- End of inner exception stack trace ---
'''   --- End of inner exception stack trace ---
'''   at Microsoft.VisualBasic.TextDoc.SaveTo (System.String text, System.String path, System.Text.Encoding encoding, System.Boolean append, System.Boolean throwEx) [0x000a7]
'''   at Microsoft.VisualBasic.Language.UnixBash.LinuxRunHelper.BashShell () [0x0001b]
'''   at Microsoft.VisualBasic.CommandLine.Interpreter.__methodInvoke (System.String commandName, System.Object[] argvs, System.String[] help_argvs) [0x001c9]
'''   at Microsoft.VisualBasic.CommandLine.Interpreter.Execute (Microsoft.VisualBasic.CommandLine.CommandLine args) [0x00024]
'''   at Microsoft.VisualBasic.App.RunCLI (System.Type Interpreter, Microsoft.VisualBasic.CommandLine.CommandLine args, System.String caller) [0x00012]
'''   at cytonetwork.Program.Main () [0x0000f]
''' ```
''' </remarks>
Private Function __gbk2312_encoding() As Encoding
    Dim gb2312 As Encoding

    Try
        gb2312 = Encoding.GetEncoding("GB2312")
    Catch ex As Exception
        ' keep a record of the failure before falling back
        Call App.LogException(ex)

        ' the warning text is only written on non-Microsoft platforms
        If Not App.IsMicrosoftPlatform Then
            Call gb2312_not_enable.Warning
        End If

        gb2312 = Encoding.UTF8
    End Try

    Return gb2312
End Function
|
| 245 |
|
| 246 |
|
| 247 |
''' <summary>
''' Get text file save <see cref="Encoding"/> instance
''' </summary>
''' <param name="value">The encoding identifier to resolve.</param>
''' <returns>
''' The encoding registered for <paramref name="value"/> in the lookup table, or
''' <see cref="Encoding.UTF8"/> when the table has no entry for it.
''' </returns>
<MethodImpl(MethodImplOptions.AggressiveInlining)>
<Extension> Public Function CodePage(value As Encodings) As Encoding
    Dim mapped As Encoding = Nothing

    ' One TryGetValue call replaces ContainsKey followed by the indexer,
    ' which searched the table twice for every hit.
    If _TextEncodings.TryGetValue(value, mapped) Then
        Return mapped
    End If

    Return Encoding.UTF8
End Function
|
| 259 |
|
| 260 |
|
| 261 |
''' <summary>
''' Parses the <see cref="Encodings"/> enum value from its string name.
''' </summary>
''' <param name="encoding$">
''' The encoding name; it is compared through ``TextEquals`` with the ``ToString``
''' text of every key in the lookup table.
''' </param>
''' <param name="onFailure">The value that is returned when no key matches the given name.</param>
''' <returns>The first matching key, otherwise <paramref name="onFailure"/>.</returns>
<Extension> Public Function ParseEncodingsName(encoding$, Optional onFailure As Encodings = Encodings.ASCII) As Encodings
    Dim result As Encodings = onFailure

    For Each candidate As Encodings In TextEncodings.Keys
        If encoding.TextEquals(candidate.ToString) Then
            ' stop at the first hit, same as returning from inside the loop
            result = candidate
            Exit For
        End If
    Next

    Return result
End Function
|
| 275 |
|
| 276 |
''' <summary>
''' Resolves an encoding name to its <see cref="Encoding"/> instance: the name is first
''' parsed with ``ParseEncodingsName`` and the result is then looked up in the table.
''' </summary>
''' <param name="encodingName$">The name of the encoding.</param>
''' <param name="default">The identifier that is used when the name is not recognised.</param>
''' <returns>The encoding instance for the parsed identifier.</returns>
<MethodImpl(MethodImplOptions.AggressiveInlining)>
<Extension>
Public Function CodePage(encodingName$, Optional [default] As Encodings = Encodings.Default) As Encoding
    Dim parsed As Encodings = ParseEncodingsName(encodingName, onFailure:=[default])

    Return parsed.CodePage
End Function
|
| 281 |
|
| 282 |
''' <summary>
''' Maps an <see cref="Encoding"/> instance back to its <see cref="Encodings"/> identifier.
''' </summary>
''' <param name="value">The encoding instance to classify.</param>
''' <returns>
''' The identifier whose code page equals the code page of <paramref name="value"/>;
''' <see cref="Encodings.Default"/> when the instance equals <see cref="Encoding.Default"/>
''' and has none of the known code pages; <see cref="Encodings.UTF8"/> in every other
''' case, including a <paramref name="value"/> of Nothing.
''' </returns>
''' <remarks>
''' The previous implementation compared the last segment of ``value.ToString`` with the
''' enum member names. ``Encoding.ToString`` yields the runtime type name (for example
''' ``System.Text.UTF8Encoding``), so the segment (``UTF8Encoding``) never equals a
''' member name (``UTF8``) and every call ended in the UTF8 fallback. The code page
''' number identifies the encoding independent of its type name.
''' </remarks>
Public Function GetEncodings(value As Encoding) As Encodings
    ' Windows code page identifiers of the encodings in the lookup table
    Const asciiCodePage As Integer = 20127
    Const gb2312CodePage As Integer = 936
    Const utf16LittleEndianCodePage As Integer = 1200
    Const utf32LittleEndianCodePage As Integer = 12000
    Const utf7CodePage As Integer = 65000
    Const utf8CodePage As Integer = 65001

    If value Is Nothing Then
        ' same answer as for any other unrecognised encoding
        Return Encodings.UTF8
    End If

    Select Case value.CodePage
        Case asciiCodePage : Return Encodings.ASCII
        Case gb2312CodePage : Return Encodings.GB2312
        Case utf16LittleEndianCodePage : Return Encodings.Unicode
        Case utf32LittleEndianCodePage : Return Encodings.UTF32
        Case utf7CodePage : Return Encodings.UTF7
        Case utf8CodePage : Return Encodings.UTF8
        Case Else
            ' The system default has no fixed code page, so it is checked last,
            ' in the same position the Default case had before.
            If value.Equals(Encoding.Default) Then
                Return Encodings.Default
            End If

            Return Encodings.UTF8
    End Select
End Function
|
| 297 |
|
| 298 |
|
| 299 |
''' <summary>
''' Sometimes a piece of software has requirements on the encoding of its text input;
''' this function can be used to convert the encoding of a text file.
''' For example (per the original author) the R program reads ASCII by default while
''' the default encoding of .NET is UTF8, so this function can convert the target
''' text file into an ASCII encoded text file.
''' </summary>
''' <param name="path">The text file, it is rewritten in place.</param>
''' <param name="encoding">The identifier of the encoding the file is saved with.</param>
''' <param name="from">
''' The encoding used for reading the file; when it is Nothing the file is read
''' through ``IO.File.ReadAllText(path)`` without an explicit encoding.
''' </param>
''' <returns>The result of the final ``SaveTo`` call.</returns>
<Extension>
Public Function TransEncoding(path$, encoding As Encodings, Optional from As Encoding = Nothing) As Boolean
    ' a missing file is first created as an empty file in the target encoding
    If Not path.FileExists Then
        Call "".SaveTo(path, encoding.CodePage)
    End If

    Dim content As String

    If from Is Nothing Then
        content = IO.File.ReadAllText(path)
    Else
        content = IO.File.ReadAllText(path, from)
    End If

    Return content.SaveTo(path, encoding.CodePage)
End Function
|
| 315 |
|
| 316 |
''' <summary>
''' Converts a sequence of characters into the array of their character codes,
''' each one obtained through ``AscW``.
''' </summary>
''' <param name="chars">The characters to convert.</param>
''' <returns>One integer per input character, in the input order.</returns>
<MethodImpl(MethodImplOptions.AggressiveInlining)>
<Extension>
Public Function CodeArray(chars As IEnumerable(Of Char)) As Integer()
    Dim codes As IEnumerable(Of Integer) = chars.Select(Function(ch) AscW(ch))

    Return codes.ToArray
End Function
|
| 321 |
End Module
|
| 322 |
End Namespace
|