| 1 |
#Region "Microsoft.VisualBasic::1099d60b78992290328f879c558bae05, Microsoft.VisualBasic.Core\ComponentModel\DataSource\Tsv.vb"
|
| 2 |
|
| 3 |
|
| 4 |
|
| 5 |
|
| 6 |
|
| 7 |
|
| 8 |
|
| 9 |
|
| 10 |
|
| 11 |
|
| 12 |
|
| 13 |
|
| 14 |
|
| 15 |
|
| 16 |
|
| 17 |
|
| 18 |
|
| 19 |
|
| 20 |
|
| 21 |
|
| 22 |
|
| 23 |
|
| 24 |
|
| 25 |
|
| 26 |
|
| 27 |
|
| 28 |
|
| 29 |
|
| 30 |
|
| 31 |
|
| 32 |
|
| 33 |
|
| 34 |
|
| 35 |
|
| 36 |
|
| 37 |
|
| 38 |
|
| 39 |
|
| 40 |
|
| 41 |
#End Region
|
| 42 |
|
| 43 |
Imports System.IO
|
| 44 |
Imports System.Reflection
|
| 45 |
Imports System.Runtime.CompilerServices
|
| 46 |
Imports System.Text
|
| 47 |
Imports Microsoft.VisualBasic.ComponentModel.Collection
|
| 48 |
Imports Microsoft.VisualBasic.ComponentModel.DataSourceModel.SchemaMaps
|
| 49 |
Imports Microsoft.VisualBasic.Language
|
| 50 |
Imports Microsoft.VisualBasic.Language.Default
|
| 51 |
Imports Microsoft.VisualBasic.Linq
|
| 52 |
Imports Microsoft.VisualBasic.Text
|
| 53 |
Imports FieldTuple = System.Collections.Generic.KeyValuePair(Of String, System.Reflection.PropertyInfo)
|
| 54 |
Imports RowTokens = System.Collections.Generic.IEnumerable(Of System.String)
|
| 55 |
|
| 56 |
Namespace ComponentModel.DataSourceModel
|
| 57 |
|
| 58 |
|
| 59 |
|
| 60 |
|
| 61 |
|
| 62 |
|
| 63 |
Public Module TsvFileIO
|
| 64 |
|
| 65 |
|
| 66 |
|
| 67 |
|
| 68 |
|
| 69 |
|
| 70 |
|
| 71 |
''' <summary>
''' Loads a tab-separated file and maps every data row onto a new instance of
''' <typeparamref name="T"/>. Each schema field is looked up by name in the
''' header line to find the column that holds its value.
''' </summary>
''' <typeparam name="T">Row type; one instance is created per data row via <see cref="Activator.CreateInstance(Type)"/>.</typeparam>
''' <param name="path">Location of the TSV file. Its first line is read as the column headers and is excluded from the data rows.</param>
''' <param name="encoding">Text encoding used for reading the file.</param>
''' <returns>A lazily evaluated sequence with one populated object per data row.</returns>
Public Iterator Function Load(Of T As Class)(path$, Optional encoding As Encodings = Encodings.UTF8) As IEnumerable(Of T)
    Dim data As IEnumerable(Of RowTokens) = TsvFileIO.LoadFile(path, encoding.CodePage, skipFirstLine:=True)
    Dim tableSchema = DataFramework.Schema(Of T)(PropertyAccess.ReadWrite, True)
    Dim type As Type = GetType(T)
    Dim schemaOrdinals As Index(Of String)

    ' The reader is only needed for the header line, so release the file
    ' handle deterministically instead of leaving it open until finalization.
    Using reader As StreamReader = path.OpenReader(encoding.CodePage)
        schemaOrdinals = reader.GetTsvHeader(False)
    End Using

    For Each line As String() In data.Select(Function(r) DirectCast(r, String()))
        Dim o As Object = Activator.CreateInstance(type)

        For Each field As FieldTuple In tableSchema
            ' Header name -> column position within the current row.
            Dim index As Integer = schemaOrdinals(field.Key)
            Dim s$ = line(index)
            ' Convert the raw text to the declared type of the target property.
            Dim value As Object = Scripting.CTypeDynamic(s, field.Value.PropertyType)

            Call field.Value.SetValue(o, value)
        Next

        Yield DirectCast(o, T)
    Next
End Function
|
| 99 |
|
| 100 |
|
| 101 |
|
| 102 |
|
| 103 |
|
| 104 |
|
| 105 |
|
| 106 |
|
| 107 |
|
| 108 |
|
| 109 |
|
| 110 |
''' <summary>
''' Loads a tab-separated file and maps every row onto a new instance of
''' <typeparamref name="T"/>, pairing columns with properties by the
''' <c>Index</c> of their <see cref="DataFrameColumnAttribute"/> mapping
''' instead of by header name. No line is skipped, so the first line is
''' treated as data as well.
''' </summary>
''' <typeparam name="T">Row type; one instance is created per row via <see cref="Activator.CreateInstance(Type)"/>.</typeparam>
''' <param name="path">Location of the TSV file.</param>
''' <param name="encoding">Text encoding used for reading the file.</param>
''' <param name="base">Offset handed to <c>SeqIterator</c> when numbering the columns of the first row
''' (presumably the index value that denotes the first column; confirm against the attribute usage).</param>
''' <returns>A lazily evaluated sequence with one object per row; empty when the file has no rows.</returns>
Public Iterator Function LoadByIndex(Of T As Class)(path$, Optional encoding As Encodings = Encodings.UTF8, Optional base% = 0) As IEnumerable(Of T)
    Dim data As IEnumerable(Of RowTokens) = TsvFileIO.LoadFile(path, encoding.CodePage, skipFirstLine:=False)
    Dim firstRow As RowTokens = data.FirstOrDefault

    ' An empty file has no row to size the column table from; end the
    ' iteration instead of failing with "Sequence contains no elements".
    If firstRow Is Nothing Then
        Return
    End If

    Dim type As Type = GetType(T)
    ' Mapping index -> bound member. The dictionary is only used for keyed
    ' lookups, so no ordering of the mappings is needed.
    Dim index = DataFrameColumnAttribute _
        .LoadMapping(type, mapsAll:=False) _
        .Values _
        .ToDictionary(Function(i) i.field.Index)
    ' One slot per column of the first row: the property that receives the
    ' column value, or Nothing when no mapping exists for that column.
    Dim fields As PropertyInfo() = firstRow _
        .Count _
        .SeqIterator(offset:=base) _
        .Select(Function(i)
                    If index.ContainsKey(i) Then
                        Return DirectCast(index(i).member, PropertyInfo)
                    Else
                        Return Nothing
                    End If
                End Function) _
        .ToArray

    For Each line As RowTokens In data
        Dim o = Activator.CreateInstance(type)

        For Each col As SeqValue(Of String) In line.SeqIterator
            With fields(col)
                ' Columns without a mapped property are ignored.
                If Not .IsNothing Then
                    Call .SetValue(
                        obj:=o,
                        value:=Scripting.CTypeDynamic(col.value, .PropertyType))
                End If
            End With
        Next

        Yield DirectCast(o, T)
    Next
End Function
|
| 154 |
|
| 155 |
|
| 156 |
|
| 157 |
|
| 158 |
' Default header processor: an identity function that returns each header string
' unchanged. GetTsvHeader combines it with its optional "process" argument
' through the Or operator (process Or withoutProcess).
ReadOnly withoutProcess As New DefaultValue(Of Func(Of String, String))(Function(str) str)
|
| 159 |
|
| 160 |
|
| 161 |
|
| 162 |
|
| 163 |
''' <summary>
''' Reads one line from <paramref name="stream"/>, splits it on TAB and builds
''' an index over the resulting column titles.
''' </summary>
''' <param name="stream">Reader positioned at the header line; exactly one line is consumed.</param>
''' <param name="lower">When true, every title is lower-cased before it is indexed.</param>
''' <param name="process">Optional transformation applied to each raw title; when omitted the titles are used unchanged.</param>
''' <returns>The index over the column titles; an empty index when the stream has no line left to read.</returns>
''' <remarks>
''' On Linux under mono, <see cref="StreamReader.ReadLine()"/> has been observed to keep
''' returning a null value here (suspected runtime bug), so a missing line is
''' handled explicitly instead of being dereferenced.
''' </remarks>
<Extension>
Public Function GetTsvHeader(stream As StreamReader,
                             Optional lower As Boolean = False,
                             Optional process As Func(Of String, String) = Nothing) As Index(Of String)

    Dim line$ = stream.ReadLine
    Dim headers$() = New String() {}

    ' ReadLine yields Nothing at end of stream; keep the title list empty in
    ' that case rather than failing with a NullReferenceException on Split.
    If line IsNot Nothing Then
        headers = line _
            .Split(ASCII.TAB) _
            .Select(selector:=process Or withoutProcess) _
            .ToArray
    End If

    If lower Then
        Return headers _
            .Select(AddressOf Strings.LCase) _
            .Indexing
    Else
        Return New Index(Of String)(headers)
    End If
End Function
|
| 189 |
|
| 190 |
|
| 191 |
''' <summary>
''' Reads the text file and splits every line on the TAB character.
''' </summary>
''' <param name="path">Location of the text file.</param>
''' <param name="encoding">Text encoding used for reading; combined with <c>UTF8</c> through the
''' <c>Or</c> operator, which presumably supplies UTF-8 when this argument is Nothing.</param>
''' <param name="skipFirstLine">The first line of the text document may hold the title headers; pass true to leave it out of the result.</param>
''' <returns>One token sequence per line, in file order.</returns>
Private Function LoadFile(path$, Optional encoding As Encoding = Nothing, Optional skipFirstLine As Boolean = False) As IEnumerable(Of RowTokens)
    Dim textLines As String() = TextDoc.ReadAllLines(path, encoding Or UTF8)
    ' Tokenize each line; the string array is exposed through the RowTokens alias.
    Dim rows = LinqAPI.Exec(Of RowTokens)() <= textLines.Select(Function(text) DirectCast(Strings.Split(text, vbTab), RowTokens))

    If Not skipFirstLine Then
        Return rows
    End If

    ' Drop the leading header line.
    Return rows.Skip(1)
End Function
|
| 211 |
End Module
|
| 212 |
End Namespace
|