1 |
#Region "Microsoft.VisualBasic::da9221189fd1ce2c04d99ca0f8e9821d, Microsoft.VisualBasic.Core\Extensions\Math\Correlations\Ranking.vb"
|
2 |
|
3 |
|
4 |
|
5 |
|
6 |
|
7 |
|
8 |
|
9 |
|
10 |
|
11 |
|
12 |
|
13 |
|
14 |
|
15 |
|
16 |
|
17 |
|
18 |
|
19 |
|
20 |
|
21 |
|
22 |
|
23 |
|
24 |
|
25 |
|
26 |
|
27 |
|
28 |
|
29 |
|
30 |
|
31 |
|
32 |
|
33 |
|
34 |
|
35 |
|
36 |
|
37 |
|
38 |
|
39 |
|
40 |
|
41 |
|
42 |
|
43 |
|
44 |
|
45 |
|
46 |
|
47 |
|
48 |
|
49 |
|
50 |
#End Region
|
51 |
|
52 |
Imports System.Runtime.CompilerServices
|
53 |
Imports Microsoft.VisualBasic.Linq
|
54 |
|
55 |
Namespace Math.Correlations
|
56 |
|
57 |
|
58 |
|
59 |
|
60 |
|
61 |
|
62 |
|
63 |
|
64 |
|
65 |
> https://en.wikipedia.org/wiki/Ranking
|
66 |
</summary>
|
67 |
Public Module Ranking
|
68 |
|
69 |
|
70 |
###### Strategies for assigning rankings
|
71 |
|
72 |
It is not always possible to assign rankings uniquely. For example, in a race
|
73 |
or competition two (or more) entrants might tie for a place in the ranking.
|
74 |
When computing an ordinal measurement, two (or more) of the quantities being
|
75 |
ranked might measure equal. In these cases, one of the strategies shown below
|
76 |
for assigning the rankings may be adopted. A common shorthand way to distinguish
|
77 |
these ranking strategies is by the ranking numbers that would be produced for
|
78 |
four items, with the first item ranked ahead of the second and third (which
|
79 |
compare equal) which are both ranked ahead of the fourth.
|
80 |
</summary>
|
81 |
Public Enum Strategies As Integer
|
82 |
StandardCompetition = 1224
|
83 |
ModifiedCompetition = 1334
|
84 |
DenseRanking = 1223
|
85 |
OrdinalRanking = 1234
|
86 |
FractionalRanking = 1 + 2.5 + 2.5 + 4
|
87 |
End Enum
|
88 |
|
89 |
<MethodImpl(MethodImplOptions.AggressiveInlining)>
|
90 |
<Extension>
|
91 |
Public Function Ranking(Of C As IComparable)(list As IEnumerable(Of C), Optional strategy As Strategies = Strategies.OrdinalRanking, Optional desc As Boolean = False) As Double()
|
92 |
If strategy = Strategies.OrdinalRanking Then
|
93 |
Return list.OrdinalRanking(desc)
|
94 |
ElseIf strategy = Strategies.DenseRanking Then
|
95 |
Return list.DenseRanking(desc)
|
96 |
ElseIf strategy = Strategies.FractionalRanking Then
|
97 |
Return list.FractionalRanking(desc)
|
98 |
ElseIf strategy = Strategies.StandardCompetition Then
|
99 |
Return list.StandardCompetitionRanking(desc)
|
100 |
ElseIf strategy = Strategies.ModifiedCompetition Then
|
101 |
Return list.ModifiedCompetitionRanking(desc)
|
102 |
Else
|
103 |
Throw New NotImplementedException
|
104 |
End If
|
105 |
End Function
|
106 |
|
107 |
|
108 |
###### Modified competition ranking ("1334" ranking)
|
109 |
|
110 |
Sometimes, competition ranking is done by leaving the gaps in the ranking numbers before the sets
|
111 |
of equal-ranking items (rather than after them as in standard competition ranking).[where?] The
|
112 |
number of ranking numbers that are left out in this gap remains one less than the number of items that
|
113 |
compared equal. Equivalently, each item
|
114 |
to it or above it. This ranking ensures that a competitor only comes second if they score higher than
|
115 |
all but one of their opponents, third if they score higher than all but two of their opponents, etc.
|
116 |
|
117 |
Thus if A ranks ahead of B and C (which compare equal) which are both ranked head of D, then A gets
|
118 |
ranking number 1 ("first"), B gets ranking number 3 ("joint third"), C also gets ranking number 3
|
119 |
("joint third") and D gets ranking number 4 ("fourth"). In this case, nobody would get ranking number
|
120 |
2 ("second") and that would be left as a gap.
|
121 |
</summary>
|
122 |
<typeparam name="C"></typeparam>
|
123 |
<param name="list"></param>
|
124 |
<returns></returns>
|
125 |
<Extension> Public Function ModifiedCompetitionRanking(Of C As IComparable)(list As IEnumerable(Of C), Optional desc As Boolean = False) As Double()
|
126 |
Dim array = list _
|
127 |
.SeqIterator _
|
128 |
.ToDictionary(Function(x) x,
|
129 |
Function(i) i.i)
|
130 |
Dim asc() = array _
|
131 |
.Keys _
|
132 |
.Sort(Function(x) x.value, desc) _
|
133 |
.ToArray
|
134 |
Dim ranks#() = New Double(asc.Length - 1) {}
|
135 |
Dim rank% = 0
|
136 |
Dim gaps = array _
|
137 |
.Keys _
|
138 |
.GroupBy(Function(x) x.value) _
|
139 |
.ToDictionary(Function(x) x.First.value,
|
140 |
Function(g) g.Count)
|
141 |
Dim previous As C = asc.Last.value
|
142 |
|
143 |
For i As Integer = 0 To asc.Length - 1
|
144 |
|
145 |
With asc(i)
|
146 |
If .value.CompareTo(previous) = 0 Then
|
147 |
|
148 |
ElseIf gaps.ContainsKey(.value) Then
|
149 |
previous = .value
|
150 |
rank += gaps(.value)
|
151 |
Else
|
152 |
rank += 1
|
153 |
End If
|
154 |
End With
|
155 |
|
156 |
ranks(array(asc(i))) = rank
|
157 |
Next
|
158 |
|
159 |
Return ranks
|
160 |
End Function
|
161 |
|
162 |
|
163 |
###### Standard competition ranking ("1224" ranking)
|
164 |
|
165 |
In competition ranking, items that compare equal receive the same ranking number, and then a gap
|
166 |
is left in the ranking numbers. The number of ranking numbers that are left out in this gap is
|
167 |
one less than the number of items that compared equal. Equivalently, each item
|
168 |
is 1 plus the number of items ranked above it. This ranking strategy is frequently adopted for
|
169 |
competitions, as it means that if two (or more) competitors tie for a position in the ranking,
|
170 |
the position of all those ranked below them is unaffected (i.e., a competitor only comes second if
|
171 |
exactly one person scores better than them, third if exactly two people score better than them,
|
172 |
fourth if exactly three people score better than them, etc.).
|
173 |
|
174 |
Thus if A ranks ahead of B and C (which compare equal) which are both ranked ahead of D, then A
|
175 |
gets ranking number 1 ("first"), B gets ranking number 2 ("joint second"), C also gets ranking
|
176 |
number 2 ("joint second") and D gets ranking number 4 ("fourth").
|
177 |
</summary>
|
178 |
<typeparam name="C"></typeparam>
|
179 |
<param name="list"></param>
|
180 |
<returns></returns>
|
181 |
<Extension> Public Function StandardCompetitionRanking(Of C As IComparable)(list As IEnumerable(Of C), Optional desc As Boolean = False) As Double()
|
182 |
Dim array = list _
|
183 |
.SeqIterator _
|
184 |
.ToDictionary(Function(x) x,
|
185 |
Function(i) i.i)
|
186 |
Dim asc() = array _
|
187 |
.Keys _
|
188 |
.Sort(Function(x) x.value, desc) _
|
189 |
.ToArray
|
190 |
Dim ranks#() = New Double(asc.Length - 1) {}
|
191 |
Dim rank% = 1
|
192 |
Dim gap% = 1
|
193 |
|
194 |
For i As Integer = 0 To asc.Length - 2
|
195 |
With asc(i)
|
196 |
|
197 |
ranks(array(asc(i))) = rank
|
198 |
|
199 |
If .value.CompareTo(asc(i + 1).value) <> 0 Then
|
200 |
rank += gap
|
201 |
gap = 1
|
202 |
Else
|
203 |
gap += 1
|
204 |
End If
|
205 |
End With
|
206 |
Next
|
207 |
|
208 |
ranks(array(asc.Last)) = rank
|
209 |
|
210 |
Return ranks
|
211 |
End Function
|
212 |
|
213 |
|
214 |
###### Dense ranking ("1223" ranking)
|
215 |
|
216 |
In dense ranking, items that compare equal receive the same ranking number, and the next item(s)
|
217 |
receive the immediately following ranking number. Equivalently, each itemis 1
|
218 |
plus the number of items ranked above it that are distinct with respect to the ranking order.
|
219 |
|
220 |
Thus if A ranks ahead of B and C (which compare equal) which are both ranked ahead of D, then A
|
221 |
gets ranking number 1 ("first"), B gets ranking number 2 ("joint second"), C also gets ranking
|
222 |
number 2 ("joint second") and D gets ranking number 3 ("third").
|
223 |
</summary>
|
224 |
<typeparam name="C"></typeparam>
|
225 |
<param name="list"></param>
|
226 |
<returns></returns>
|
227 |
<Extension> Public Function DenseRanking(Of C As IComparable)(list As IEnumerable(Of C), Optional desc As Boolean = False) As Double()
|
228 |
Dim array = list _
|
229 |
.SeqIterator _
|
230 |
.ToDictionary(Function(x) x,
|
231 |
Function(i) i.i)
|
232 |
Dim asc() = array _
|
233 |
.Keys _
|
234 |
.Sort(Function(x) x.value, desc) _
|
235 |
.ToArray
|
236 |
Dim ranks#() = New Double(asc.Length - 1) {}
|
237 |
Dim rank% = 1
|
238 |
|
239 |
For i As Integer = 0 To asc.Length - 2
|
240 |
With asc(i)
|
241 |
|
242 |
ranks(array(asc(i))) = rank
|
243 |
|
244 |
If .value.CompareTo(asc(i + 1).value) <> 0 Then
|
245 |
rank += 1
|
246 |
End If
|
247 |
End With
|
248 |
Next
|
249 |
|
250 |
ranks(array(asc.Last)) = rank
|
251 |
|
252 |
Return ranks
|
253 |
End Function
|
254 |
|
255 |
|
256 |
###### Ordinal ranking ("1234" ranking)
|
257 |
|
258 |
In ordinal ranking, all items receive distinct ordinal numbers, including items that compare equal.
|
259 |
The assignment of distinct ordinal numbers to items that compare equal can be done at random,
|
260 |
or arbitrarily, but it is generally preferable to use a system that is arbitrary but consistent,
|
261 |
as this gives stable results if the ranking is done multiple times. An example of an arbitrary but
|
262 |
consistent system would be to incorporate other attributes into the ranking order (such as
|
263 |
alphabetical ordering of the competitor
|
264 |
|
265 |
With this strategy, if A ranks ahead of B and C (which compare equal) which are both ranked ahead of D,
|
266 |
then A gets ranking number 1 ("first") and D gets ranking number 4 ("fourth"), and either B gets
|
267 |
ranking number 2 ("second") and C gets ranking number 3 ("third") or C gets ranking number 2 ("second")
|
268 |
and B gets ranking number 3 ("third").
|
269 |
|
270 |
In computer data processing, ordinal ranking is also referred to as "row numbering".
|
271 |
</summary>
|
272 |
<typeparam name="C"></typeparam>
|
273 |
<param name="list"></param>
|
274 |
<returns></returns>
|
275 |
<Extension> Public Function OrdinalRanking(Of C As IComparable)(list As IEnumerable(Of C), Optional desc As Boolean = False) As Double()
|
276 |
Dim array = list _
|
277 |
.SeqIterator _
|
278 |
.ToDictionary(Function(x) x,
|
279 |
Function(i) i.i)
|
280 |
Dim asc() = array _
|
281 |
.Keys _
|
282 |
.Sort(Function(x) x.value, desc) _
|
283 |
.ToArray
|
284 |
Dim ranks#() = New Double(asc.Length - 1) {}
|
285 |
Dim rank% = 1
|
286 |
|
287 |
For i As Integer = 0 To asc.Length - 1
|
288 |
|
289 |
ranks(array(asc(i))) = rank
|
290 |
rank += 1
|
291 |
Next
|
292 |
|
293 |
Return ranks
|
294 |
End Function
|
295 |
|
296 |
|
297 |
###### Fractional ranking ("1 2.5 2.5 4" ranking)
|
298 |
|
299 |
Items that compare equal receive the same ranking number, which is the mean
|
300 |
of what they would have under ordinal rankings. Equivalently, the ranking
|
301 |
number of 1 plus the number of items ranked above it plus half the number
|
302 |
of items equal to it. This strategy has the property that the sum of the
|
303 |
ranking numbers is the same as under ordinal ranking. For this reason, it
|
304 |
is used in computing Borda counts and in statistical tests (see below).
|
305 |
|
306 |
Thus if A ranks ahead of B and C (which compare equal) which are both ranked
|
307 |
ahead of D, then A gets ranking number 1 ("first"), B and C each get ranking
|
308 |
number 2.5 (average of "joint second/third") and D gets ranking number 4
|
309 |
("fourth").
|
310 |
|
311 |
Here is an example: Suppose you have the data set 1.0, 1.0, 2.0, 3.0, 3.0, 4.0, 5.0, 5.0, 5.0.
|
312 |
The ordinal ranks are 1, 2, 3, 4, 5, 6, 7, 8, 9.
|
313 |
For v = 1.0, the fractional rank is the average of the ordinal ranks: (1 + 2) / 2 = 1.5.
|
314 |
In a similar manner, for v = 5.0, the fractional rank is (7 + 8 + 9) / 3 = 8.0.
|
315 |
Thus the fractional ranks are: 1.5, 1.5, 3.0, 4.5, 4.5, 6.0, 8.0, 8.0, 8.0
|
316 |
</summary>
|
317 |
<typeparam name="C"></typeparam>
|
318 |
<param name="list"></param>
|
319 |
<returns></returns>
|
320 |
<Extension> Public Function FractionalRanking(Of C As IComparable)(list As IEnumerable(Of C), Optional desc As Boolean = False) As Double()
|
321 |
Dim vector As C() = list.ToArray
|
322 |
Dim array As SeqValue(Of C)() = vector.SeqIterator.ToArray
|
323 |
Dim ranks#() = vector.OrdinalRanking(desc)
|
324 |
Dim equals = array.GroupBy(Function(x) x.value)
|
325 |
|
326 |
For Each g As IGrouping(Of C, SeqValue(Of C)) In equals
|
327 |
Dim avgRanks# = Aggregate i In g Into Average(ranks(i))
|
328 |
|
329 |
For Each i As SeqValue(Of C) In g
|
330 |
ranks(i.i) = avgRanks
|
331 |
Next
|
332 |
Next
|
333 |
|
334 |
Return ranks
|
335 |
End Function
|
336 |
End Module
|
337 |
End Namespace
|