1 | #Region "Microsoft.VisualBasic::e2b7be939df9c777bafc51825a4cc984, Microsoft.VisualBasic.Core\ComponentModel\Algorithm\SCS.vb" |
2 | |
3 | ' Author: |
4 | ' |
5 | ' asuka (amethyst.asuka@gcmodeller.org) |
6 | ' xie (genetics@smrucc.org) |
7 | ' xieguigang (xie.guigang@live.com) |
8 | ' |
9 | ' Copyright (c) 2018 GPL3 Licensed |
10 | ' |
11 | ' |
12 | ' GNU GENERAL PUBLIC LICENSE (GPL3) |
13 | ' |
14 | ' |
15 | ' This program is free software: you can redistribute it and/or modify |
16 | ' it under the terms of the GNU General Public License as published by |
17 | ' the Free Software Foundation, either version 3 of the License, or |
18 | ' (at your option) any later version. |
19 | ' |
20 | ' This program is distributed in the hope that it will be useful, |
21 | ' but WITHOUT ANY WARRANTY; without even the implied warranty of |
22 | ' MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
23 | ' GNU General Public License for more details. |
24 | ' |
25 | ' You should have received a copy of the GNU General Public License |
26 | ' along with this program. If not, see <http://www.gnu.org/licenses/>. |
27 | |
28 | |
29 | |
30 | ' /********************************************************************************/ |
31 | |
32 | ' Summaries: |
33 | |
34 | ' Module SCS |
35 | ' |
36 | ' Function: Coverage, MaxPrefixLength, ShortestCommonSuperString |
37 | ' |
38 | ' Sub: TableView |
39 | ' |
40 | ' |
41 | ' /********************************************************************************/ |
42 | |
43 | #End Region |
44 | |
45 | Imports System.IO |
46 | Imports System.Runtime.CompilerServices |
47 | Imports Microsoft.VisualBasic.Language |
48 | |
49 | Namespace ComponentModel.Algorithm |
50 | |
51 | ''' <summary> |
52 | ''' shortest_common_superstring |
53 | ''' </summary> |
54 | ''' <remarks> |
55 | ''' https://github.com/aakash01/codebase/blob/60394bf92eb09410c07eec1c4d3c81cf0fc72a70/src/com/aakash/practice/interviewbit_may2017/dynamic_programming/ShortestCommonSuperString.java |
56 | ''' </remarks> |
57 | Public Module SCS |
58 | |
59 | <Extension> |
60 | Public Sub TableView(fragments As IEnumerable(Of String), SCS$, ByRef print As TextWriter, Optional empty As Char = "."c) |
61 | Dim lines As New List(Of String) |
62 | |
63 | Call print.WriteLine(SCS) |
64 | |
65 | For Each str As String In fragments |
66 | Dim start% = InStr(SCS, str) - 1 |
67 | Dim ends% = start + str.Length |
68 | Dim lefts = SCS.Length - ends |
69 | Dim view$ = New String(empty, start) & str & New String(empty, lefts) |
70 | |
71 | lines += view |
72 | Next |
73 | |
74 | Call print.WriteLine($"#Coverage={Coverage(lines, blank:=empty)}") |
75 | Call print.WriteLine(lines.JoinBy(print.NewLine)) |
76 | Call print.Flush() |
77 | End Sub |
78 | |
79 | ''' <summary> |
80 | ''' 使用重叠程度最高的片段作为统计的标准 |
81 | ''' </summary> |
82 | ''' <param name="table"></param> |
83 | ''' <returns></returns> |
84 | Public Function Coverage(table As IEnumerable(Of String), Optional blank As Char = "."c) As Integer |
85 | ' 重叠程度最高,意味着blank是最少的 |
86 | Dim lines As Char()() = table.Select(Function(s) s.ToArray).ToArray |
87 | ' 因为都是等长的,所以直接使用第一条作为标准了 |
88 | Dim length% = lines(Scan0).Length |
89 | Dim coverages As New List(Of Integer) |
90 | Dim index% |
91 | |
92 | For i As Integer = 0 To length - 1 |
93 | index = i |
94 | coverages += lines _ |
95 | .Where(Function(seq) |
96 | Return seq(index) <> blank |
97 | End Function) _ |
98 | .Count |
99 | Next |
100 | |
101 | Return coverages.Max |
102 | End Function |
103 | |
104 | ''' <summary> |
105 | ''' Solve using Greedy. Forf all string find the max common prefix/suffix. Merge those two strings |
106 | ''' and continue it. |
107 | ''' </summary> |
108 | ''' <remarks> |
109 | ''' 当这个函数遇到完全没有重叠的序列片段的时候,是会直接将这个不重叠的片段接到SCS的最末尾的 |
110 | ''' </remarks> |
111 | <Extension> |
112 | Public Function ShortestCommonSuperString(Seqs As List(Of String)) As String |
113 | Dim l As Integer = Seqs.Count |
114 | |
115 | Do While l > 1 |
116 | Dim currMax As Integer = Integer.MinValue |
117 | Dim finalStr As String = Nothing |
118 | Dim p As Integer = -1, q As Integer = -1 |
119 | |
120 | For j As Integer = 0 To l - 1 |
121 | For k As Integer = j + 1 To l - 1 |
122 | Dim str As String = Seqs(j) |
123 | Dim b As String = Seqs(k) |
124 | |
125 | If str.Contains(b) Then |
126 | If b.Length > currMax Then |
127 | finalStr = str |
128 | currMax = b.Length |
129 | p = j |
130 | q = k |
131 | End If |
132 | ElseIf b.Contains(str) Then |
133 | If str.Length > currMax Then |
134 | finalStr = b |
135 | currMax = str.Length |
136 | p = j |
137 | q = k |
138 | End If |
139 | Else |
140 | ' find max common prefix and suffix |
141 | Dim maxPrefixMatch = MaxPrefixLength(str, b) |
142 | If maxPrefixMatch > currMax Then |
143 | finalStr = str + b.Substring(maxPrefixMatch) |
144 | currMax = maxPrefixMatch |
145 | p = j |
146 | q = k |
147 | End If |
148 | |
149 | Dim maxSuffixMatch = MaxPrefixLength(b, str) |
150 | If maxSuffixMatch > currMax Then |
151 | finalStr = b + str.Substring(maxSuffixMatch) |
152 | currMax = maxSuffixMatch |
153 | p = j |
154 | q = k |
155 | End If |
156 | End If |
157 | Next |
158 | Next |
159 | |
160 | l -= 1 |
161 | Seqs(p) = finalStr |
162 | Seqs(q) = Seqs(l) |
163 | Loop |
164 | |
165 | Return Seqs.First |
166 | End Function |
167 | |
168 | Private Function MaxPrefixLength(a As String, b As String) As Integer |
169 | Dim max As Integer = 0 |
170 | Dim prefix$ |
171 | |
172 | For i As Integer = 0 To b.Length - 1 |
173 | prefix = b.Substring(0, i + 1) |
174 | |
175 | If a.EndsWith(prefix) Then |
176 | max = i + 1 |
177 | End If |
178 | Next |
179 | |
180 | Return max |
181 | End Function |
182 | End Module |
183 | End Namespace |