| 1 | #Region "Microsoft.VisualBasic::e2b7be939df9c777bafc51825a4cc984, Microsoft.VisualBasic.Core\ComponentModel\Algorithm\SCS.vb" |
| 2 | |
| 3 | ' Author: |
| 4 | ' |
| 5 | ' asuka (amethyst.asuka@gcmodeller.org) |
| 6 | ' xie (genetics@smrucc.org) |
| 7 | ' xieguigang (xie.guigang@live.com) |
| 8 | ' |
| 9 | ' Copyright (c) 2018 GPL3 Licensed |
| 10 | ' |
| 11 | ' |
| 12 | ' GNU GENERAL PUBLIC LICENSE (GPL3) |
| 13 | ' |
| 14 | ' |
| 15 | ' This program is free software: you can redistribute it and/or modify |
| 16 | ' it under the terms of the GNU General Public License as published by |
| 17 | ' the Free Software Foundation, either version 3 of the License, or |
| 18 | ' (at your option) any later version. |
| 19 | ' |
| 20 | ' This program is distributed in the hope that it will be useful, |
| 21 | ' but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 22 | ' MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 23 | ' GNU General Public License for more details. |
| 24 | ' |
| 25 | ' You should have received a copy of the GNU General Public License |
| 26 | ' along with this program. If not, see <http://www.gnu.org/licenses/>. |
| 27 | |
| 28 | |
| 29 | |
| 30 | ' /********************************************************************************/ |
| 31 | |
| 32 | ' Summaries: |
| 33 | |
| 34 | ' Module SCS |
| 35 | ' |
| 36 | ' Function: Coverage, MaxPrefixLength, ShortestCommonSuperString |
| 37 | ' |
| 38 | ' Sub: TableView |
| 39 | ' |
| 40 | ' |
| 41 | ' /********************************************************************************/ |
| 42 | |
| 43 | #End Region |
| 44 | |
| 45 | Imports System.IO |
| 46 | Imports System.Runtime.CompilerServices |
| 47 | Imports Microsoft.VisualBasic.Language |
| 48 | |
| 49 | Namespace ComponentModel.Algorithm |
| 50 | |
| 51 | ''' <summary> |
| 52 | ''' shortest_common_superstring |
| 53 | ''' </summary> |
| 54 | ''' <remarks> |
| 55 | ''' https://github.com/aakash01/codebase/blob/60394bf92eb09410c07eec1c4d3c81cf0fc72a70/src/com/aakash/practice/interviewbit_may2017/dynamic_programming/ShortestCommonSuperString.java |
| 56 | ''' </remarks> |
| 57 | Public Module SCS |
| 58 | |
| 59 | <Extension> |
| 60 | Public Sub TableView(fragments As IEnumerable(Of String), SCS$, ByRef print As TextWriter, Optional empty As Char = "."c) |
| 61 | Dim lines As New List(Of String) |
| 62 | |
| 63 | Call print.WriteLine(SCS) |
| 64 | |
| 65 | For Each str As String In fragments |
| 66 | Dim start% = InStr(SCS, str) - 1 |
| 67 | Dim ends% = start + str.Length |
| 68 | Dim lefts = SCS.Length - ends |
| 69 | Dim view$ = New String(empty, start) & str & New String(empty, lefts) |
| 70 | |
| 71 | lines += view |
| 72 | Next |
| 73 | |
| 74 | Call print.WriteLine($"#Coverage={Coverage(lines, blank:=empty)}") |
| 75 | Call print.WriteLine(lines.JoinBy(print.NewLine)) |
| 76 | Call print.Flush() |
| 77 | End Sub |
| 78 | |
| 79 | ''' <summary> |
| 80 | ''' 使用重叠程度最高的片段作为统计的标准 |
| 81 | ''' </summary> |
| 82 | ''' <param name="table"></param> |
| 83 | ''' <returns></returns> |
| 84 | Public Function Coverage(table As IEnumerable(Of String), Optional blank As Char = "."c) As Integer |
| 85 | ' 重叠程度最高,意味着blank是最少的 |
| 86 | Dim lines As Char()() = table.Select(Function(s) s.ToArray).ToArray |
| 87 | ' 因为都是等长的,所以直接使用第一条作为标准了 |
| 88 | Dim length% = lines(Scan0).Length |
| 89 | Dim coverages As New List(Of Integer) |
| 90 | Dim index% |
| 91 | |
| 92 | For i As Integer = 0 To length - 1 |
| 93 | index = i |
| 94 | coverages += lines _ |
| 95 | .Where(Function(seq) |
| 96 | Return seq(index) <> blank |
| 97 | End Function) _ |
| 98 | .Count |
| 99 | Next |
| 100 | |
| 101 | Return coverages.Max |
| 102 | End Function |
| 103 | |
| 104 | ''' <summary> |
| 105 | ''' Solve using Greedy. Forf all string find the max common prefix/suffix. Merge those two strings |
| 106 | ''' and continue it. |
| 107 | ''' </summary> |
| 108 | ''' <remarks> |
| 109 | ''' 当这个函数遇到完全没有重叠的序列片段的时候,是会直接将这个不重叠的片段接到SCS的最末尾的 |
| 110 | ''' </remarks> |
| 111 | <Extension> |
| 112 | Public Function ShortestCommonSuperString(Seqs As List(Of String)) As String |
| 113 | Dim l As Integer = Seqs.Count |
| 114 | |
| 115 | Do While l > 1 |
| 116 | Dim currMax As Integer = Integer.MinValue |
| 117 | Dim finalStr As String = Nothing |
| 118 | Dim p As Integer = -1, q As Integer = -1 |
| 119 | |
| 120 | For j As Integer = 0 To l - 1 |
| 121 | For k As Integer = j + 1 To l - 1 |
| 122 | Dim str As String = Seqs(j) |
| 123 | Dim b As String = Seqs(k) |
| 124 | |
| 125 | If str.Contains(b) Then |
| 126 | If b.Length > currMax Then |
| 127 | finalStr = str |
| 128 | currMax = b.Length |
| 129 | p = j |
| 130 | q = k |
| 131 | End If |
| 132 | ElseIf b.Contains(str) Then |
| 133 | If str.Length > currMax Then |
| 134 | finalStr = b |
| 135 | currMax = str.Length |
| 136 | p = j |
| 137 | q = k |
| 138 | End If |
| 139 | Else |
| 140 | ' find max common prefix and suffix |
| 141 | Dim maxPrefixMatch = MaxPrefixLength(str, b) |
| 142 | If maxPrefixMatch > currMax Then |
| 143 | finalStr = str + b.Substring(maxPrefixMatch) |
| 144 | currMax = maxPrefixMatch |
| 145 | p = j |
| 146 | q = k |
| 147 | End If |
| 148 | |
| 149 | Dim maxSuffixMatch = MaxPrefixLength(b, str) |
| 150 | If maxSuffixMatch > currMax Then |
| 151 | finalStr = b + str.Substring(maxSuffixMatch) |
| 152 | currMax = maxSuffixMatch |
| 153 | p = j |
| 154 | q = k |
| 155 | End If |
| 156 | End If |
| 157 | Next |
| 158 | Next |
| 159 | |
| 160 | l -= 1 |
| 161 | Seqs(p) = finalStr |
| 162 | Seqs(q) = Seqs(l) |
| 163 | Loop |
| 164 | |
| 165 | Return Seqs.First |
| 166 | End Function |
| 167 | |
| 168 | Private Function MaxPrefixLength(a As String, b As String) As Integer |
| 169 | Dim max As Integer = 0 |
| 170 | Dim prefix$ |
| 171 | |
| 172 | For i As Integer = 0 To b.Length - 1 |
| 173 | prefix = b.Substring(0, i + 1) |
| 174 | |
| 175 | If a.EndsWith(prefix) Then |
| 176 | max = i + 1 |
| 177 | End If |
| 178 | Next |
| 179 | |
| 180 | Return max |
| 181 | End Function |
| 182 | End Module |
| 183 | End Namespace |