zoukankan      html  css  js  c++  java
  • 最长公共子序列算法, 文本比较算法, longest common subsequence (LCS) algorithm

     1 '''
     2 merge two configure files, basic file is aFile
     3 insert the added content of bFile compare to aFile
     4 for example, 'bbb' is added content
     5 -----------------------------------------------------------
     6 a file content  |  b file content  |  c merged file content
     7     111         |       111        |    111
     8     aaa         |       bbb        |    aaa
     9                 |                  |    bbb
    10     222         |       222        |    222
    11 ------------------------------------------------------------
    12 '''
    13 def mergeFiles(aPath, bPath, cPath):
    14 
    15     with open(aPath, 'r') as f:
    16         aLines = f.readlines();
    17         aLines = [ line.strip() + '
    ' for line in aLines]
    18 
    19     with open(bPath, 'r') as f:
    20         bLines = f.readlines();
    21         bLines = [ line.strip() + '
    ' for line in bLines]
    22 
    23     cLines = mergeSequences(aLines, bLines)
    24 
    25     with open(cPath, 'w') as f:
    26         for line in cLines:
    27             f.write(line)
    28 
    29 '''
    30 merge the sequence
    31 '''
    32 def mergeSequences(aLines, bLines):
    33     record = {}
    34     lcs = findLCS(record, aLines, 0, bLines, 0)
    35     currA = currB = 0
    36     merged = []
    37     for (line, aI, bI) in lcs:
    38 
    39         # add deleted
    40         if aI > currA:
    41             merged.extend(aLines[currA:aI])
    42         currA = aI + 1
    43 
    44         # add added
    45         if bI > currB:
    46             merged.extend(bLines[currB:bI])
    47         currB = bI + 1
    48 
    49         # add common
    50         merged.append(line)
    51 
    52     if currA < len(aLines):
    53         merged.extend(aLines[currA:])
    54     if currB < len(bLines):
    55         merged.extend(bLines[currB:])
    56 
    57     return merged
    58 
    59 '''
    60 find Longest common subsequence
    61 return list of (line, x, y)
    62 line is common line, x is the index in aLines, y is the index in bLines
    63 TODO: eliminate recursive invoke, use dynamic algorithm
    64 '''
    65 def findLCS(record, aLines, aStart, bLines, bStart):
    66 
    67     key = lcsKey(aStart, bStart)
    68     if record.has_key(key):
    69         return record[key]
    70 
    71     aL = aLines[aStart:]
    72     bL = bLines[bStart:]
    73     if len(aL) > 0 and len(bL) > 0:
    74         if aL[0] == bL[0]:
    75             lsc = [(aL[0], aStart, bStart)]
    76             lsc.extend(findLCS(record, aLines, aStart + 1, bLines, bStart + 1))
    77             record[key] = lsc
    78             return lsc
    79         else:
    80             aLsc = findLCS(record, aLines, aStart, bLines, bStart + 1)
    81             bLsc = findLCS(record, aLines, aStart + 1, bLines, bStart)
    82 
    83             if len(aLsc) > len(bLsc):
    84                 record[key] = aLsc
    85                 return aLsc
    86             else:
    87                 record[key] = bLsc
    88                 return bLsc
    89     else:
    90         return []
    91 
    92 Code
  • 相关阅读:
    Ubuntu16.04下Django项目的部署
    Ubuntu16.04 下python2 | python3
    请求头请求体对应表
    Django项目开发-小技巧
    前端验证后端验证码问题
    Ugly Number
    移动0元素
    图片(画布上的图片)上传总结
    从矩阵中查找一个数
    搜索框(附带事件函数)
  • 原文地址:https://www.cnblogs.com/lpthread/p/3425363.html
Copyright © 2011-2022 走看看