zoukankan      html  css  js  c++  java
  • python转载[Rsync Algorithm]

    转自:http://code.activestate.com/recipes/577518-rsync-algorithm/?in=lang-python

    代码:Tested in Python 2.5, 2.6, and 3.1. In 2.7, io.BufferedReader should yield the best throughput. On all other versions use __builtin__.open.

    #!/usr/bin/env python
    # -*- coding: utf-8 -*-
    """
    This is a pure Python implementation of the [rsync algorithm](TM96).

    [TM96] Andrew Tridgell and Paul Mackerras. The rsync algorithm.
    Technical Report TR-CS-96-05, Canberra 0200 ACT, Australia, 1996.
    http://samba.anu.edu.au/rsync/.

    ### Example Use Case: ###

        # On the system containing the file that needs to be patched
        >>> unpatched = open("unpatched.file", "rb")
        >>> hashes = blockchecksums(unpatched)

        # On the remote system after having received `hashes`
        >>> patchedfile = open("patched.file", "rb")
        >>> delta = rsyncdelta(patchedfile, hashes)

        # System with the unpatched file after receiving `delta`
        >>> unpatched.seek(0)
        >>> save_to = open("locally-patched.file", "wb")
        >>> patchstream(unpatched, save_to, delta)
    """

    import collections
    import hashlib

    # ``bytes`` compatibility shim for Python 2.
    #
    # The decision is made by inspecting ``bytes`` directly instead of probing
    # ``__builtins__``: in CPython ``__builtins__`` is a plain dict inside
    # imported modules, so ``hasattr(__builtins__, "bytes")`` is False there
    # and the shim would wrongly replace the real ``bytes`` on Python 3.
    try:
        _needs_bytes_shim = str is bytes  # True on Python 2.6 / 2.7
    except NameError:
        _needs_bytes_shim = True  # interpreters without a ``bytes`` builtin

    if _needs_bytes_shim:
        def bytes(var, *args):
            """
            Emulate the two conversions this module needs from ``bytes``.

            An iterable of integers is joined into a string of the
            corresponding characters.  If ``chr`` rejects the items with
            ``TypeError`` (e.g. ``var`` is already a string), the ordinals of
            its items are returned instead.  Extra positional arguments are
            accepted and ignored.
            """
            try:
                return ''.join(map(chr, var))
            except TypeError:
                return map(ord, var)

    # Public API of the module.  Each name needs its own list entry: adjacent
    # string literals without a separating comma are silently concatenated
    # into a single (non-existent) name.
    __all__ = [
        "rollingchecksum",
        "weakchecksum",
        "patchstream",
        "rsyncdelta",
        "blockchecksums",
    ]


    def rsyncdelta(datastream, remotesignatures, blocksize=4096):
        """
        Generate a binary patch from the up-to-date data and remote hashes.

        datastream       -- readable binary stream with the up-to-date data;
                            it must support ``read`` and ``tell``.
        remotesignatures -- pair ``(weak, strong)`` of lists as returned by
                            ``blockchecksums`` for the unpatched target.
        blocksize        -- block size in bytes; must be the same value that
                            was used to generate ``remotesignatures``.

        Returns a list whose first item is ``blocksize``.  Every following
        item is either an integer (index of a block the target already has)
        or a byte string of literal data that has to be sent to the target.
        """
        remote_weak, remote_strong = remotesignatures

        match = True
        matchblock = -1
        tailsize = 0
        deltaqueue = collections.deque()

        while True:
            if match and datastream is not None:
                # Whenever there is a match or the loop is running for the
                # first time, populate the window using weakchecksum instead
                # of rolling through every single byte, which takes at least
                # twice as long.
                window = collections.deque(bytes(datastream.read(blocksize)))
                checksum, a, b = weakchecksum(window)

            try:
                # Blocks are only searched for after the last matched block.
                # If there are two identical weak checksums in a file, and the
                # matching strong hash does not occur at or after the first
                # weak match, it will be missed and the data sent over.
                #
                # The weak hit is kept in a temporary so that a failed strong
                # comparison does not advance ``matchblock`` and thereby hide
                # earlier remote blocks from later searches.
                candidate = remote_weak.index(checksum, matchblock + 1)
                stronghash = hashlib.md5(bytes(window)).hexdigest()
                matchblock = remote_strong.index(stronghash, candidate)

                match = True
                deltaqueue.append(matchblock)

                # ``datastream`` is set to None once the input is exhausted;
                # the window cannot be refilled then, so stop instead of
                # dereferencing None.
                if datastream is None or datastream.closed:
                    break
                continue

            except ValueError:
                # The weak checksum (or the strong hash) did not match.
                match = False
                try:
                    if datastream:
                        # Get the next byte and affix it to the window.
                        newbyte = ord(datastream.read(1))
                        window.append(newbyte)
                except TypeError:
                    # ord() of an empty read: no more data from the file, so
                    # the window will slowly shrink.  newbyte needs to be zero
                    # from here on to keep the checksum correct.
                    newbyte = 0
                    tailsize = datastream.tell() % blocksize
                    datastream = None

                if datastream is None and len(window) <= tailsize:
                    # The likelihood that any blocks will match after this is
                    # nearly nil, so call it quits and ship the remainder.
                    deltaqueue.append(window)
                    break

                # Yank off the extra byte and calculate the new window
                # checksum.
                oldbyte = window.popleft()
                checksum, a, b = rollingchecksum(oldbyte, newbyte, a, b,
                                                 blocksize)

                # Add the old byte to the file delta.  This is data that was
                # not found inside of a matching block, so it needs to be sent
                # to the target.
                try:
                    deltaqueue[-1].append(oldbyte)
                except (AttributeError, IndexError):
                    # The last entry is a block index (int) or the queue is
                    # empty: start a new run of literal bytes.
                    deltaqueue.append([oldbyte])

        # Return a delta that starts with the blocksize and converts all
        # iterables to bytes; empty literal runs are dropped.
        deltastructure = [blocksize]
        for element in deltaqueue:
            if isinstance(element, int):
                deltastructure.append(element)
            elif element:
                deltastructure.append(bytes(element))

        return deltastructure


    def blockchecksums(instream, blocksize=4096):
        """
        Compute the weak and strong hashes of every block of a stream.

        instream  -- readable binary stream; it is consumed from its current
                     position until ``read`` returns an empty result.
        blocksize -- number of bytes requested per block.

        Returns a pair ``(weakhashes, stronghashes)`` of equally long lists:
        the first element of ``weakchecksum`` and the hexadecimal MD5 digest
        of each block, in stream order.
        """
        weakhashes = []
        stronghashes = []

        read = instream.read(blocksize)
        while read:
            weakhashes.append(weakchecksum(bytes(read))[0])
            stronghashes.append(hashlib.md5(read).hexdigest())
            read = instream.read(blocksize)

        return weakhashes, stronghashes


    def patchstream(instream, outstream, delta):
        """
        Patch instream using the supplied delta and write the resulting data
        to outstream.

        instream  -- seekable, readable binary stream with the unpatched data.
        outstream -- writable binary stream receiving the patched data.
        delta     -- list as produced by ``rsyncdelta``: the block size,
                     followed by block indices (int) and literal byte strings.
        """
        blocksize = delta[0]

        for element in delta[1:]:
            if isinstance(element, int) and blocksize:
                # An integer refers to a block the unpatched stream already
                # contains: copy it from there.
                instream.seek(element * blocksize)
                element = instream.read(blocksize)
            # Anything else is literal data and is written as-is.
            outstream.write(element)


    def rollingchecksum(removed, new, a, b, blocksize=4096):
        """
        Roll the weak checksum forward by one byte.

        removed   -- value of the byte leaving the window.
        new       -- value of the byte entering the window.
        a, b      -- internal state of the checksum for the previous window,
                     as returned by ``weakchecksum`` or by this function.
        blocksize -- window length the state was computed for.

        Returns ``(checksum, a, b)`` with the updated state.
        """
        a -= removed - new
        # ``b`` is updated with the already-updated ``a``.
        b -= removed * blocksize - a
        return (b << 16) | a, a, b


    def weakchecksum(data):
        """
        Generate a weak checksum from a sized iterable of byte values.

        data -- sized iterable of integers (it must support ``len``).

        Returns ``(checksum, a, b)`` where ``a`` is the plain sum of the
        values, ``b`` is the sum of the values weighted by their distance
        from the end of ``data``, and ``checksum`` is ``(b << 16) | a``.
        """
        a = b = 0
        length = len(data)

        # Iterate instead of indexing: ``deque[i]`` is not constant-time for
        # elements in the middle, and rsyncdelta passes a deque here.
        for i, value in enumerate(data):
            a += value
            b += (length - i) * value

        return (b << 16) | a, a, b

    测试:

    # On the system containing the file that needs to be patched 
    >>> unpatched = open("unpatched.file", "rb") 
    >>> hashes = blockchecksums(unpatched) 
     
    # On the remote system after having received `hashes` 
    >>> patchedfile = open("patched.file", "rb") 
    >>> delta = rsyncdelta(patchedfile, hashes) 
     
    # System with the unpatched file after receiving `delta` 
    >>> unpatched.seek(0) 
    >>> save_to = open("locally-patched.file", "wb") 
    >>> patchstream(unpatched, save_to, delta) 

     

    rsync算法:http://www.cnblogs.com/itech/archive/2010/06/13/1757952.html

     

    完!


  • 相关阅读:
    python 学习——sqlalchemy 模块
    python学习——数据库基本知识mysql
    算法设计22——并行算法2 实际应用中的优化
    Perl 学习
    python学习——装饰器、生成器、迭代器
    算法设计19—— 全对最短路径 Floyd算法
    asp.net Core 使用过滤器判断请求客户端是否为移动端,并实现PC端和移动端请求映射和自动跳转
    在windows平台使用Apache James搭建邮件服务器以及使用C#向外网发送邮件
    asp.net core3.1策略授权问题
    Unity调用安卓中的方法遇到的问题
  • 原文地址:https://www.cnblogs.com/itech/p/1940463.html
Copyright © 2011-2022 走看看