zoukankan      html  css  js  c++  java
  • ftp同步代码

    一个很naive的代码,用来做ftp的“主->从 下载,从->主 上传”。ftp可不像mysql那样还有log可以用,所以完全naive的做法:连到ftp server然后递归列出所有目录,和维护的文件列表对比,有新增文件就下载到从服务器(其实是本地PC,囧);然后从服务器也列出目录下文件列表,和维护的文件列表比较,有新增就上传。维护一个文件列表,作用是:当主ftp server删除文件的时候,从服务器不会上传删掉的文件。

    使用python自带的ftplib,感觉功能应该挺弱的,不过也没怎么发现更强大的ftp的python库。中文处理还是比较拙计,windows上的目录和文件名字都要用name.decode('utf-8').encode('gbk')这样丑陋的方式处理才能正确使用,一个小问题是,如果从服务器新增文件名含有中文,上传到主服务器后文件名乱码,用的时候注意。

    # coding: utf-8
    # author: ChrisZZ, zchrissirhcz@gmail.com
    # description: 做ftp同步
    # 有本地主机A,和ftp服务器C,需要从A上传图片到C服务器
    # 同步策略:增加中间变量B,维护一个文件列表。C上出现过的文件都存,C上删除的则不删除;A上新增的也存
    # 形式化表示为:
    # B = {}
    # while (true) {
    #   B += C - B
    #   A += B - A
    #   B += A - B
    # }
    
    # 需要做的几个模块
    # 1. 定义文件列表数据结构,用于A,B,C的表示和加法、减法
    # 2. ftp连接
    # 3. ftp获取服务器上文件目录结构
    # 4. ftp复制文件
    # 5. ftp缓冲区大小,重传机制
    
    # 编码处理
    # qq='喜欢你'
    # charset_detect = chardet.detect(qq)
    # cur_charset = charset_detect['encoding']
    # print charset_detect
    # #qq.decode(cur_charset)
    # qq=qq.decode('utf-8')
    # qq=qq.encode('gbk')
    # print qq
    
    # step1 ftp连接
    from ftplib import FTP
    import sys
    import os
    import string
    import time
    # import chardet
    #reload(sys)
    
    #sys_encode = sys.getfilesystemencoding()
    #print '系统默认的编码为', sys_encode
    # print mystr.decode('utf-8').encode(type)
    
    #reload(sys)
    #sys.setdefaultencoding('utf8')
    
    class Myfile(object):
        """One entry of a directory listing.

        Holds the entry name, a human-readable size string and the
        modification-time string exactly as supplied by the caller. An entry
        counts as a regular file until ``is_dir`` is set to True.
        """

        def __init__(self, name, size, mtime):
            """
            :param name: entry name
            :param size: size in bytes (anything ``float()`` accepts)
            :param mtime: modification-time string, stored untouched
            """
            self.name = name
            self.mtime = mtime
            self.is_dir = False  # flipped by the caller for directories

            # Render the byte count with the largest unit it strictly exceeds.
            byte_count = float(size)
            one_mb = 1024 * 1024
            if byte_count > one_mb:
                self.size = '%.2f' % (byte_count / one_mb,) + 'MB'
            elif byte_count > 1024:
                self.size = '%.2f' % (byte_count / 1024,) + 'KB'
            else:
                self.size = str(byte_count) + 'Bytes'

        @property
        def is_file(self):
            """True when the entry is not flagged as a directory."""
            return not self.is_dir

        @property
        def dir_property(self):
            """Return ``'dir'`` for directories and ``'file'`` otherwise."""
            return 'dir' if self.is_dir == True else 'file'

        def show(self):
            """Print name, size, mtime and kind on a single line."""
            fields = (self.name, self.size, self.mtime, self.dir_property)
            print('[%s], [%s], [%s], [%s]' % fields)

        @property
        def pack(self):
            """
            Serialize the entry to the record string used for list comparison.
            The modification time is deliberately left out of the record.
            :return: ``'[name][size]'``
            """
            return '[%s][%s]' % (self.name, self.size)
    
    def ftpconnect(ftp_server='host_ip', username='name', password='password'):
        """
        Open a connection to the FTP server and log in.
        The defaults are the placeholder values the script was published with;
        pass real values (or edit the defaults) before use.
        :param ftp_server: host name or IP address of the FTP server
        :param username: login name
        :param password: login password
        :return: a logged-in ``ftplib.FTP`` instance; the caller must close it
        """
        ftp = FTP(ftp_server)
        try:
            ftp.login(username, password)
        except Exception:
            # Do not leave the control socket open when the login is rejected.
            ftp.close()
            raise
        return ftp
    
    def str_codec_std(mystr):
        """
        Re-encode a byte string from GBK to UTF-8 so that it prints correctly
        locally.
        :param mystr: GBK-encoded byte string
        :return: the same text encoded as UTF-8
        """
        as_text = mystr.decode('gbk')
        return as_text.encode('utf8')
    
    def filter_dir_list(mystr_list):
        """
        Turn raw ``LIST`` output lines into Myfile objects.
        Each line is re-encoded with str_codec_std and split into 9
        whitespace-separated columns, e.g.
        ['-rw-r--r--', '1', '48', 'apache', '1018596', 'Dec', '1', '20:08', 'RPN_BF.pdf'].
        The '.' and '..' entries are dropped, and so are lines that do not have
        all 9 columns (such as a 'total N' summary line).
        :param mystr_list: lines returned by the server for ``LIST``
        :return: list of Myfile objects, with is_dir set for directories
        """
        res = []
        for mystr in mystr_list:
            mystr = str_codec_std(mystr)
            # The name is the 9th column and may itself contain spaces, so stop
            # splitting after 8 separators.
            file_info = mystr.split(None, 8)
            if len(file_info) < 9:
                # Not a regular listing row; indexing it would raise IndexError.
                continue

            name = file_info[8]
            if name in ('.', '..'):
                continue

            mtime = '%s-%s-%s' % (file_info[5], file_info[6], file_info[7])
            myfile = Myfile(name=name, size=file_info[4], mtime=mtime)

            # The first character of the permission column marks directories.
            if file_info[0][0] == 'd':
                myfile.is_dir = True
            res.append(myfile)
        return res
    
    def list_local_dir(local_dir):
        """
        Recursively list the files under a local directory as record strings.
        In each record the local_dir prefix is replaced by the root '/' and the
        path uses unix-style separators, so records look like '[/sub/a.txt][2.00KB]'.
        The size text uses the same units and formatting as Myfile so that local
        and server records can be compared as plain strings.
        :param local_dir: local path, e.g. 'g:/code/ftp_sync'
        :return: list of '[name][size]' records for every file found
        """
        res = []
        for path, _subdirs, filelist in os.walk(local_dir):
            # Normalize Windows separators and make sure the path ends with '/'.
            path = path.replace("\\", "/")
            if path[-1] != '/':
                path = path + '/'
            # Part of the path below local_dir; starts with '/'.
            prefix = path[len(local_dir):]
            for filename in filelist:
                name = os.path.join('/', prefix, filename)
                size = float(os.path.getsize(os.path.join(path, filename)))
                if size > 1024 * 1024:
                    size = '%.2f' % (size / (1024 * 1024)) + 'MB'
                elif size > 1024:
                    size = '%.2f' % (size / 1024) + 'KB'
                else:
                    size = str(size) + 'Bytes'

                record = '[%s][%s]' % (name, size)
                # Byte-string names are treated as GBK and stored as UTF-8,
                # like the server records; text strings need no transcoding.
                if isinstance(record, bytes):
                    record = record.decode('gbk').encode('utf-8')
                res.append(record)
        return res
    
    def list_server_dir(server_dir=None):
        """
        List one directory on the FTP server.
        Opens its own connection, issues a single ``LIST`` (sub-directories are
        not descended into) and closes the connection again, also on failure.
        :param server_dir: directory on the server, e.g. '/photo'; None lists
            the directory the session starts in
        :return: list of Myfile objects for the entries of that directory
        """
        ftp = ftpconnect()
        try:
            if server_dir is not None:
                ftp.cwd(server_dir)

            server_file_list = []
            ftp.retrlines('LIST', server_file_list.append)
            return filter_dir_list(server_file_list)
        finally:
            ftp.quit()
    
    def get_C(ftp, target_dir=None):
        """
        Recursively collect the files below a directory on the server.
        Every returned Myfile has its name rewritten to a '/'-prefixed path
        relative to the directory the listing started in.
        :param ftp: connected ftplib.FTP instance
        :param target_dir: directory to change into before listing; None lists
            the current working directory
        :return: list of Myfile objects for regular files only
        """
        C = []
        if target_dir is not None:
            ftp.cwd(target_dir)   # change working directory to target_dir
        server_file_list = []
        ftp.retrlines('LIST', server_file_list.append)
        for item in filter_dir_list(server_file_list):
            if item.is_dir:
                # NOTE(review): item.name has been re-encoded to UTF-8 by
                # filter_dir_list; confirm the server accepts it in CWD for
                # non-ASCII directory names.
                sub_C = get_C(ftp, item.name)
                # The recursive call left the session inside the child
                # directory; step back up so the remaining siblings resolve
                # against this directory.
                ftp.cwd('..')
                for cc in sub_C:
                    cc.name = '/' + item.name + cc.name
                C.extend(sub_C)
            else:
                item.name = '/' + item.name
                C.append(item)
        return C
    
    def old_main():
        """
        Earlier entry point: print the path of every file found on the server.
        The connection is closed once the listing has been collected, also when
        the listing fails.
        """
        # Build C  TODO: handle connection failures
        ftp = ftpconnect()
        try:
            C = get_C(ftp)
        finally:
            ftp.quit()

        for cc in C:
            print(cc.name)
    
    def list_diff(a, b):
        """
        Set-difference style operation a - b on lists.
        Order and duplicates of ``a`` are preserved.
        :param a: items to filter
        :param b: items to exclude
        :return: new list with the items of ``a`` that do not occur in ``b``
        """
        return [candidate for candidate in a if candidate not in b]
    
    #if __name__ == '__main__':
    def future_main():
        # 设定B
        B = []
    
        # 设定C
        ftp = ftpconnect()
        C = get_C(ftp)
        #ftp.quit()
        C = [cc.pack for cc in C]
    
        # 设定A
        local_sync_dir = 'g:/code/ftp_sync/sync'
        A = list_local_dir(local_sync_dir)
        print '====== Begin本地文件列表(A)'
        for aa in A:
            print aa
        print '====== End本地文件列表(A)'
    
        # B += C - B
        delta = list_diff(C, B)
        B.extend(delta)
        print '====== Begin维护文件列表(B)'
        for bb in B:
            print bb
        print '====== End维护文件列表(B)'
        #print '====before===='
        #for aa in A:
        #    print aa
    
        #print len(A)
    
        # A += B - A
        delta = list_diff(B, A)
        print '====== Begin服务器上新增文件列表(B-A):'
        for d in delta:
            print d
        # TODO: 把delta里的文件都下载下来
        bufsize = 20000 * 1024
        for filename in delta:
            filename = filename.split(']')[0][1:]
            filename = filename.decode('utf-8').encode('gbk')
            #print 'filename is : ', filename
    
            LocalFile = local_sync_dir + filename
            print '将服务器上的文件%s保存为本地文件%s' % (filename.decode('gbk').encode('utf-8'), LocalFile.decode('gbk').encode('utf-8'))
    
            server_dir = filename[0:filename.rindex('/')+1]
            #print 'server_dir:' , server_dir
            ftp.cwd(server_dir)
    
            short_filename = filename[filename.rindex('/')+1:]
            #print 'short_filename is ', short_filename
            #LocalFile = string.replace(LocalFile, '/', '\')
            #print 'item is:', LocalFile
    
            #print 'local_sync_dir is ', local_sync_dir
            #local_sync_dir = string.replace(local_sync_dir, '/', '\')
            local_dir = LocalFile[0:LocalFile.rindex('/')+1]
            print 'local_dir is ', local_dir
            if(not os.path.exists(local_dir)):
                print '创建目录%s' % local_dir
                os.makedirs(local_dir)
    
            #LocalFile = LocalFile.decode('utf-8').encode('gbk')
            fh = open(LocalFile, 'wb')
            ftp.retrbinary('RETR '+short_filename, fh.write, bufsize)
            #ftp.storbinary('STOR %s' % os.path.basename(short_filename), fh.write, bufsize)
            #ftp.storbinary('STOR %s'%short_filename, fh, bufsize)
            fh.close()
        A.extend(delta)
    
        print "==== ^_^Begin下载完毕,现在本地文件列表如下"
        for aa in A:
            print aa
        print "==== ^_^End下载完毕,现在本地文件列表如下"
    
        # B += A - B
        delta = list_diff(A, B)
        # TODO: 把delta里面的文件都上传上去
        for filename in delta:
            filename = filename.split(']')[0][1:]
            #filename = filename.decode('utf-8').encode('gbk')
    
            LocalFile = local_sync_dir + filename
            #LocalFile = LocalFile.decode('utf-8').encode('gbk')
    
            print '将本地文件%s上传到为服务器文件%s' % (LocalFile.decode('gbk').encode('utf-8'), filename.decode('gbk').encode('utf-8'))
    
            server_dir = filename[0:filename.rindex('/') + 1]
            ftp.cwd(server_dir)
            # todo:检查服务器上路径server_dir是否存在,若不存在则创建
    
            short_filename = filename[filename.rindex('/') + 1:]
            #local_dir = LocalFile[0:LocalFile.rindex('/') + 1]
            #print 'local_dir is ', local_dir
            #if (not os.path.exists(local_dir)):
            #    print '创建目录%s' % local_dir
            #    os.makedirs(local_dir)
    
            LocalFile = LocalFile.decode('utf-8').encode('gbk')
            fh = open(LocalFile, 'rb')
            ftp.storbinary('STOR ' + LocalFile, fh, bufsize)
            fh.close()
        B.extend(delta)
    
        print "==== !!任务结束!!"
        ftp.quit()
    
    def download_example():
        """
        Example: download a single file with a Chinese name into the local
        sync directory. The name is converted from UTF-8 to GBK before it is
        used for the local path and for the ``RETR`` command.
        The local file and the connection are closed even if the transfer fails.
        """
        local_sync_dir = 'g:/code/ftp_sync/sync'
        filename = '反卷积iccv2011.pdf'
        filename = filename.decode('utf-8').encode('gbk')
        LocalFile = local_sync_dir + '/' + filename
        bufsize = 1000*1024

        ftp = ftpconnect()
        try:
            with open(LocalFile, 'wb') as fh:
                ftp.retrbinary('RETR %s' % filename, fh.write, bufsize)
        finally:
            ftp.close()
    
    def upload_example():
        ftp = ftpconnect()
        local_sync_dir = 'g:/code/ftp_sync/sync'
        filename = 'test你.php'
        import chardet
        #print chardet.detect(filename)
    
        #filename = filename.decode('gbk').encode('utf-8')
        LocalFile = local_sync_dir + '/' + filename
        print 'LocalFile is : %s' % LocalFile
        LocalFile = LocalFile.decode('utf-8').encode('gbk')
        # ftp.set_pasv(0)
    
        fh = open(LocalFile, 'rb')
        bufsize = 1000 * 1024
        ftp.storbinary('STOR %s' % filename, fh, bufsize)
        fh.close()
        ftp.close()
    
    if __name__ == '__main__':
        # Script entry point: run one sync pass. The two example calls are left
        # commented out and can be enabled to try a single download or upload.
        #download_example()
        #upload_example()
        future_main()
    
  • 相关阅读:
    TCP四种定时器--学习笔记
    Python魔术师--self
    python的socket里 gethostbyname 与 gethostbyname_ex 的区别
    用python查看URL编码的中文
    基于linux 的2048
    用灵活的指针访问类私有变量
    ie8无法拉伸背景图
    图片的onerror 事件解析
    stream.js
    Promise
  • 原文地址:https://www.cnblogs.com/zjutzz/p/6255063.html
Copyright © 2011-2022 走看看