  • Recursively downloading folders and files with Python's ftplib package

    #!/usr/bin/env python
    # -*- coding: utf-8 -*-
    # @Date    : 2018-06-11 09:35:49
    # @Author  : Yaheng Wang (m13262578991@163.com)
    # @Link    : http://www.wy2160640.github.io
    # @Version : $Id$
    
    import os
    import sys
    from ftplib import FTP
    
    class FTPSync(object):
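        # Mirrors ftp.hapmap.org/hapmap/frequencies/ into the hard-coded local
        # directory below; that local path must already exist before running.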
        def __init__(self):
            self.conn = FTP('ftp.hapmap.org')
            self.conn.login()
            self.conn.cwd('hapmap/frequencies/')
            os.chdir('/home/ftp.hapmap.org/hapmap/frequencies/')
    
        def get_dirs_files(self):
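            # FTP.dir() returns Unix 'ls -l' style lines; entries starting with
            # '-' are regular files and entries starting with 'd' are directories.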
            dir_res = []
            self.conn.dir('.', dir_res.append)
            files = [f.split(None, 8)[-1] for f in dir_res if f.startswith('-')]
            dirs = [f.split(None, 8)[-1] for f in dir_res if f.startswith('d')]
            return files, dirs
    
        def walk(self, next_dir):
            sys.stderr.write('Walking to %s\n' % next_dir)
            self.conn.cwd(next_dir)
            try:
                os.mkdir(next_dir)
            except OSError:
                pass
            os.chdir(next_dir)
            ftp_curr_dir = self.conn.pwd()
            local_curr_dir = os.getcwd()
            files, dirs = self.get_dirs_files()
            sys.stdout.write("FILES: %s"%files)
            sys.stdout.write("DIRS: %s"%dirs)
            for f in files:
                sys.stdout.write("%s : %s"%(next_dir, f))
                sys.stdout.write("download : %s"%os.path.abspath(f))
                outf = open(f, "wb")
                try:
                    self.conn.retrbinary("RETR %s"%f, outf.write)
                finally:
                    outf.close()
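            # Restore both the local and remote working directories recorded
            # above before descending into each subdirectory, since walk()
            # changes them as it recurses.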
            for d in dirs:
                os.chdir(local_curr_dir)
                self.conn.cwd(ftp_curr_dir)
                self.walk(d)
    
        def run(self):
            self.walk('.')
    
    def main():
        f = FTPSync()
        f.run()
    
    
    if __name__ == '__main__':
        main()
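
    A minimal alternative sketch (not from the original script): the same recursive
    download, but parameterized by host and path, and using FTP.mlsd() (Python 3.3+)
    to classify entries instead of parsing the textual dir() listing. It assumes the
    server supports the MLSD command (RFC 3659); the host and paths below are
    placeholders.

    import os
    from ftplib import FTP

    def download_tree(conn, remote_dir, local_dir):
        """Recursively mirror remote_dir (relative to the current FTP cwd) into local_dir."""
        os.makedirs(local_dir, exist_ok=True)
        start = conn.pwd()                        # remember where we started
        conn.cwd(remote_dir)
        entries = list(conn.mlsd())               # materialize before issuing more commands
        for name, facts in entries:
            if facts.get('type') == 'dir':
                download_tree(conn, name, os.path.join(local_dir, name))
            elif facts.get('type') == 'file':
                with open(os.path.join(local_dir, name), 'wb') as outf:
                    conn.retrbinary('RETR %s' % name, outf.write)
        conn.cwd(start)                           # restore the caller's working directory

    if __name__ == '__main__':
        ftp = FTP('ftp.example.org')              # placeholder host
        ftp.login()                               # anonymous login
        download_tree(ftp, 'pub/data', 'pub_data')    # placeholder remote/local paths
        ftp.quit()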
  • Original post: https://www.cnblogs.com/yahengwang/p/9332580.html