zoukankan      html  css  js  c++  java
  • python多进程-进程池模式退出异常解决办法

    在运行Python多进程脚本时,会随机出现任务已全部执行完毕、程序却无法主动退出的情况。进程数越多、处理的数据量越大,出现这种退出异常的概率就越高。下面的脚本用于解决该退出异常问题。

    import argparse
    import requests
    import getpass
    from multiprocessing import Pool
    import datetime
    import time
    from requests.packages.urllib3.exceptions import InsecureRequestWarning
    
    # Silence urllib3's InsecureRequestWarning: the requests issued by this script
    # are sent with verify=False, which would otherwise emit a warning per request.
    requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
    
    def get_parameter():
        """Parse the command-line options of the script.

        Returns:
            A 4-tuple ``(inputFile, outputFile, proxyServer, processNum)``:
            the URL list file (``-f``), the result file (``-o``), the proxy
            server (``-p``) and the number of worker processes (``-n``).
        """
        cli=argparse.ArgumentParser(description='利用requests库批量访问url(v3)')
        # One row per option: (flag, dest, type, default, help text).
        option_table=(
            ('-f','inputFile',str,'','url文件'),
            ('-o','outputFile',str,'result.txt','结果保存文件'),
            ('-p','proxyServer',str,'','设置代理服务器,默认不指定'),
            ('-n','processNum',int,'1','指明脚本进程数,缺省为1'),
        )
        for flag,dest,kind,fallback,help_text in option_table:
            cli.add_argument(flag,dest=dest,type=kind,default=fallback,help=help_text)
        opts=cli.parse_args()
        return opts.inputFile,opts.outputFile,opts.proxyServer,opts.processNum
    
    def set_proxy(urlProxy):
        """Build the ``proxies`` mapping handed to ``requests``.

        When ``urlProxy`` is empty no proxy is configured. Otherwise the user
        is prompted for a username and password, which are embedded in the
        proxy URLs for both the ``http`` and ``https`` entries.

        Args:
            urlProxy: Address of the proxy server. A scheme and the
                credentials are prepended here, so it is presumably given
                as ``host:port`` without a scheme.

        Returns:
            An empty dict when no proxy is requested, otherwise a dict with
            the keys ``'http'`` and ``'https'``.
        """
        if not urlProxy:
            return {}
        from urllib.parse import quote
        # Percent-encode the credentials: characters such as '@', ':' or '/'
        # in a username or password would otherwise corrupt the proxy URL.
        username=quote(str(input('username:')),safe='')
        password=quote(str(getpass.getpass('password:')),safe='')
        authority=username+':'+password+'@'+str(urlProxy)
        return {
            'http':'http://'+authority,
            'https':'https://'+authority
        }
    
    def get_url(urlFile):
        """Read the URL list file and return its lines.

        Args:
            urlFile: Path of the text file to read.

        Returns:
            A list with every line of the file, line endings included.
        """
        with open(urlFile,'r') as handle:
            # Iterating a text file yields its lines, exactly like readlines().
            lines=list(handle)
        return lines
    
    def http_request(url,proxy=''):
        """Send a GET request to ``url`` and describe the outcome as one text line.

        Args:
            url: The URL to request.
            proxy: Value passed to ``requests.get`` as ``proxies``.

        Returns:
            ``url`` followed by a tab and either the HTTP status code or, if
            the request raised, the text of the exception. The same line is
            also printed.
        """
        headers={
            'User-Agent':'curl/3.03',
            'Connection':'close'    # alternative value: keep-alive
        }
        try:
            r=requests.get(url,headers=headers,proxies=proxy,timeout=15,verify=False)
            urlresult=url+'\t'+str(r.status_code)
        except Exception as e:
            urlresult=url+'\t'+str(e)
        # Print and return outside of a `finally` clause: a `return` inside
        # `finally` would swallow KeyboardInterrupt/SystemExit, and in that
        # case `urlresult` would not even be bound yet.
        print(urlresult)
        return urlresult
    
    def main():
        """Request every URL of the input file in a process pool and log the results.

        Reads the command-line options, loads the URL list, optionally asks
        for proxy credentials, submits one ``http_request`` task per URL and
        appends each result line to the output file. Once every task has
        finished (or timed out) the pool is terminated explicitly so the
        script cannot hang on exit, and timing information is printed.
        """
        start_time=datetime.datetime.now()
        inputFile, outputFile, proxyServer, processNum=get_parameter()
        allUrl=get_url(inputFile)
        proxies=set_proxy(proxyServer)
        # The URL is the last whitespace-separated field of a line. Blank lines
        # are skipped; they would otherwise raise IndexError on [-1].
        urls=[fields[-1] for fields in (line.split() for line in allUrl) if fields]
        print('总URL数量:{}'.format(len(urls)))
        # Upper bound, in seconds, for waiting on one outstanding result. It only
        # matters when a worker is stuck; finished results are returned at once.
        result_timeout=300

        def writer_log(urlresult):
            # Runs in the pool's result-handler thread of the parent process.
            # It must never raise: an exception here kills that thread and the
            # pool then never finishes, which leaves the script unable to exit.
            try:
                with open(outputFile,'a+',errors='replace') as wf:
                    wf.write(urlresult+'\n')
            except Exception as e:
                print('进程异常:{}'.format(str(e) or type(e).__name__))

        p=Pool(processNum)
        try:
            pending=[]
            for url in urls:
                pending.append(p.apply_async(http_request,args=(url,proxies),callback=writer_log))
            p.close()
            # Wait for every task, not just the last one submitted. The callback
            # has already run by the time get() returns, so no log line is lost
            # when the pool is terminated afterwards.
            for result in pending:
                try:
                    result.get(timeout=result_timeout)
                except Exception as e:
                    print('进程异常:{}'.format(str(e) or type(e).__name__))
        finally:
            # Force the workers down instead of relying on a plain join(),
            # which is where the script could otherwise hang.
            p.terminate()
            p.join()
        end_time=datetime.datetime.now()
        print('开始时间:{}'.format(start_time))
        print('结束时间:{}'.format(end_time))
        print('总耗时:{}'.format(end_time-start_time))
        print('结果保存在:{}'.format(outputFile))
    
    # Run only when executed as a script. multiprocessing can re-import this
    # module in its worker processes (spawn start method); the guard keeps
    # those imports from starting main() again.
    if __name__=='__main__':
        main()
    

      

  • 相关阅读:
    centos 安装 TortoiseSVN svn 客户端
    linux 定时任务 日志记录
    centos6.5 安装PHP7.0支持nginx
    linux root 用户 定时任务添加
    composer 一些使用说明
    laravel cookie写入
    laravel composer 安装指定版本以及基本的配置
    mysql 删除重复记录语句
    linux php redis 扩展安装
    linux php 安装 memcache 扩展
  • 原文地址:https://www.cnblogs.com/wlinuxtop/p/13170313.html
Copyright © 2011-2022 走看看