  • Remote deployment and monitoring with Scrapyd

    1. Install Scrapyd

    sudo pip3.6 install scrapyd # install the scrapyd service
    sudo pip3.6 install scrapyd-client # install the scrapyd client (provides scrapyd-deploy)
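    A quick sanity check that both packages ended up under the same interpreter:

    pip3.6 show scrapyd scrapyd-client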
    

    1.1 Edit default_scrapyd.conf so the service can be reached from outside

    vim /usr/local/python3.6/lib/python3.6/site-packages/scrapyd/default_scrapyd.conf
    bind_address = 127.0.0.1   change to   bind_address = 0.0.0.0   # allow access by IP from other machines
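    For reference, after the change the [scrapyd] section of default_scrapyd.conf should contain roughly the following (other keys such as eggs_dir and logs_dir are omitted here; exact defaults can vary between scrapyd versions):

    [scrapyd]
    bind_address = 0.0.0.0
    http_port    = 6800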
    

      

    2. Start the scrapyd service

    scrapyd # start the service; it listens on port 6800 by default
    

      

    Remote access: open a browser and go to http://192.168.105.99:6800 (the server's IP)
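    The same check can be done from the command line; daemonstatus.json is the status endpoint used again in the Python examples further down (replace the IP with your server's address):

    curl http://192.168.105.99:6800/daemonstatus.json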

    3. Configure deployment of the Scrapy project to scrapyd (these entries go in the project's scrapy.cfg; a complete example is sketched at the end of this step)

    # 1. Name of this deploy target
    [deploy:scrapy_Tencent]
    

      

    # 2. URL of the remote host running the scrapyd service; localhost means the local machine

    url = http://localhost:6800/
    #url = http://192.168.xx.xxx:6800
    

      

    # 3. Project name

    project = Tencent
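    Putting the entries above together, the project's scrapy.cfg would look roughly like this (the [settings] section is the one generated by scrapy startproject; the module name Tencent.settings is assumed here):

    # scrapy.cfg in the project root
    [settings]
    default = Tencent.settings

    [deploy:scrapy_Tencent]
    url = http://localhost:6800/
    project = Tencent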
    

      

    4. Deploy the Scrapy project to scrapyd

    # -p specifies the project name; this deploys Tencent using the scrapy_Tencent target configured above
    scrapyd-deploy scrapy_Tencent -p Tencent
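    A successful deploy prints output roughly like the following (a sketch of typical scrapyd-deploy output; the version number and node name will differ):

    Packing version 1546328964
    Deploying to project "Tencent" in http://localhost:6800/addversion.json
    Server response (200):
    {"status": "ok", "project": "Tencent", "version": "1546328964", "spiders": 1, "node_name": "..."}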
    

      


    5. Start a specific spider in the deployed project

    curl http://localhost:6800/schedule.json -d project=Tencent -d spider=tencent
    
    {"status": "ok", "jobid": "2f240b1171bc11e7ae5aa45e60e36247", "node_name": "PowerMac.local"}
    

      

    6. Stop a specific spider in the project (using the jobid returned by schedule.json)

    curl http://localhost:6800/cancel.json -d project=Tencent -d job=2f240b1171bc11e7ae5aa45e60e36247
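    Like schedule.json, cancel.json answers with a JSON status; a successful cancellation reports the job's previous state:

    {"status": "ok", "prevstate": "running"}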
    

      


    7. Stop the scrapyd service

    Ctrl + C (in the terminal where scrapyd is running in the foreground)
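    Ctrl + C only works while scrapyd runs in the foreground. To keep it running after the terminal closes, one common approach (an assumption, not part of the original setup) is to start it in the background and kill the process to stop it:

    nohup scrapyd > scrapyd.log 2>&1 &   # start in the background, logging to scrapyd.log
    pkill -f scrapyd                     # stop it later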
    

      

    Remote deployment and access to scrapyd
    1. Check that port 6800 (scrapyd's default port) is open on the server; see the commands sketched below.
    2. Change the default bind address:
        1. cd /usr/lib/python3.6/site-packages/scrapyd  (or locate the package with: find / -name scrapyd)
        2. In default_scrapyd.conf, change bind_address = 127.0.0.1 to bind_address = 0.0.0.0
    3. If binding to 0.0.0.0 raises an error:
        1. cd /usr/lib/python3.6/site-packages/twisted/names  (or locate the file with: find / -name server.py)
        2. python3.6 server.py
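    A couple of commands that help with item 1 above (assuming a CentOS-style host where ss and firewalld are available):

    ss -tlnp | grep 6800                          # is anything listening on port 6800?
    firewall-cmd --permanent --add-port=6800/tcp  # open the port if the firewall blocks it
    firewall-cmd --reload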

    Calling the scrapyd API from Python

    import requests
    # ---------------------------------------------------------------------------
    # Deploy the project to scrapyd first, e.g.:  scrapyd-deploy scrapy_crm -p CRM
    # ---------------------------------------------------------------------------
    # Check the scrapyd daemon status / load
    daemonstatus_url = 'http://localhost:6800/daemonstatus.json'
    response = requests.get(url=daemonstatus_url)
    # example response: {"status": "ok", "running": "0", "pending": "0", "finished": "0", "node_name": "node-name"}

    # ---------------------------------------------------------------------------
    # Add a project version to scrapyd; an existing version is overwritten, otherwise it is created
    # (addversion.json also expects the packaged project egg uploaded as the 'egg' file field;
    #  scrapyd-deploy takes care of that for you)
    addversion_url = 'http://localhost:6800/addversion.json'
    addversion_data = {
        'project': '',  # project name
        'version': '',  # version number
    }
    response = requests.post(url=addversion_url, data=addversion_data)
    # example response: {"status": "ok", "spiders": 3}

    # ---------------------------------------------------------------------------
    # Run a spider registered in scrapyd
    schedule_url = 'http://localhost:6800/schedule.json'
    schedule_data = {
        'project': '',   # project name
        'spider': '',    # spider name
        'setting': '',   # a Scrapy setting to use when running the spider, optional
        'jobid': '',     # job id for this run; needed later to cancel the spider, optional
        '_version': '',  # project version to use, optional
    }
    response = requests.post(url=schedule_url, data=schedule_data)
    # example response: {"status": "ok", "jobid": "6487ec79947edab326d6db28a2d86511e8247444"}
    # ---------------------------------------------------------------------------
    # Stop a spider running in scrapyd
    cancel_url = 'http://localhost:6800/cancel.json'
    cancel_data = {
        'project': '',  # project name
        'job': '',      # job id of the run to stop
    }
    response = requests.post(url=cancel_url, data=cancel_data)
    # example response: {"status": "ok", "prevstate": "running"}
    # ---------------------------------------------------------------------------
    # List the projects uploaded to scrapyd
    listprojects_url = 'http://localhost:6800/listprojects.json'
    response = requests.get(url=listprojects_url)
    # example response: {"status": "ok", "projects": ["myproject", "otherproject"]}
    # ---------------------------------------------------------------------------
    # List the versions available for a project
    listversions_url = 'http://localhost:6800/listversions.json?project=myproject'
    response = requests.get(url=listversions_url)
    # example response: {"status": "ok", "versions": ["r99", "r156"]}
    # ---------------------------------------------------------------------------
    # List the spiders available in (the latest version of) a project
    listspiders_url = 'http://localhost:6800/listspiders.json?project=myproject'
    response = requests.get(url=listspiders_url)
    # example response: {"status": "ok", "spiders": ["spider1", "spider2", "spider3"]}
    # ---------------------------------------------------------------------------
    # Get the job status (pending / running / finished) of a project
    listjobs_url = 'http://localhost:6800/listjobs.json?project=myproject'
    response = requests.get(url=listjobs_url)
    # example response:
    # {"status": "ok",
    #  "pending": [{"id": "78391cc0fcaf11e1b0090800272a6d06", "spider": "spider1"}],
    #  "running": [{"id": "422e608f9f28cef127b3d5ef93fe9399", "spider": "spider2",
    #               "start_time": "2012-09-12 10:14:03.594664"}],
    #  "finished": [{"id": "2f16646cfcaf11e1b0090800272a6d06", "spider": "spider3",
    #                "start_time": "2012-09-12 10:14:03.594664", "end_time": "2012-09-12 10:24:03.594664"}]}
    # ---------------------------------------------------------------------------
    # Delete a specific version of a project
    delversion_url = 'http://localhost:6800/delversion.json'
    delversion_data = {
        'project': '',  # project name
        'version': '',  # version number
    }
    response = requests.post(url=delversion_url, data=delversion_data)
    # example response: {"status": "ok"}
    # ---------------------------------------------------------------------------
    # Delete a project and all of its uploaded versions
    delproject_url = 'http://localhost:6800/delproject.json'
    delproject_data = {
        'project': '',  # project name
    }
    response = requests.post(url=delproject_url, data=delproject_data)
    # example response: {"status": "ok"}
    # ---------------------------------------------------------------------------
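    As a small usage sketch built on the endpoints above (the project and spider names are the hypothetical ones from the earlier sections), scheduling a spider and waiting for its job to finish can be wrapped like this:

    import time
    import requests

    SCRAPYD = 'http://localhost:6800'

    def run_and_wait(project, spider, poll_seconds=5):
        """Schedule a spider via schedule.json, then poll listjobs.json until the job leaves pending/running."""
        jobid = requests.post(SCRAPYD + '/schedule.json',
                              data={'project': project, 'spider': spider}).json()['jobid']
        while True:
            jobs = requests.get(SCRAPYD + '/listjobs.json', params={'project': project}).json()
            active = {j['id'] for j in jobs.get('pending', []) + jobs.get('running', [])}
            if jobid not in active:
                return jobid
            time.sleep(poll_seconds)

    # e.g. run_and_wait('Tencent', 'tencent')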
    

      
