zoukankan      html  css  js  c++  java
  • concurrent 多线程

     1 # -*- coding: utf-8 -*-
     2 from bs4 import BeautifulSoup
     3 import concurrent.futures
     4 import requests
     5 
     6 
     7 
     8 hd = {
     9     "cept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
    10     "Accept-Encoding": "gzip, deflate",
    11     "Accept-Language": "zh-CN,zh;q=0.9",
    12     "Cache-Control": "max-age=0",
    13     "Connection": "keep-alive",
    14     "Host": "www.xxxx.com",
    15     "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36"
    16     }
    17 
    18 # 输出到文件
    19 def write(path, text):
    20     f1 = open(path, 'ab')
    21     f1.write(bytes(text, encoding="utf-8"))
    22     f1.close()
    23 
    24 # 爬取动作
    25 def start(url):
    26     demo = BeautifulSoup(requests.get(url, headers = hd).text, "html.parser")
    27     for a in demo.find_all('div', class_='textlist-body'):
    28         print(a)
    29         # write("out.txt", url)
    30         write("out.txt", '{}, {}
    '.format(url, a))
    31 
    32
    33 def Country_url():
    34     url = "https://www.xxxx.com/"
    35     demo = BeautifulSoup(requests.get(url+"airports", headers = hd).text, "html.parser")
    36 
    37     for i in demo.find_all('div', class_='textlist-body'):
    38         url_li = [url+x.string.replace(" ", "-") for x in i if x != " "]  # 生成URL列表
    39         with concurrent.futures.ThreadPoolExecutor() as executor:
    40             results = executor.map(start, url_li)  # 利用map对列表传递
    41             for result in results:
    42                 print(result)
    43 
    44 
    45 
    46 if __name__ == "__main__":
    47     Country_url()
  • 相关阅读:
    !function() {}()
    element.dataset API
    正则匹配 数字和英文状态下的逗号
    《vim实用技巧》读书笔记
    ajax分页
    smarty分页类
    数组排序
    数组大类
    自动刷新价格
    简单购物车
  • 原文地址:https://www.cnblogs.com/bcode/p/13909218.html
Copyright © 2011-2022 走看看