zoukankan      html  css  js  c++  java
  • 爬虫2 urllib3 爬取30张百度图片

    import urllib3
    import re
    # 下载百度首页页面的所有图片
    # 1.    找到目标数据
    # page_url = 'http://image.baidu.com/search/index?tn=baiduimage&ct=201326592&lm=-1&cl=2&ie=gb18030&word=%CD%BC%C6%AC&fr=ala&ala=1&alatpl=others&pos=0'
    # http = urllib3.PoolManager()
    # res = http.request('get',page_url)
    # print(res.data.decode('utf-8'))
    
    # The search results are fetched through an Ajax endpoint rather than the
    # static search page. NOTE(review): pn/rn in the query string presumably
    # select the result offset and page size (30 each) - confirm against the API.
    ajax_url = 'http://image.baidu.com/search/acjson?tn=resultjson_com&ipn=rj&ct=201326592&is=&fp=result&queryWord=%E5%9B%BE%E7%89%87&cl=2&lm=-1&ie=utf-8&oe=utf-8&adpicid=&st=&z=&ic=&hd=&latest=&copyright=&word=%E5%9B%BE%E7%89%87&s=&se=&tab=&width=&height=&face=&istype=&qc=&nc=&fr=&expermode=&force=&pn=30&rn=30&gsm=1e&1546957772498='
    http = urllib3.PoolManager()
    res = http.request('get',ajax_url)
    # Extract every "thumbURL" value from the raw response text. The capture
    # group ends at the closing double quote of the value; ending it at the
    # following comma would leave a trailing '"' on each URL and make the
    # image requests below target malformed addresses.
    img_urls = re.findall(r'"thumbURL":"(.*?)"',res.data.decode())
    # Referer header sent along with every image request.
    headers = {
        'Referer':'https://www.baidu.com/s?ie=utf-8&wd=%E5%9B%BE%E7%89%87'
    }
    for i , img_url in enumerate(img_urls):
        # Request the thumbnail itself; the response is left in `img`.
        # NOTE(review): nothing visible here writes img.data to disk - confirm
        # whether a save step (e.g. one file per index `i`) is missing.
        img = http.request('get',img_url,headers=headers)
  • 相关阅读:
    HDU 1114 Piggy-Bank
    HDU 2955 Robberies
    NTOJ 290 动物统计(加强版)
    POJ 3624 Charm Bracelet
    HDU 2602 Bone Collector
    POJ 1523 SPF(无向图割顶)
    HDU 5311 Hidden String
    HDU 1421 搬寝室
    HDU 1058 Humble Numbers
    POJ 3259 Wormholes(spfa判负环)
  • 原文地址:https://www.cnblogs.com/cxhzy/p/10260839.html
Copyright © 2011-2022 走看看