zoukankan      html  css  js  c++  java
  • 爬虫2 urllib3 爬取30张百度图片

    import urllib3
    import re
    # Download the images listed on the Baidu image-search page.
    # Step 1: locate the target data.
    # Disabled alternative: fetch the search-result page itself.
    # page_url = 'http://image.baidu.com/search/index?tn=baiduimage&ct=201326592&lm=-1&cl=2&ie=gb18030&word=%CD%BC%C6%AC&fr=ala&ala=1&alatpl=others&pos=0'
    # http = urllib3.PoolManager()
    # res = http.request('get',page_url)
    # print(res.data.decode('utf-8'))

    # The Ajax endpoint is queried instead (the query string carries pn=30 and rn=30).
    ajax_url = 'http://image.baidu.com/search/acjson?tn=resultjson_com&ipn=rj&ct=201326592&is=&fp=result&queryWord=%E5%9B%BE%E7%89%87&cl=2&lm=-1&ie=utf-8&oe=utf-8&adpicid=&st=&z=&ic=&hd=&latest=&copyright=&word=%E5%9B%BE%E7%89%87&s=&se=&tab=&width=&height=&face=&istype=&qc=&nc=&fr=&expermode=&force=&pn=30&rn=30&gsm=1e&1546957772498='
    http = urllib3.PoolManager()
    res = http.request('get', ajax_url)
    # Capture each "thumbURL" value up to its closing quote. Ending the match
    # at the first comma (the previous pattern) truncated any URL containing
    # a comma and otherwise left the closing quote inside the captured text.
    img_urls = re.findall(r'"thumbURL":"(.*?)"', res.data.decode('utf-8'))
    # Referer header sent along with every image request.
    headers = {
        'Referer': 'https://www.baidu.com/s?ie=utf-8&wd=%E5%9B%BE%E7%89%87'
    }
    for i, img_url in enumerate(img_urls):
        # NOTE(review): the response is fetched but not used in the visible
        # part of this snippet; the step that stores img.data (presumably
        # named using i) appears to be missing here - confirm against the
        # full script.
        img = http.request('get', img_url, headers=headers)
  • 相关阅读:
    vue的单向数据流
    vue的组件基础
    vue v-for的数组改变导致页面不渲染解决方法
    Binary Heap
    Types of Binary Tree
    Merge Sort
    Master Theorem
    Insertion Sort
    Amazon Redshift and Massively Parallel Processing
    Bubble Sort
  • 原文地址:https://www.cnblogs.com/cxhzy/p/10260839.html
Copyright © 2011-2022 走看看