zoukankan html css js c++ java

爬虫2 urllib3 爬取30张百度图片

import urllib3
import re
# Fetch the thumbnails listed by a Baidu image-search result.
#
# 1. Locate the target data.
# The plain HTML search page was tried first; it is kept for reference only:
# page_url = 'http://image.baidu.com/search/index?tn=baiduimage&ct=201326592&lm=-1&cl=2&ie=gb18030&word=%CD%BC%C6%AC&fr=ala&ala=1&alatpl=others&pos=0'
# http = urllib3.PoolManager()
# res = http.request('get',page_url)
# print(res.data.decode('utf-8'))

# The Ajax endpoint is queried instead; the response body is scanned as text
# for "thumbURL" entries.
# NOTE(review): pn=30 / rn=30 in the query string presumably select the
# offset and the number of results -- confirm against the endpoint's behavior.
ajax_url = 'http://image.baidu.com/search/acjson?tn=resultjson_com&ipn=rj&ct=201326592&is=&fp=result&queryWord=%E5%9B%BE%E7%89%87&cl=2&lm=-1&ie=utf-8&oe=utf-8&adpicid=&st=&z=&ic=&hd=&latest=&copyright=&word=%E5%9B%BE%E7%89%87&s=&se=&tab=&width=&height=&face=&istype=&qc=&nc=&fr=&expermode=&force=&pn=30&rn=30&gsm=1e&1546957772498='
http = urllib3.PoolManager()
res = http.request('GET', ajax_url)

# Capture the text between the quotes of every "thumbURL" value.
# The match ends at the closing quote rather than at the first comma, so a URL
# that itself contains a comma is not truncated and the closing quote is not
# captured. `[^"]+` also skips entries whose value is empty.
img_urls = re.findall(r'"thumbURL":"([^"]+)"', res.data.decode())

# Every image request carries a Baidu Referer header.
# NOTE(review): presumably required by the image host's hotlink check -- confirm.
headers = {
    'Referer':'https://www.baidu.com/s?ie=utf-8&wd=%E5%9B%BE%E7%89%87'
}
for i, img_url in enumerate(img_urls):
    # NOTE(review): the listing ends here; in the visible portion the response
    # held in `img` (and the index `i`) is not used any further.
    img = http.request('GET', img_url, headers=headers)

查看全文

相关阅读:
Shell for
rsync 目录斜杠
 shell local
linux secureCRT utf-8编码显示
 eclipse maven 项目不显示 target目录
 如何打印身份证实际大小
 linux 去掉 ^M
hibernate 之集合映射中list映射
 hibernate 之复合主键映射
 hibernate 之组件映射

原文地址：https://www.cnblogs.com/cxhzy/p/10260839.html

最新文章
JDBC
AbstractList
工厂方法模式
 简单工厂
 Vector
AbstractCollection
集合
 浅谈：如何写一份好简历
 test markdown 2
Markdown 代码测试