zoukankan      html  css  js  c++  java
  • PYTHON 爬虫 baidu美女图片

    from urllib import request
    import re
    import os
    # Baidu image-search results page. The `word` and `oq` query parameters
    # carry the percent-encoded UTF-8 search keyword (`ie=utf-8`).
    _SEARCH_URL = "https://image.baidu.com/search/index?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1&st=-1&fm=index&fr=&hs=0&xthttps=111111&sf=1&fmq=&pv=&ic=0&nc=1&z=&se=1&showtab=0&fb=0&width=&height=&face=0&istype=2&ie=utf-8&word=%E7%BE%8E%E5%A5%B3&oq=%E7%BE%8E%E5%A5%B3&rsp=-1"

    # Lazily match from "https://" up to the first literal ".jpg". Whitespace,
    # quotes, angle brackets and backslashes are excluded so that a match cannot
    # run out of one attribute/JSON string into unrelated text.
    _JPG_URL_RE = re.compile(r"""https://[^\s"'<>\\]+?\.jpg""")


    def main(search_url=_SEARCH_URL, output_dir="D:/image/", timeout=30):
        """Download every ``.jpg`` linked from a search-results page.

        The page at ``search_url`` is fetched, scanned for ``https://...jpg``
        links, and each distinct link is saved into ``output_dir`` as
        ``0.jpg``, ``1.jpg``, ... in order of first appearance.

        Args:
            search_url: URL of the page to scan for image links.
            output_dir: Directory that receives the images; created if missing.
            timeout: Seconds to wait on each network operation.

        Returns:
            The number of images written to ``output_dir``.

        Raises:
            OSError: If the search page itself cannot be fetched or the output
                directory cannot be created. Failures on individual images are
                reported on stdout and skipped.
        """
        with request.urlopen(search_url, timeout=timeout) as page:
            # Tolerate stray invalid bytes instead of aborting the whole run.
            html = page.read().decode("utf-8", errors="replace")

        os.makedirs(output_dir, exist_ok=True)

        seen = set()
        saved = 0  # doubles as the next file name, starting at 0
        for url in _JPG_URL_RE.findall(html):
            # The same link usually appears several times in the page source.
            if url in seen:
                continue
            seen.add(url)

            print(url)
            try:
                with request.urlopen(url, timeout=timeout) as response:
                    data = response.read()
            except OSError as exc:
                # URLError/HTTPError and socket timeouts are all OSError
                # subclasses; one dead link must not stop the remaining ones.
                print("skipped", url, exc)
                continue

            # The pattern guarantees the link ends in ".jpg", so the extension
            # is fixed rather than sliced out of the URL.
            target = os.path.join(output_dir, str(saved) + ".jpg")
            with open(target, "wb") as image_file:
                image_file.write(data)
            saved += 1

        return saved
    if __name__ == '__main__':
        # Run the download only when executed as a script, not on import.
        main()
  • 相关阅读:
    [BZOJ4553][TJOI2016&HEOI2016]序列
    树套树乱讲的代码
    树套树乱讲
    [Luogu4174][NOI2006]最大获益
    [BZOJ3879]SvT
    [BZOJ3611][HEOI2014]大工程
    [BZOJ1501][NOI2005]智慧珠游戏
    [BZOJ1499][NOI2005]瑰丽华尔兹
    [BZOJ3460] Jc的宿舍
    [HDU4812]D Tree
  • 原文地址:https://www.cnblogs.com/tuozizhang/p/11086546.html
Copyright © 2011-2022 走看看