zoukankan      html  css  js  c++  java
  • 爬虫

    from urllib.request import *
    import re
    #url = 'https://image.baidu.com/search/index?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1&st=-1&fm=result&fr=&sf=1&fmq=1548299141933_R&pv=&ic=&nc=1&z=&hd=&latest=&copyright=&se=1&showtab=0&fb=0&width=&height=&face=0&istype=2&ie=utf-8&word=%E7%BE%8E%E5%A5%B3'
    # Baidu image-search results page; the keyword is the URL-encoded `word=`
    # query parameter at the end.
    url = 'https://image.baidu.com/search/index?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1&st=-1&fm=result&fr=&sf=1&fmq=1548300267853_R&pv=&ic=&nc=1&z=&hd=&latest=&copyright=&se=1&showtab=0&fb=0&width=&height=&face=0&istype=2&ie=utf-8&word=%E8%8C%83%E5%86%B0%E5%86%B0'

    # Upper bound on saved images. The original loop ran while `index < 50`
    # with `index` starting at 1, i.e. at most 49 files.
    max_images = 49

    # Fetch the results page. The context manager closes the HTTP response
    # even if reading/decoding fails; the timeout keeps a stalled connection
    # from hanging the script forever.
    with urlopen(url, timeout=30) as html:
        # errors='replace' so a single undecodable byte in the page does not
        # abort the whole run; the image links matched below are plain text
        # inside the page source and are unaffected.
        obj = html.read().decode(errors='replace')

    # Every `"objURL":"..."` entry in the page source holds one image link.
    urls = re.findall(r'"objURL":"(.*?)"',obj)

    # `index` is the 1-based number of the image currently being attempted;
    # it only advances after a successful download, so a failed link does not
    # leave a gap in the numbered output files.
    index = 1
    for image_url in urls:
        if index > max_images:
            break
        print('正在下载第%d张'%(index))
        try:
            urlretrieve(image_url, '范冰冰'+ str(index) + '.jpg')
        except Exception:
            # Best-effort download: report the failure and move on to the
            # next link, reusing the same index for the next attempt.
            print('下载失败%d张' %index)
        else:
            index += 1

    # Always report completion, including when the page yielded fewer links
    # than `max_images` (the original only printed this after hitting the cap).
    print('下载完成')
  • 相关阅读:
    jdk安装
    ubuntu 搜狗输入法成功安装
    matlab load
    matlab save 命令
    数字信号处理的思考
    非最大值抑制nms
    HMM,MEMM,CRF模型
    [bzoj2301][HAOI2011]Problem b
    [洛谷P3935]Calculating
    [bzoj4816][Sdoi2017]数字表格
  • 原文地址:https://www.cnblogs.com/xiaomai-rhce/p/10314359.html
Copyright © 2011-2022 走看看