zoukankan      html  css  js  c++  java
  • Python爬取百度图片

    import urllib.request as urqt
    import urllib.parse as urps
    from urllib.parse import quote
    import requests
    import os
    import re
    import sys
    def gethtml(url):
        """Fetch ``url`` over HTTP and return the response body as text.

        The request is sent with a desktop Firefox User-Agent, a 10 second
        timeout, and redirects are not followed.

        Args:
            url: Address of the page to download.

        Returns:
            The decoded response body (``Response.text``).

        Raises:
            requests.RequestException: propagated unchanged from ``requests``
                on connection errors or timeouts.
        """
        header = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:80.0) Gecko/20100101 Firefox/80.0"}
        # The context manager closes the session (and its pooled connections)
        # even when the request raises; previously the session was leaked.
        with requests.Session() as session:
            # Assignment replaces the session's default headers instead of
            # merging into them, so only the User-Agent above is configured.
            session.headers = header
            response = session.get(url, timeout=10, allow_redirects=False)
            return response.text
    def getbyte(url):
        """Download ``url`` and return the raw response body as bytes.

        The request carries a desktop Firefox User-Agent and uses a 10 second
        timeout, matching ``gethtml``, so a stalled server cannot block the
        caller indefinitely.

        Args:
            url: Address of the resource to download.

        Returns:
            The complete response body as ``bytes``.

        Raises:
            urllib.error.URLError: if the resource cannot be opened.
            OSError: on lower-level I/O failures such as a socket timeout.
        """
        header = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:80.0) Gecko/20100101 Firefox/80.0"}
        request = urqt.Request(url, headers=header)
        # Use the response as a context manager so the underlying connection
        # is closed as soon as the body has been read.
        with urqt.urlopen(request, timeout=10) as response:
            return response.read()
    def makejpg(url, f):
        """Download ``url`` and write its bytes to the open file ``f``.

        ``f`` is always closed before this function returns or raises, so the
        caller must not use it afterwards.

        Args:
            url: Address of the image to download (passed to ``getbyte``).
            f: A writable binary file object; ownership passes to this
                function.

        Raises:
            Whatever ``getbyte`` or ``f.write`` raises; ``f`` is closed first.
        """
        try:
            f.write(getbyte(url))
        finally:
            # Close on failure as well, so a failed download does not leak
            # the file handle.
            f.close()
    def getintofold(string, root=r"D:信息python一些成品百度图片爬虫"):
        """Change the working directory to the download folder for a keyword.

        The process first changes into ``root`` and then into a sub-folder
        named ``string + "图片"``, creating that sub-folder if it does not
        exist yet.

        Args:
            string: Search keyword; used to build the sub-folder name.
            root: Base directory that holds the per-keyword folders. It must
                already exist. The default is the location hard-coded in the
                original script.
                NOTE(review): the default looks like a Windows path whose
                backslashes were lost -- confirm the intended location.

        Raises:
            OSError: if ``root`` does not exist, or the sub-folder cannot be
                created or entered.
        """
        os.chdir(root)
        want = string + "图片"
        # exist_ok avoids the separate listdir() membership check and the
        # race between checking for the folder and creating it.
        os.makedirs(want, exist_ok=True)
        os.chdir(want)
    def getall(num, url):
        """Download up to ``num`` thumbnails found on the result pages of ``url``.

        Each page is fetched with ``gethtml`` and scanned for ``"thumbURL"``
        entries. Every image is saved in the current working directory as
        ``<n>.jpg``, where ``n`` counts successful downloads starting at 1.
        Further pages are requested by appending ``&pn=<offset>`` to ``url``.

        Args:
            num: Maximum number of images to save.
            url: Search URL of the first result page.

        Returns:
            None. Progress and failures are reported with ``print``.

        Raises:
            Whatever ``gethtml`` raises while fetching a result page.
        """
        key = re.compile(r'"thumbURL":"(.*?)"')
        tot = 0   # images saved successfully so far
        seen = 0  # thumbnail URLs consumed so far; drives the paging offset
        page = url
        while tot < num:
            links = key.findall(gethtml(page))
            if not links:
                # No thumbnails on this page: stop instead of requesting the
                # same page forever.
                break
            for link in links:
                if tot >= num:
                    break
                seen += 1
                name = str(tot + 1) + ".jpg"
                try:
                    # The with-block guarantees the handle is released even
                    # when the download fails part-way.
                    with open(name, "wb") as f:
                        makejpg(link, f)
                except Exception:
                    # Exception (not BaseException) so Ctrl-C still aborts.
                    print("错误")
                    # Drop the empty or partial file left by the failure.
                    if os.path.exists(name):
                        os.remove(name)
                    continue
                tot += 1
                print("第 " + str(tot) + " 个已下载")
            # Advance by items consumed rather than items saved, so links that
            # keep failing are skipped instead of being retried endlessly.
            page = url + "&pn=" + str(seen)
    def endd():
        """Print a thank-you message and terminate the program.

        The original called ``g.msgbox``, but no name ``g`` is imported or
        defined in this file, so the call raised ``NameError``. The message is
        now printed to the console, like the rest of the script's output.

        Raises:
            SystemExit: always, via ``sys.exit()``.
        """
        print("感谢使用")
        sys.exit()
    def init():
        """Prompt for a keyword and a count, then download that many images.

        Reads both values from standard input, switches into the keyword's
        download folder with ``getintofold``, builds the Baidu image search URL
        for the percent-encoded keyword and passes it to ``getall``.

        Raises:
            ValueError: if the entered count is not an integer; this happens
                after the download folder has already been entered.
        """
        keyword = input("请输入想要的图片: ")
        count_text = input("请输入想要的数量: ")
        getintofold(keyword)
        # The keyword goes into the query string, so it is percent-encoded
        # as UTF-8 first.
        encoded = quote(keyword, encoding='utf-8')
        search_prefix = "https://image.baidu.com/search/index?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1&st=-1&fm=result&fr=&sf=1&fmq=1599885698346_R&pv=&ic=0&nc=1&z=&hd=&latest=&copyright=&se=1&showtab=0&fb=0&width=&height=&face=0&istype=2&ie=utf-8&sid=&word="
        wanted = int(count_text)
        getall(wanted, search_prefix + encoded)
    # Script entry point: start the interactive download as soon as the file runs.
    init()
    
    
  • 相关阅读:
    Swing中GridBagLayout布局的使用
    Android下Slidingmenu和actionbarsherlock的使用
    Swing基础知识
    Android下的ActionBar
    android下ADT的更新
    spark插件入门完整版本
    IOS开发准备 资料集锦
    Java调用本地浏览器
    Android源码在线查看网址
    Android中ProgressBar
  • 原文地址:https://www.cnblogs.com/olinr/p/13678868.html
Copyright © 2011-2022 走看看