zoukankan      html  css  js  c++  java
  • 爬取百度图片中的图片(代码)

    import requests
    import os
    import uuid
    
    
    def get_id_uuid1():
        """Return a freshly generated version-1 UUID as a string without dashes."""
        # str(UUID) is the dashed form; dropping the dashes leaves the hex digits.
        return str(uuid.uuid1()).replace('-', '')
    
    def test(pages_start,page_stop):
        """Print the result offsets that getManyPages would request.

        Offsets run from ``30 * pages_start`` up to and including
        ``30 * page_stop`` in steps of 30, one per line.
        """
        first_offset = 30 * pages_start
        # The range end is exclusive, so go one full page past page_stop.
        end_offset = 30 * (page_stop + 1)
        for offset in range(first_offset, end_offset, 30):
            print(offset)
    
    #def getManyPages(keyword, pages):
    def getManyPages(keyword, pages_start,page_stop):
        """Fetch Baidu image-search result pages for ``keyword``.

        One GET request is sent to the ``acjson`` endpoint per result offset,
        from ``30 * pages_start`` up to and including ``30 * page_stop`` in
        steps of 30 (each request asks for 30 results via ``rn``).

        Args:
            keyword: Search term, sent as both ``queryWord`` and ``word``.
            pages_start: Index of the first page to request.
            page_stop: Index of the last page to request (inclusive).

        Returns:
            A list holding the ``data`` value of every JSON response that
            carried a non-empty one, in request order. Failed requests and
            responses without data are reported on stdout and skipped, so the
            result never contains ``None`` entries.
        """
        url = 'https://image.baidu.com/search/acjson'
        # Headers do not change between requests, so build them once.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36',
            'Connection': 'keep-alive',
            'content-type': 'application/json'
        }
        # Query parameters shared by every page; only 'pn' (the result offset)
        # differs per request and is overridden inside the loop.
        base_params = {
            'tn': 'resultjson_com',
            'ipn': 'rj',
            'ct': 201326592,
            'is': '',
            'fp': 'result',
            'queryWord': keyword,
            'cl': 2,
            'lm': -1,
            'ie': 'utf-8',
            'oe': 'utf-8',
            'adpicid': '',
            'st': -1,
            'z': '',
            'ic': 0,
            'word': keyword,
            's': '',
            'se': '',
            'tab': '',
            'width': '',
            'height': '',
            'face': 0,
            'istype': 2,
            'qc': '',
            'nc': 1,
            'fr': '',
            'pn': 0,
            'rn': 30,
            'gsm': '3',
            '1550217860355': ''
        }
        urls = []
        for offset in range(30 * pages_start, 30 * page_stop + 30, 30):
            params = dict(base_params, pn=offset)
            try:
                # requests timeouts are expressed in seconds; keep this short
                # so a single stalled request cannot block the whole crawl.
                response = requests.get(url, headers=headers, params=params, timeout=10)
                payload = response.json()
            except (requests.RequestException, ValueError) as e:
                # Network failure or a body that is not valid JSON: report it
                # and carry on with the next page (best effort).
                print(e)
                continue
            data = payload.get('data') if isinstance(payload, dict) else None
            if data:
                urls.append(data)
            else:
                # Never append None/empty pages: consumers iterate each entry.
                print('第 %d 条起的结果页没有数据,已跳过' % offset)
        return urls
    
    
    def getImg(dataList, localPath):
        """Download the thumbnail of every search result into ``localPath``.

        Args:
            dataList: Iterable of result pages, each an iterable of dict-like
                result items. An item's ``thumbURL`` value is the image to
                download; items without one are reported and skipped. Empty
                or ``None`` pages are skipped.
            localPath: Directory to save images into. It is created (with any
                missing parents) if it does not exist.

        Each image is written as ``<uuid>.jpg`` inside ``localPath``. A failed
        download is printed and skipped so one bad URL does not stop the rest.
        """
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36',
            'Connection': 'keep-alive',
            'content-type': 'application/json'
        }
        # makedirs also creates missing parent directories and, with
        # exist_ok=True, does not fail if the directory is already there.
        os.makedirs(localPath, exist_ok=True)
        for page in dataList:
            if not page:
                # A page that came back without data has nothing to download.
                continue
            for item in page:
                thumb_url = item.get('thumbURL')
                if thumb_url is None:
                    print('图片链接不存在')
                    continue
                print('正在下载:%s' % thumb_url)
                try:
                    # Timeout is in seconds; without one a stalled server
                    # would hang the run indefinitely.
                    ir = requests.get(thumb_url, headers=headers, timeout=10)
                    # Do not save an HTTP error page under a .jpg name.
                    ir.raise_for_status()
                except requests.RequestException as e:
                    print(e)
                    continue
                # os.path.join works whether or not localPath ends with a
                # path separator.
                target = os.path.join(localPath, '%s.jpg' % get_id_uuid1())
                with open(target, 'wb') as f:
                    f.write(ir.content)
    
    if __name__ == '__main__':
        # Script entry point: for each keyword, fetch result pages 1 through 30
        # and save the thumbnails under /root/img/.
        keylist = ['小泽玛利亚']
        for keyword in keylist:
            print(keyword)
            pages = getManyPages(keyword, 1, 30)
            getImg(pages, '/root/img/')
    
  • 相关阅读:
    20165309 Linux安装及学习
    20165309 技能学习经验与C语言
    20165309 我期望的师生关系
    20165317-我期望的师生关系
    20165308 学习基础和C语言基础调查
    20165308 我期望的师生关系
    20165320 结对编程学习第一周
    20165320 第七周学习总结
    20165320 第六周学习总结
    20165320 实验一 java环境的熟悉
  • 原文地址:https://www.cnblogs.com/sdhzdtwhm/p/10437018.html
Copyright © 2011-2022 走看看