zoukankan html css js c++ java

python爬取图片

import urllib.request
import requests
import json
from openpyxl import workbook  # 写入Excel表所用
from openpyxl import load_workbook  # 读取Excel表所用


def get_img(headers, url):
    """Download ``url`` and return the raw response body.

    Args:
        headers: Mapping of HTTP header names to values sent with the request.
        url: Address of the resource to fetch.

    Returns:
        The complete response body as ``bytes``.

    Raises:
        urllib.error.URLError: If the resource cannot be opened
            (``HTTPError`` is a subclass and is raised for HTTP error statuses).
    """
    request = urllib.request.Request(url, headers=headers)
    # Use the response as a context manager so the underlying connection is
    # closed as soon as the body has been read, instead of whenever the
    # object happens to be garbage-collected.
    with urllib.request.urlopen(request) as response:
        return response.read()


def main():
    """Export Vmall search results to ``hw.xlsx`` and download each product image.

    Requests result pages 1 through 3 of the search API, and for every record
    in a page's ``resultList``:

    * downloads the 428x428 product image with ``get_img`` and writes it to
      the local image path, and
    * appends one row with the record's fields to the worksheet.

    The workbook is saved once, after all pages have been processed.

    Raises:
        requests.HTTPError: If the search API answers with an error status.
    """
    # Columns written to the sheet, in order. Each name is also the key that
    # is read from every record, so header and rows cannot drift apart.
    fields = ("briefName", "photoName", "mobileVideoPath", "photoPath", "price", "priceAccurate",
              "productId", "promoLabels", "promotionInfo", "skuName", "webVideoPath")
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.89 Safari/537.36'}
    search_url = 'https://openapi.vmall.com/mcp/queryPrd?lang=zh-CN&country=CN&portal=1&keyword=36&pageSize=20&pageNum={}&searchSortField=0&searchSortType=desc&searchFlag=1&brandType=0&tid=c7a607e4827f084da988ab3816f08949&screenParams=%7B%7D&searchId=c7a607e4827f084da988ab3816f08949'
    image_url = 'https://res.vmallres.com/pimages/{}428_428_{}'
    # Raw string on purpose: in a normal literal "\42" is an octal escape for
    # a double quote, which silently corrupted the file name.
    # NOTE(review): the directory part looks like it lost its path separators
    # (perhaps E:\PyCharm项目\python\img\phone) -- confirm the intended folder.
    image_path = r'E:PyCharm项目pythonimgphone\428_428_{}'

    wb = workbook.Workbook()
    ws = wb.active
    ws.append(list(fields))

    for n in range(1, 4):
        print(n)
        # A timeout keeps a stalled connection from hanging the script, and
        # raise_for_status surfaces HTTP errors before JSON parsing is tried.
        response = requests.get(search_url.format(n), headers=headers, timeout=30)
        response.raise_for_status()

        for d in response.json()['resultList']:
            url = image_url.format(d["photoPath"], d["photoName"])
            print(url)
            # Download before opening the target so that a failed request
            # does not leave an empty image file behind.
            image = get_img(headers, url)
            with open(image_path.format(d["photoName"]), 'wb') as file:
                file.write(image)

            # promoLabels is stringified before being stored in the cell;
            # all other fields are written as returned by the API.
            ws.append([str(d[key]) if key == "promoLabels" else d[key] for key in fields])

    wb.save('hw.xlsx')


if __name__ == '__main__':
    main()

查看全文

相关阅读:
一秒解决 ERROR 1044 (42000): Access denied for user ''@'localhost' to database 'mysql' 问题
 30分钟让你学会 Spring事务管理属性
 判断是否是回文
 Linux自动获取IP地址重启不会消失
 The server quit without updating PID file (/var/lib/mysql/pc.pid).
线程的状态
 linux vim基本操作
 C++ 函数重载和参数的缺省值
 C++ 类中的3种访问权限和继承方式
 C++ 内存管理

原文地址：https://www.cnblogs.com/xing-29391/p/13352836.html