zoukankan      html  css  js  c++  java
  • JD 评论晒图爬虫

    JD 评论晒图爬虫

    #coding=utf-8
    import requests
    import re
    import os
    
    __author__ = 'depy'
    
    """
    jd 评论晒图爬虫
    @productId 商品id
    @startpage 开始页数
    @endpage 结束页数
    """
    
    class JDPIC(object):
        """Crawler for the images attached to the reviews of one JD product.

        The instance stores the product id, the page range to walk and the HTTP
        headers sent with every request. `sendReq` fetches one page of the
        image-comment listing and returns the image URLs found on it;
        `downloadImageFile` saves one of those images to a local directory.
        """

        def __init__(self,productId,startpage,endpage=20):
            """Remember the crawl parameters; performs no network access.

            productId -- id of the product whose review images are fetched
            startpage -- first page of the listing to request
            endpage   -- last page of the listing to request (default 20)
            """
            self.headers = {
                'User-Agent':'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36',
                'Accept':'*/*',
                # Only advertise encodings requests decodes out of the box.
                # 'sdch' is never decoded and 'br' only when an optional brotli
                # package is installed, so offering them can leave r.text as
                # undecodable bytes and make the URL regex match nothing.
                'Accept-Encoding':'gzip, deflate',
                'Accept-Language':'zh-CN,zh;q=0.8',
                'Cookie':''
            }
            self.url = 'https://club.jd.com/discussion/getProductPageImageCommentList.action'
            self.startpage = startpage
            self.productId = productId
            self.endpage = endpage

        def sendReq(self,page):
            """Fetch one page of the image-comment listing.

            page -- page number passed to the listing endpoint

            Returns the list of strings captured after `"imageUrl":"//` in the
            response text, i.e. image URLs without scheme and leading slashes.
            Exceptions raised by `requests.get` (e.g. timeouts) propagate.
            """
            params = {
                'productId':self.productId,
                'isShadowSku':'0',
                'callback':'jQuery219465',
                'page':page,
                'pageSize':20
            }
            r = requests.get(self.url,params=params,headers=self.headers,timeout=10)
            # The body is scanned as plain text, so the 'callback' wrapper
            # around the payload does not need to be stripped first.
            return re.findall(r'"imageUrl":"//(.*?)"',r.text)

        def downloadImageFile(self,imgUrl,dirName='JDPIC1'):
            """Download one image and store it under `dirName`.

            imgUrl  -- image URL without scheme, as returned by `sendReq`
            dirName -- target directory, created if missing (default 'JDPIC1')

            The file is named after the last path segment of `imgUrl`. URLs
            without a usable file name and non-successful HTTP responses are
            reported and skipped instead of being written to disk. Exceptions
            raised by `requests.get` or by file I/O propagate.
            """
            # Drop a query string so it cannot end up in the file name.
            local_filename = imgUrl.split('?')[0].split('/')[-1]
            if not local_filename:
                # e.g. a URL ending in '/': there is nothing to name the file.
                print("Skip Image File (no file name)= %s" % imgUrl)
                return
            print("Download Image File= %s" % local_filename)
            r = requests.get('http://'+imgUrl, headers=self.headers, stream=True, timeout=20)
            try:
                if not r.ok:
                    # Do not save an error page as if it were an image.
                    print("Skip Image File (HTTP %s)= %s" % (r.status_code, local_filename))
                    return
                if not os.path.exists(dirName):
                    os.makedirs(dirName)
                with open(os.path.join(dirName, local_filename), 'wb') as f:
                    for chunk in r.iter_content(chunk_size=1024):
                        if chunk:
                            f.write(chunk)
            finally:
                # A streamed response holds its connection until closed.
                r.close()
    
    if __name__ == '__main__':
        # Replace 1111 with the id of the product to crawl; the other two
        # arguments are the first and last listing page to fetch.
        J = JDPIC(1111,51,100)
        # Walk every page in the inclusive range and save each image found.
        # The loop variable is not called `list`, which would shadow the builtin.
        for page in range(int(J.startpage),int(J.endpage)+1):
            for picurl in J.sendReq(page):
                J.downloadImageFile(picurl)

        # Single-argument print() gives the same output on Python 2 and 3.
        print("downpic success")
  • 相关阅读:
    排序算法的实现(冒泡,选择,插入 O(N*N)--理解方法实现
    HTTPS工作原理和TCP握手机制
    HTTP协议学习
    IP头,TCP头,UDP头,MAC帧头定义
    单链表的实现
    数字图像处理------中值滤波
    对于矩阵的理解-- by 孟岩老师
    java编码问题总结
    jsp数据库连接大全和数据库操作封装到Javabean
    构建一个高可扩展性javabean和jsp连接数据库操作
  • 原文地址:https://www.cnblogs.com/depycode/p/6933960.html
Copyright © 2011-2022 走看看