zoukankan      html  css  js  c++  java
  • 淘宝商品信息爬取,实现词云,并进行饼状图绘制及效果图

    对应json文件地址:https://blog.csdn.net/nicholas_K/article/details/86094530

    1. 获取了淘宝手机商品的评论和追评

    2. 对淘宝评论生成了词云

    3. 实现了淘宝手机商品版本的饼状图绘制

    import json
    import time
    import pygal
    import pymysql.cursors
    from wordcloud import WordCloud
    
    # 打开对应淘宝json文件。
    def get_comments(path='tb_comments_1.json'):
        """Load Taobao review records from a JSON dump.

        The file must contain a ``rateDetail`` object whose ``rateList`` entry
        is a list of review dicts.

        Args:
            path: JSON file to read. Defaults to the file name this script
                has always used, so existing no-argument calls still work.

        Returns:
            A list with one dict per review, holding the keys ``id``,
            ``content``, ``rateContent``, ``auctionSku`` and ``rateDate``.
            ``content`` is the text of the follow-up review; when a review
            has no follow-up it is the falsy ``appendComment`` value from the
            file, or ``None`` if that key is absent.

        Raises:
            OSError: the file cannot be opened.
            json.JSONDecodeError: the file is not valid JSON.
            KeyError: ``rateDetail``/``rateList`` or a required review field
                is missing.
        """
        with open(path, encoding='utf-8') as tb:
            comments_dict = json.load(tb)

        result_list = []
        for comment in comments_dict['rateDetail']['rateList']:
            # A follow-up review is a dict carrying its text under "content".
            # Reviews without one are passed through unchanged instead of
            # raising, and the loaded record itself is left unmodified.
            append = comment.get('appendComment')
            content = append['content'] if append else append
            result_list.append({
                'id': comment['id'],
                'content': content,
                'rateContent': comment['rateContent'],
                'auctionSku': comment['auctionSku'],
                'rateDate': comment['rateDate'],
            })
        return result_list
    
    
    # 连接数据库
    def save_db(comments):
        """Store reviews in MySQL, then read back review rows and group counts.

        A review is inserted only when its ``id`` is not already present in
        the ``taobao_id`` column of ``tb.taobao``; each new review is inserted
        exactly once.

        Args:
            comments: iterable of dicts with the keys ``id``, ``rateContent``,
                ``auctionSku``, ``rateDate`` and ``content``.

        Returns:
            A ``(rows, counts)`` tuple. ``rows`` is the result of selecting
            ``rate`` and ``content`` from ``tb.taobao``; ``counts`` is the
            result of ``COUNT(*) AS num ... GROUP BY auctionint``. Both are
            sequences of dicts because a ``DictCursor`` is used.

        Raises:
            pymysql.MySQLError: connecting or running a statement failed.
        """
        # NOTE(review): connection settings and credentials are hard-coded;
        # consider reading them from configuration or the environment.
        connection = pymysql.connect(host='127.0.0.1',
                                     port=3306,
                                     user='root',
                                     password='zhangkai',
                                     db='tb',
                                     charset='utf8mb4',
                                     cursorclass=pymysql.cursors.DictCursor)
        exists_sql = "select id from tb.taobao where taobao_id=%s"
        insert_sql = """INSERT INTO tb.taobao VALUES (%s, %s ,%s ,%s ,%s, %s)"""
        try:
            # Let a genuine connection failure propagate as-is.
            connection.ping(reconnect=True)
            # One cursor for the whole function, so the final queries also
            # work when `comments` is empty.
            with connection.cursor() as cursor:
                for comment in comments:
                    # Parameterized query: the driver quotes the id itself.
                    cursor.execute(exists_sql, (comment['id'],))
                    if cursor.fetchone():  # a row dict if stored, else None
                        print('这条评论已存在在数据库中')
                        continue

                    # Insert only the current review. The leading None fills
                    # the table's first column (presumably an auto-increment
                    # key - TODO confirm against the schema).
                    cursor.execute(insert_sql, (None,
                                                comment['id'],
                                                comment['rateContent'],
                                                comment['auctionSku'],
                                                comment['rateDate'],
                                                comment['content']))
                    connection.commit()
                    print('添加成功')

                cursor.execute("""select rate, content from tb.taobao""")
                rs_set = cursor.fetchall()

                # Number of stored reviews per `auctionint` value.
                cursor.execute("""SELECT COUNT(*)as num,auctionint FROM tb.taobao group by auctionint""")
                rs_sets = cursor.fetchall()
        finally:
            # Always release the connection, even when a statement fails.
            connection.close()
        return rs_set, rs_sets
    
    # 把所有评论转成一个大字符串
    def jieba_db(comments):
        """Concatenate the ``rate`` text of every row into one string.

        Args:
            comments: iterable of mappings that each have a ``rate`` key
                holding a string.

        Returns:
            All ``rate`` values joined in iteration order with no separator;
            an empty string when ``comments`` is empty.
        """
        return ''.join(row['rate'] for row in comments)
    
    
    # 生成词云
    def word_cloud(string):
        """Render *string* as a word-cloud image and save it to '淘宝词云.png'.

        Args:
            string: the text the word cloud is generated from.

        Returns:
            None. The image file is the only output.
        """
        options = {
            # Font file handed to WordCloud as ``font_path``.
            'font_path': 'msyhl.ttc',
            'background_color': "white",
            'width': 1000,
            'height': 860,
            'max_font_size': 30,
            'min_font_size': 10,
            'margin': 2,
        }
        cloud = WordCloud(**options).generate(string)
        cloud.to_file('淘宝词云.png')
    
    # 生成饼状图
    def pygals(comments):
        """Draw a pie chart of each group's share and save it as '淘宝.svg'.

        Args:
            comments: sequence of mappings with a numeric ``num`` count and
                an ``auctionint`` label.

        Each slice is labelled with ``auctionint`` and sized as that row's
        ``num`` expressed as a percentage of the sum of all ``num`` values.
        The SVG output can be opened in a web browser.
        """
        total = sum(row['num'] for row in comments)

        chart = pygal.Pie()
        chart.title = '购买手机颜色比例(in % )'
        for row in comments:
            share = row['num'] / total * 100
            chart.add(row['auctionint'], share)
        chart.render_to_file('淘宝.svg')

        print('绘图成功')
    
    
    if __name__ == '__main__':
        # Pipeline: load reviews from JSON -> store them in MySQL ->
        # build the word cloud -> draw the pie chart.
        comment = get_comments()
        save, banben = save_db(comments=comment)
        taobao_jieba = jieba_db(comments=save)
        # word_cloud() writes the image file and returns None, so there is
        # nothing to print.
        word_cloud(string=taobao_jieba)
        # `banben` holds the per-group counts returned by save_db(); they
        # are the input for the pie chart.
        pygals(comments=banben)
    
    

    词云图片效果如下

    在这里插入图片描述

    饼状图效果如下

    在这里插入图片描述
    在这里插入图片描述

  • 相关阅读:
    window 配置 sendmail
    无限级分类
    sublime
    php笔记
    Python:开发_基本流程
    python学习笔记,视频day13-python基础知识练习题(二)
    python学习笔记,视频day12-列表、元组、字典+整理
    java学习笔记- Math类、String类
    java学习笔记-if语句、Switch语句
    java学习笔记-循环
  • 原文地址:https://www.cnblogs.com/nicholas7464/p/10257484.html
Copyright © 2011-2022 走看看