参考内容:
python3爬虫、数据清洗与可视化实践
pyecharts中文官网: https://pyecharts.org/#/zh-cn/intro
微信公众号: 数据分析1408
数据集链接
链接:https://pan.baidu.com/s/1NL-hapsT3rDRkTEzVCptcA 提取码:ax4s
Echarts是由百度开发的一款开源免费,覆盖各行业图表的纯JavaScript的可视化库,其可以提供直观、生动、可交互和高度个性化定制的数据可视化图表。Echarts具有丰富的图表类型,包括常规图、用于地理数据可视化的地图、用于关系数据可视化的关系图、还有用于BI的漏斗图和仪表盘,并且Echarts支持图与图之间的混搭。
Echarts案例:http://echarts.baidu.com/examples.html
官方网址:http://echarts.baidu.com/feature.html#chart-types
初识pyecharts
pyecharts是一个用于生成Echarts图表的类库,能利用几行代码轻松生成Echarts风格的图表。python是数据挖掘、数据分析的优秀工具,pyecharts是数据可视化的优秀工具。当python遇见pyecharts能擦出什么样的火花呢?
安装: pip install pyecharts
# 默认安装最新的版本
# 基本调用
from pyecharts.charts import Bar
# Sample data is named up front so each call below reads on its own.
categories = ["衬衫", "羊毛衫", "雪纺衫", "裤子", "高跟鞋", "袜子"]
merchant_a_values = [5, 20, 36, 10, 75, 90]

# Basic usage: create the chart, then add the axes one call at a time.
bar = Bar()
bar.add_xaxis(categories)
bar.add_yaxis("商家A", merchant_a_values)

# render() would write a local HTML file (render.html in the current
# directory by default, or a given path such as bar.render("mycharts.html")).
# render_notebook() is used here to show the chart in the notebook instead.
bar.render_notebook()
# pyecharts 所有方法均支持链式调用。
from pyecharts.charts import Bar
# Chained form: the chart methods can be strung together in one expression.
categories = ["衬衫", "羊毛衫", "雪纺衫", "裤子", "高跟鞋", "袜子"]
merchant_a_values = [5, 20, 36, 10, 75, 90]
bar = Bar().add_xaxis(categories).add_yaxis("商家A", merchant_a_values)

# Show the chart in the notebook (no local file is requested here).
bar.render_notebook()
# 使用 options 配置项,在 pyecharts 中,一切皆 Options。
from pyecharts.charts import Bar
from pyecharts import options as opts
# The title is configured through an options object and handed to the chart
# via set_global_opts.
categories = ["衬衫", "羊毛衫", "雪纺衫", "裤子", "高跟鞋", "袜子"]
merchant_a_values = [5, 20, 36, 10, 75, 90]
chart_title = opts.TitleOpts(title="主标题", subtitle="副标题")

bar = (
    Bar()
    .add_xaxis(categories)
    .add_yaxis("商家A", merchant_a_values)
    .set_global_opts(title_opts=chart_title)
)

# Show the chart in the notebook (no local file is requested here).
bar.render_notebook()
# pyecharts 提供了 10+ 种内置主题,开发者也可以定制自己喜欢的主题。
from pyecharts.charts import Bar
from pyecharts import options as opts
# 内置主题类型可查看 pyecharts.globals.ThemeType
from pyecharts.globals import ThemeType
# Two-series bar chart drawn with the built-in DARK theme; the theme is
# selected through the chart's init options.
categories = ["衬衫", "羊毛衫", "雪纺衫", "裤子", "高跟鞋", "袜子"]
dark_theme = opts.InitOpts(theme=ThemeType.DARK)
chart_title = opts.TitleOpts(title="主标题", subtitle="副标题")

bar = (
    Bar(init_opts=dark_theme)
    .add_xaxis(categories)
    .add_yaxis("商家A", [5, 20, 36, 10, 75, 90])
    .add_yaxis("商家B", [15, 6, 45, 20, 35, 66])
    .set_global_opts(title_opts=chart_title)
)
bar.render_notebook()
一个商业分析案例带你熟悉常见的pyecharts图表
分析背景:探索某电商市场衣物清洁剂各品类数据占比
饼图
import json
from pyecharts import options as opts
from pyecharts.charts import Page, Pie
# Load the data; the dataset can be downloaded from the link at the top of
# the article. The with-block closes the file once the JSON has been parsed.
with open("E:/Data/5/pies.json") as f:
    data = json.load(f)
name = data['name']
sales = data['sales']
sales_volume = data['sales_volume']

# Transaction volume per category.
pie = (
    Pie()
    .add(
        "成交量",
        [list(z) for z in zip(name, sales_volume)],
        center=[400, 300],
        radius=["10%", "50%"],
        rosetype="area",  # rosetype: default, "area" or "radius"
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title="衣服清洗剂市场占比", subtitle="成交量"),
        legend_opts=opts.LegendOpts(
            orient="horizontal", pos_top="5%", pos_left="10%"
        ),
    )
    .set_series_opts(
        label_opts=opts.LabelOpts(formatter="{b}: {c}")
    )
)

# Sales amount per category, same layout without the rose type.
pie2 = (
    Pie()
    .add(
        "销售额",
        [list(z) for z in zip(name, sales)],
        center=[400, 300],
        radius=["10%", "50%"],
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title="衣服清洗剂市场占比", subtitle="销售额"),
        legend_opts=opts.LegendOpts(
            orient="horizontal", pos_top="5%", pos_left="10%"
        ),
    )
    .set_series_opts(
        label_opts=opts.LabelOpts(formatter="{b}: {c}")
    )
)
pie.render_notebook()
pie2.render_notebook()
漏斗图
import json
from pyecharts import options as opts
from pyecharts.charts import Page,Funnel
# Load the data; the dataset can be downloaded from the link at the top of
# the article. The with-block closes the file once the JSON has been parsed.
with open("E:/Data/5/pies.json") as f:
    data = json.load(f)
name = data['name']
sales = data['sales']
sales_volume = data['sales_volume']

funnle = (
    Funnel()
    .add(
        "成交量",
        [list(z) for z in zip(name, sales_volume)],
        # label_opts takes a LabelOpts object; the label position is
        # "inside" (the original passed the misspelled bare string 'intside').
        label_opts=opts.LabelOpts(position="inside"),
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title="衣服清洗剂市场占比", subtitle="成交量"),
        legend_opts=opts.LegendOpts(
            orient="vertical", pos_top="12%", pos_left="2%"
        ),
    )
)
funnle.render_notebook()
柱形图和条形图
import json
from pyecharts import options as opts
from pyecharts.charts import Page,Bar
# Load the data; the dataset can be downloaded from the link at the top of
# the article. The with-block closes the file once the JSON has been parsed.
with open("E:/Data/5/pies.json") as f:
    data = json.load(f)
name = data['name']
sales = data['sales']
sales_volume = data['sales_volume']

bar = (
    Bar()
    .add_xaxis(name)
    .add_yaxis("成交量", list(sales_volume))
    .add_yaxis("销售额", list(sales))
    .set_global_opts(
        title_opts=opts.TitleOpts(title="衣服清洗剂市场占比柱形图"),
    )
    # Swapping the axes turns the column chart into a horizontal bar chart.
    .reversal_axis()
    .set_series_opts(
        label_opts=opts.LabelOpts(is_show=False),
        markpoint_opts=opts.MarkPointOpts(
            data=[
                opts.MarkPointItem(type_="max", name="最大值"),
                opts.MarkPointItem(type_="min", name="最小值"),
            ]
        ),
    )
)
bar.render_notebook()
通过该条形图可以看出在某电商衣物清洁剂市场中,洗衣液的销量占比73.75%,是市场容量最大的品类。
分析背景:探索某电商洗衣液市场趋势
简单折线图
import json
from pyecharts import options as opts
from pyecharts.charts import Page,Line
# Load the data; the dataset can be downloaded from the link at the top of
# the article. The with-block closes the file once the JSON has been parsed.
with open("E:/Data/5/lines.json") as f:
    data = json.load(f)
date = data['date']
sales1 = data['sales1']
sales2 = data['sales2']

# Plain line chart: each series gets an average mark line plus max/min
# mark points.
line = (
    Line()
    .add_xaxis(date)
    .add_yaxis(
        "成交量",
        sales1,
        markline_opts=opts.MarkLineOpts(
            data=[opts.MarkLineItem(type_="average", name="平均值")]
        ),
        markpoint_opts=opts.MarkPointOpts(
            data=[
                opts.MarkPointItem(type_="max", name="最大值"),
                opts.MarkPointItem(type_="min", name="最小值"),
            ],
            symbol="diamond",
            symbol_size=30,
        ),
    )
    .add_yaxis(
        "销售额",
        sales2,
        markline_opts=opts.MarkLineOpts(
            data=[opts.MarkLineItem(type_="average", name="平均值")]
        ),
        markpoint_opts=opts.MarkPointOpts(
            data=[
                opts.MarkPointItem(type_="max", name="最大值"),
                opts.MarkPointItem(type_="min", name="最小值"),
            ],
        ),
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title="洗衣液月销售情况"),
        xaxis_opts=opts.AxisOpts(splitline_opts=opts.SplitLineOpts(is_show=True)),
        yaxis_opts=opts.AxisOpts(splitline_opts=opts.SplitLineOpts(is_show=True)),
    )
)
line.render_notebook()
堆叠折线图
import json
from pyecharts import options as opts
from pyecharts.charts import Page,Line
# Load the data; the dataset can be downloaded from the link at the top of
# the article. The with-block closes the file once the JSON has been parsed.
with open("E:/Data/5/lines.json") as f:
    data = json.load(f)
date = data['date']
sales1 = data['sales1']
sales2 = data['sales2']

# Stacked, smoothed line chart. Both series carry the same stack name:
# series are stacked only with other series that share their `stack` value,
# so setting it on a single series (as before) stacks nothing.
line = (
    Line()
    .add_xaxis(date)
    .add_yaxis(
        "成交量",
        sales1,
        is_smooth=True,
        stack="stack1",
        markline_opts=opts.MarkLineOpts(
            data=[opts.MarkLineItem(type_="average", name="平均值")]
        ),
        markpoint_opts=opts.MarkPointOpts(
            data=[
                opts.MarkPointItem(type_="max", name="最大值"),
                opts.MarkPointItem(type_="min", name="最小值"),
            ],
            symbol="diamond",
            symbol_size=30,
        ),
    )
    .add_yaxis(
        "销售额",
        sales2,
        is_smooth=True,
        stack="stack1",
        markline_opts=opts.MarkLineOpts(
            data=[opts.MarkLineItem(type_="average", name="平均值")]
        ),
        markpoint_opts=opts.MarkPointOpts(
            data=[
                opts.MarkPointItem(type_="max", name="最大值"),
                opts.MarkPointItem(type_="min", name="最小值"),
            ],
        ),
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title="洗衣液月销售情况"),
        xaxis_opts=opts.AxisOpts(splitline_opts=opts.SplitLineOpts(is_show=True)),
        yaxis_opts=opts.AxisOpts(splitline_opts=opts.SplitLineOpts(is_show=True)),
    )
)
line.render_notebook()
阶梯折线图
import json
from pyecharts import options as opts
from pyecharts.charts import Page,Line
# Load the data; the dataset can be downloaded from the link at the top of
# the article. The with-block closes the file once the JSON has been parsed.
with open("E:/Data/5/lines.json") as f:
    data = json.load(f)
date = data['date']
sales1 = data['sales1']
sales2 = data['sales2']

# Step line chart: is_step=True on both series.
line = (
    Line()
    .add_xaxis(date)
    .add_yaxis(
        "成交量",
        sales1,
        is_step=True,
        markline_opts=opts.MarkLineOpts(
            data=[opts.MarkLineItem(type_="average", name="平均值")]
        ),
        markpoint_opts=opts.MarkPointOpts(
            data=[
                opts.MarkPointItem(type_="max", name="最大值"),
                opts.MarkPointItem(type_="min", name="最小值"),
            ],
            symbol="diamond",
            symbol_size=30,
        ),
    )
    .add_yaxis(
        "销售额",
        sales2,
        is_step=True,
        # NOTE(review): stack is set on this series only, so it has no
        # stacking partner - confirm whether it is intended here at all.
        stack=True,
        markline_opts=opts.MarkLineOpts(
            data=[opts.MarkLineItem(type_="average", name="平均值")]
        ),
        markpoint_opts=opts.MarkPointOpts(
            data=[
                opts.MarkPointItem(type_="max", name="最大值"),
                opts.MarkPointItem(type_="min", name="最小值"),
            ],
        ),
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title="洗衣液月销售情况"),
        xaxis_opts=opts.AxisOpts(splitline_opts=opts.SplitLineOpts(is_show=True)),
        yaxis_opts=opts.AxisOpts(splitline_opts=opts.SplitLineOpts(is_show=True)),
    )
)
line.render_notebook()
面积折线图
import json
from pyecharts import options as opts
from pyecharts.charts import Page,Line
# Load the data; the dataset can be downloaded from the link at the top of
# the article. The with-block closes the file once the JSON has been parsed.
with open("E:/Data/5/lines.json") as f:
    data = json.load(f)
date = data['date']
sales1 = data['sales1']
sales2 = data['sales2']

# Area line chart: each series is given an area style with its own opacity.
line = (
    Line()
    .add_xaxis(date)
    .add_yaxis(
        "成交量",
        sales1,
        areastyle_opts=opts.AreaStyleOpts(opacity=0.6),
        markline_opts=opts.MarkLineOpts(
            data=[opts.MarkLineItem(type_="average", name="平均值")]
        ),
        markpoint_opts=opts.MarkPointOpts(
            data=[
                opts.MarkPointItem(type_="max", name="最大值"),
                opts.MarkPointItem(type_="min", name="最小值"),
            ],
            symbol="diamond",
            symbol_size=30,
        ),
    )
    .add_yaxis(
        "销售额",
        sales2,
        areastyle_opts=opts.AreaStyleOpts(opacity=0.4),
        markline_opts=opts.MarkLineOpts(
            data=[opts.MarkLineItem(type_="average", name="平均值")]
        ),
        markpoint_opts=opts.MarkPointOpts(
            data=[
                opts.MarkPointItem(type_="max", name="最大值"),
                opts.MarkPointItem(type_="min", name="最小值"),
            ],
        ),
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title="洗衣液月销售情况"),
        xaxis_opts=opts.AxisOpts(splitline_opts=opts.SplitLineOpts(is_show=True)),
        yaxis_opts=opts.AxisOpts(splitline_opts=opts.SplitLineOpts(is_show=True)),
    )
)
line.render_notebook()
通过图表可以看出洗衣液在该电商网站秋冬季成交量的增长幅度较大。但根据生活常识,夏天人们出汗多,洗衣服的频率会更高,因此相应的洗衣液损耗较大。
通过业务理解,以下两方面的原因造成该网站秋冬季的成交量高于夏季的成交量。
- 第一,该网站从9月起有多个大型活动,由于价格比平时都要优惠,许多用户提前购买。
- 第二,进入秋季很多大件衣物,如大衣、被单、被子等需要集中清洗,对洗衣液的需求增大。
分析背景:指标完成情况
仪表盘
# `opts` is imported here as well so the snippet runs on its own; the title
# below needs it.
from pyecharts import options as opts
from pyecharts.charts import Gauge

# Gauge with a single reading for the completion rate.
gauge = (
    Gauge()
    .add("完成情况", [("完成率", 82.4)])
    .set_global_opts(title_opts=opts.TitleOpts(title="指标完成情况"))
)
gauge.render_notebook()
水球图
# `opts` is imported here as well so the snippet runs on its own; the title
# below needs it.
from pyecharts import options as opts
from pyecharts.charts import Liquid

# Liquid-fill chart with a single completion-rate value.
liquid = (
    Liquid()
    .add("完成率", [0.542])
    .set_global_opts(title_opts=opts.TitleOpts(title="指标完成情况"))
)
liquid.render_notebook()
分析背景:买家评价舆情分析
词频统计
结巴分词的安装请参考:https://www.cnblogs.com/sinlearn/p/12670522.html
词云
import pandas as pd
# `opts` is imported here as well so the snippet runs on its own; the title
# below needs it.
from pyecharts import options as opts
from pyecharts.charts import WordCloud

# Read the keyword/frequency table (GBK-encoded CSV with a header row).
wd = pd.read_csv("E:/Data/5/cp.csv", header=0, encoding='gbk')
keyword = wd['关键词'].tolist()
count = [int(c) for c in wd['词频']]

wordcloud = (
    WordCloud()
    .add(
        "",
        list(zip(keyword, count)),
        shape='star',
        rotate_step=20,
        # A custom image can be used instead: mask_image="E:/Data/5/3.png"
    )
    .set_global_opts(title_opts=opts.TitleOpts(title="洗衣液评论词频统计"))
)
wordcloud.render_notebook()
通过词云可以看出消费者主要关注:包装、活动、价格、正品(品质)。
分析背景:探索销售额与高质宝贝数之间的关系
散点图
import json
from pyecharts import options as opts
from pyecharts.charts import Page,Scatter
# Load the data; the dataset can be downloaded from the link at the top of
# the article. The with-block closes the file once the JSON has been parsed.
with open("E:/Data/5/scatters.json") as f:
    data = json.load(f)
xs = data['xs']
gb = data['gb']

# Both axes are switched to type "value" so the x data is treated as
# numbers rather than categories.
scatter = (
    Scatter()
    .add_xaxis(xs)
    .add_yaxis('关系', gb)
    .set_global_opts(
        title_opts=opts.TitleOpts(title="销售额与高质宝贝数"),
        xaxis_opts=opts.AxisOpts(
            type_="value", splitline_opts=opts.SplitLineOpts(is_show=True)
        ),
        yaxis_opts=opts.AxisOpts(
            type_="value", splitline_opts=opts.SplitLineOpts(is_show=True)
        ),
    )
)
scatter.render_notebook()
涟漪散点图
# json is imported here as well so the snippet runs on its own.
import json

from pyecharts import options as opts
from pyecharts.charts import EffectScatter
from pyecharts.globals import SymbolType

# Load the data; the dataset can be downloaded from the link at the top of
# the article. The with-block closes the file once the JSON has been parsed.
with open("E:/Data/5/scatters.json") as f:
    data = json.load(f)
xs = data['xs']
gb = data['gb']

effectscatter = (
    EffectScatter()
    .add_xaxis(xs)
    .add_yaxis('关系', gb)
    .set_global_opts(
        title_opts=opts.TitleOpts(title="销售额与高质宝贝数"),
        xaxis_opts=opts.AxisOpts(
            type_="value", splitline_opts=opts.SplitLineOpts(is_show=True)
        ),
        yaxis_opts=opts.AxisOpts(
            type_="value", splitline_opts=opts.SplitLineOpts(is_show=True)
        ),
    )
)
effectscatter.render_notebook()
通过散点图可以看出销售额与高质宝贝数呈正相关趋势。
其它常用图表
上面这个案例,包含了一些常用图表,但是强大的pyecharts的功能远不止如此,下面着重介绍一下箱线图、地理图、3D图、热力图、日历图、自定义组合图。
箱线图
from pyecharts import options as opts
from pyecharts.charts import Boxplot
# A box is described by: minimum, lower quartile, median, upper quartile,
# maximum, i.e. [min, Q1, median (or Q2), Q3, max].
# Each list below wraps one inner list of twelve raw values.
data1 = [[2.0, 4.9, 7.0, 23.2, 25.6, 76.7, 135.6, 162.2, 32.6, 20.0, 6.4, 3.3]]
data2 = [[2.6, 5.9, 9.0, 26.4, 28.7, 70.7, 175.6, 182.2, 48.7, 18.8, 6.0, 2.3]]

# NOTE(review): the x axis lists two categories while each series supplies a
# single group of values - confirm the boxes land under the intended labels.
boxplot = Boxplot()
boxplot.add_xaxis(["降水量", "蒸发量"])
boxplot.add_yaxis('降水', Boxplot.prepare_data(data1))
boxplot.add_yaxis('蒸发', Boxplot.prepare_data(data2))
boxplot.set_global_opts(
    title_opts=opts.TitleOpts(title="一年的降水量与蒸发量"),
    yaxis_opts=opts.AxisOpts(
        type_="value",
        name="单位:万立方米",
        splitline_opts=opts.SplitLineOpts(is_show=True),
    ),
)
boxplot.render_notebook()
地理图
from pyecharts import options as opts
from pyecharts.charts import Map
import random
# Provinces to colour, each paired with a random demo value in [50, 150].
province = [
    '广东', '湖北', '湖南', '四川', '重庆', '黑龙江', '浙江',
    '山西', '河北', '安徽', '河南', '山东', '西藏',
]
data = [(i, random.randint(50, 150)) for i in province]

# China map with a piecewise visual map and the legend hidden.
_map = Map()
_map.add("销售额", data, "china")
_map.set_global_opts(
    title_opts=opts.TitleOpts(title="Map-基本示例"),
    legend_opts=opts.LegendOpts(is_show=False),
    visualmap_opts=opts.VisualMapOpts(max_=200, is_piecewise=True),
)
_map.render_notebook()
from pyecharts import options as opts
from pyecharts.charts import Geo, Page
from pyecharts.globals import ChartType, SymbolType
# Geo chart: aid flows from other provinces to cities in Hubei.
geo = Geo()
geo.width = "800px"   # canvas width
geo.height = "600px"  # canvas height

# Global options: visual map range and chart title.
geo.set_global_opts(
    visualmap_opts=opts.VisualMapOpts(max_=3),
    title_opts=opts.TitleOpts(title="各省对口支援湖北流向"),
)

# Base map: China, with fill and border colours.
geo.add_schema(
    maptype="china",
    itemstyle_opts=opts.ItemStyleOpts(border_color="#111", color="#454545"),
)

# Receiving cities in Hubei, drawn as a heat map with labels shown.
geo.add(
    "",
    [
        ("黄冈", 2), ("黄石", 1), ("宜昌", 1), ("襄阳", 2),
        ("潜江", 1), ("恩施", 1), ("孝感", 2), ("咸宁", 1),
        ("随州", 1), ("仙桃", 1), ("神农架", 1), ("鄂州", 1),
        ("荆州", 2), ("十堰", 1), ("天门", 1), ("荆门", 2),
    ],
    type_=ChartType.HEATMAP,
    label_opts=opts.LabelOpts(is_show=True),
)

# Sending provinces, drawn as ripple-effect scatter points.
# The last entry is "广西" (the original "山广西" was a typo); it has to match
# the name used by the flow lines below.
geo.add(
    "",
    [
        ("北京", 1), ("上海", 1), ("重庆", 1), ("黑龙江", 1),
        ("广东", 1), ("海南", 1), ("福建", 1), ("贵州", 1),
        ("天津", 1), ("山东", 1), ("湖南", 1), ("辽宁", 1),
        ("宁夏", 1), ("内蒙古", 1), ("浙江", 1), ("云南", 1),
        ("河北", 1), ("江西", 1), ("江苏", 1), ("山西", 3), ("广西", 1),
    ],
    type_=ChartType.EFFECT_SCATTER,
    label_opts=opts.LabelOpts(is_show=False),
)

# Flow lines from each sending province to its receiving city.
geo.add(
    "流向图",
    [
        ("重庆", "孝感"), ("黑龙江", "孝感"), ("广东", "荆州"), ("海南", "荆州"),
        ("福建", "宜昌"), ("贵州", "鄂州"), ("天津", "恩施"), ("山东", "黄冈"),
        ("湖南", "黄冈"), ("辽宁", "襄阳"), ("宁夏", "襄阳"), ("内蒙古", "荆门"),
        ("云南", "咸宁"), ("河北", "神农架"), ("江西", "随州"), ("江苏", "黄石"),
        ("山西", "仙桃"), ("山西", "天门"), ("山西", "潜江"), ("广西", "十堰"),
    ],
    type_=ChartType.LINES,
    # Curvature and colour of the base lines.
    linestyle_opts=opts.LineStyleOpts(curve=0.4, color="#00BFFF"),
    # Symbol, size and colour of the moving flow effect.
    effect_opts=opts.EffectOpts(
        symbol=SymbolType.ROUND_RECT, symbol_size=6, color="#FFA500"
    ),
)

# Show the chart in the notebook.
geo.render_notebook()
3D 散点图
from pyecharts.charts import Scatter3D
import random
# 580 random points, each with three integer coordinates in [0, 100].
data = [[random.randint(0, 100) for _ in range(3)] for _ in range(580)]

# Colour palette defined alongside the example; it is not passed to the
# chart below.
range_color = [
    '#313695', '#4575b4', '#74add1', '#abd9e9', '#e0f3f8', '#ffffbf',
    '#fee090', '#fdae61', '#f46d43', '#d73027', '#a50026',
]

scatter3D = Scatter3D().add("", data)
scatter3D.render_notebook()
import math
from pyecharts.charts import Surface3D
def create_surface3d_data():
    """Yield ``[x, y, z]`` points for the 3D surface example.

    ``x`` and ``y`` each run from -1 (inclusive) to 1 (exclusive) in steps of
    1/60, with ``y`` as the outer loop, giving 120 * 120 points in total.
    ``z`` is ``tan(x * pi) * sin(y * pi)``, except for points where both
    ``|x| < 0.1`` and ``|y| < 0.1``: those get the string ``'-'`` instead of
    a number.
    """
    for y_step in range(-60, 60):
        y = y_step / 60
        for x_step in range(-60, 60):
            x = x_step / 60
            if abs(x) < 0.1 and abs(y) < 0.1:
                # Small central square carries '-' rather than a value
                # (presumably the chart's "no data" marker - confirm).
                z = '-'
            else:
                z = math.tan(x * math.pi) * math.sin(y * math.pi)
            yield [x, y, z]
# `opts` is imported here as well so the snippet runs on its own; the title
# below needs it.
from pyecharts import options as opts

# Colour palette defined alongside the example; it is not passed to the
# chart below.
range_color = [
    "#313695", "#4575b4", "#74add1", "#abd9e9", "#e0f3f8", "#ffffbf",
    "#fee090", "#fdae61", "#f46d43", "#d73027", "#a50026",
]
_data = list(create_surface3d_data())

surface3d = Surface3D()
surface3d.add("", _data)
# The title is passed by keyword, like everywhere else in this file, so it
# does not depend on the positional parameter order of TitleOpts
# (NOTE(review): that order is believed to differ between pyecharts
# releases - confirm against the installed version).
surface3d.set_global_opts(title_opts=opts.TitleOpts(title="3D曲面图示例"))
surface3d.render_notebook()
热力图
from pyecharts.charts import HeatMap
from pyecharts import options as opts
from pyecharts.faker import Faker
import random
# Demo data: one [i, j, value] triple per (i, j) pair with i in 0..23 and
# j in 0..6; values are random integers in [0, 50].
data = [[i, j, random.randint(0, 50)] for i in range(24) for j in range(7)]

# Heat map over Faker.clock (x axis) and Faker.week (y axis), with the cell
# labels shown inside and the legend hidden.
heat = HeatMap()
heat.add_xaxis(Faker.clock)
heat.add_yaxis(
    "访客数",
    Faker.week,
    data,
    label_opts=opts.LabelOpts(is_show=True, position="inside"),
)
heat.set_global_opts(
    title_opts=opts.TitleOpts(title="HeatMap-基本示例", subtitle="我是副标题"),
    visualmap_opts=opts.VisualMapOpts(),
    legend_opts=opts.LegendOpts(is_show=False),
)
heat.render_notebook()
from pyecharts.charts import Calendar
from pyecharts import options as opts
import random
import datetime
# Demo data: one [date string, random step count] pair for every day from
# 2020-01-01 through 2020-12-31 inclusive.
begin = datetime.date(2020, 1, 1)
end = datetime.date(2020, 12, 31)
data = [
    [str(begin + datetime.timedelta(days=i)), random.randint(1000, 25000)]
    for i in range((end - begin).days + 1)
]

# Calendar chart example.
step_visualmap = opts.VisualMapOpts(
    max_=25000,
    min_=1000,
    orient="horizontal",
    is_piecewise=True,
    pos_top="230px",
    pos_left="100px",
)
calendar = (
    Calendar()
    .add("微信步数", data, calendar_opts=opts.CalendarOpts(range_="2020"))
    .set_global_opts(
        title_opts=opts.TitleOpts(title="Calendar-基本示例", subtitle="记步统计"),
        legend_opts=opts.LegendOpts(is_show=False),
        visualmap_opts=step_visualmap,
    )
)
calendar.render_notebook()
pyecharts 是真的强大,常用的先熟悉了吧,以后用到了个别较怪的图形,再做进一步学习吧。