首先引入库文件,并进行数据读取
import pandas as pd
import numpy as np

# Load the raw sample file.
# The Windows path must be a raw string: in a normal string literal "\u"
# starts a unicode escape, so "...\unicomapp..." is a SyntaxError in Python 3.
data_Base = pd.read_csv(r"D:\Exam_Test\unicomapp_r0_201904_jinan.csv")
# data_Ite = pd.read_csv(r"D:\Exam_Test\lte_cm_jinan.csv", encoding="gbk")
data_Base.shape
显示行与列:
# Report the number of rows and columns of the loaded frame.
row_count, column_count = data_Base.shape
print(f"行数{row_count},列数{column_count}")
查看字段空值数量:
# Per-column count of missing values (isna is the pandas alias of isnull).
data_Base.isna().sum()
删除空值行:
# Drop, in place, every row whose "L-CELLID" value is missing.
data_Base.dropna(subset=["L-CELLID"],inplace=True)
每个 value 数值的采样点个数:
# Number of samples for each distinct L-SINR value, ordered by the SINR value.
# The resulting columns are named explicitly ("index" = SINR value,
# "L-SINR" = sample count) because pandas 2.0 changed the default names
# produced by value_counts().reset_index(); the bar chart that consumes
# Df_SINR looks the columns up by exactly these two names.
Df_SINR = (
    data_Base["L-SINR"]
    .value_counts()
    .sort_index()
    .rename_axis("index")
    .reset_index(name="L-SINR")
)
Df_SINR
柱状图:
# Bar chart of the SINR sample distribution
# NOTE(review): Faker is not used by the code visible here — confirm before removing.
from example.commons import Faker
from pyecharts import options as opts
from pyecharts.charts import Bar


def bar_base() -> Bar:
    """Build the SINR sample-distribution bar chart from ``Df_SINR``.

    The "index" column of ``Df_SINR`` is used as the x axis and the
    "L-SINR" column as the single series; value labels are hidden.

    Returns:
        The configured ``Bar`` chart (not yet rendered).
    """
    sinr_values = list(Df_SINR["index"])
    sample_counts = list(Df_SINR["L-SINR"])

    chart = Bar()
    chart.add_xaxis(sinr_values)
    chart.add_yaxis(
        "SINR样本",
        sample_counts,
        label_opts=opts.LabelOpts(is_show=False),
    )
    chart.set_global_opts(
        title_opts=opts.TitleOpts(title="SINR 样本分布", pos_left="center"),
        legend_opts=opts.LegendOpts(is_show=True, pos_left="right"),
    )
    return chart


bar_base().render("L_SINR 分布.html")
获取不等于某些值的行:
# Keep only the rows whose L-SINR is NOT in the exclusion list.
# NOTE(review): the exclusion list holds the string "1"; if L-SINR is a
# numeric column this never matches anything — confirm the intended value.
excluded_rows = data_Base["L-SINR"].isin(["1"])
data_Base = data_Base[~excluded_rows]
data_Base.shape
设置最大显示列:
# Allow up to 3000 columns to be shown when a DataFrame is displayed.
pd.options.display.max_columns = 3000
设置索引列:
# Use RECTIME as the index.
# The column is converted to datetime first: the later resample("D") call
# only works on a DatetimeIndex, and the CSV was read without date parsing.
# pd.to_datetime leaves an already-datetime column unchanged.
data_Base = data_Base.assign(
    RECTIME=pd.to_datetime(data_Base["RECTIME"])
).set_index("RECTIME")
重采样:
# Daily mean of every numeric column, with RECTIME restored as a column.
# numeric_only=True is required on pandas >= 2.0, where mean() raises on
# non-numeric columns instead of silently dropping them.
data_Apr = data_Base.resample("D").mean(numeric_only=True).reset_index()
data_Apr
根据指定列生成新的DataFrame:
# Build a new DataFrame restricted to the columns needed for the trend chart.
trend_columns = ["RECTIME", "L-RSRP", "L-SINR"]
data_Apr_new = pd.DataFrame(data_Apr, columns=trend_columns)
根据时间列获取日期是当月的第几天(Day):
# Day-of-month component of each RECTIME timestamp (via the .dt accessor).
data_Apr_new["RECTIME"].dt.day
双Y轴,趋势图:
# Plot the daily average RSRP and daily average SINR trends with pyecharts
# (dual y axes).
# NOTE(review): Faker is not used by the code visible here — confirm before removing.
import pyecharts.options as opts
from example.commons import Faker
from pyecharts.charts import Line


def line_base() -> Line:
    """Build a dual-axis line chart of daily SINR and RSRP from ``data_Apr_new``.

    SINR is drawn against the primary y axis and RSRP against an extended
    secondary axis (``yaxis_index=1``); each axis range is clamped to the
    min/max of its own column, rounded to two decimals. The x axis is the
    day of month taken from RECTIME.

    Returns:
        The SINR chart with the RSRP chart overlapped onto it.
    """
    day_of_month = data_Apr_new["RECTIME"].dt.day
    sinr = data_Apr_new["L-SINR"]
    rsrp = data_Apr_new["L-RSRP"]

    sinr_chart = Line()
    sinr_chart.add_xaxis(list(day_of_month))
    sinr_chart.add_yaxis(
        "SINR",
        sinr.round(2),
        is_smooth=True,
        is_symbol_show=False,
    )
    # Secondary axis for the RSRP series.
    sinr_chart.extend_axis(
        yaxis=opts.AxisOpts(
            name="RSRP",
            min_=rsrp.min().round(2),
            max_=rsrp.max().round(2),
        )
    )
    sinr_chart.set_global_opts(
        title_opts=opts.TitleOpts(title="4 月份济南 RSRP 和 SINR 趋势图", pos_left="center"),
        legend_opts=opts.LegendOpts(pos_left="right"),
        datazoom_opts=opts.DataZoomOpts(is_show=True),
        yaxis_opts=opts.AxisOpts(
            name="SINR",
            min_=round(sinr.values.min(), 2),
            max_=round(sinr.values.max(), 2),
        ),
    )

    rsrp_chart = Line()
    rsrp_chart.add_xaxis(list(day_of_month))
    rsrp_chart.add_yaxis(
        "RSRP",
        rsrp.round(2),
        yaxis_index=1,
        is_smooth=True,
        is_symbol_show=False,
    )

    return sinr_chart.overlap(rsrp_chart)


line_base().render_notebook()
条件判断:
# Keep rows with more than 100 samples, an "RSRP>=-110" ratio above 0.8
# and an "SINR>0" ratio below 0.7 (all three must hold).
condition1 = df_cm_new["样本量"] > 100
condition2 = df_cm_new["RSRP>=-110 采样点占比"] > 0.8
condition3 = df_cm_new["SINR>0 采样点占比"] < 0.7
all_conditions = condition1 & condition2 & condition3
df_cm_new = df_cm_new.loc[all_conditions]
df_cm_new.head()
发送邮件:
# Send an e-mail with one "<city>.xls" attachment per entry of City2.
import smtplib
from email.mime.text import MIMEText
# MIME (Multipurpose Internet Mail Extensions) is the Internet standard that
# describes message content types; a MIME message can carry text, images,
# audio, video and other application-specific data.
from email.mime.multipart import MIMEMultipart
from email.header import Header
from email.mime.application import MIMEApplication

# Third-party SMTP service
# NOTE(review): the account and its password/auth code are hard-coded in
# source. Rotate the credential and load it from the environment or a secret
# store instead of committing it.
mail_host = "smtp.qq.com"  # SMTP server
mail_user = "597945025@qq.com"  # user name
mail_pass = "cwtytropotbubgai"  # password

sender = '597945025@qq.com'
receivers = ['625645840@qq.com']  # recipient mailbox(es)

# Create a multipart message so attachments can be added.
message = MIMEMultipart()
message['From'] = Header(sender, 'utf-8')
# Join the addresses: str(receivers) would put the Python list repr
# ("['a@b.c']") into the To header.
message['To'] = Header(", ".join(receivers), 'utf-8')
subject = 'mail test'
message['Subject'] = Header(subject, 'utf-8')

# Mail body
message.attach(MIMEText('这是邮箱测试,请查收', 'plain', 'utf-8'))

for city in City2:
    attachment_name = city + ".xls"
    # Read through a context manager so the file handle is closed promptly.
    with open(attachment_name, 'rb') as attachment_file:
        xlsxpart = MIMEApplication(attachment_file.read())
    # RFC 2231 (charset, language, value) tuple so non-ASCII names survive.
    xlsxpart.add_header('Content-Disposition', 'attachment', filename=('gbk', '', attachment_name))
    message.attach(xlsxpart)

try:
    # The context manager sends QUIT and closes the socket even on failure.
    with smtplib.SMTP() as smtpObj:
        smtpObj.connect(mail_host, 25)  # 25 is the SMTP port
        smtpObj.login(mail_user, mail_pass)
        smtpObj.sendmail(sender, receivers, message.as_string())
except (smtplib.SMTPException, OSError) as exc:
    # OSError also covers connection-level failures (refused, timeout, DNS),
    # which are not SMTPException instances.
    print("Error: 无法发送邮件", exc)
else:
    print("邮件发送成功")
地图:
from pyecharts.charts import Map
from pyecharts.charts import Page
from pyecharts import options as opts

city = df_last["City2"]

# Colour-scale bounds for each indicator, rounded to two decimals.
val_min_rsrp = df_last["RSRP>=-110 采样点占比"].min().round(2)
val_max_rsrp = df_last["RSRP>=-110 采样点占比"].max().round(2)
val_min_sinr = df_last["SINR>0 采样点占比"].min().round(2)
val_max_sinr = df_last["SINR>0 采样点占比"].max().round(2)
visual_color = ['#df2f48', '#dfa59b', '#1c39ca', '#80d327']


def _jinan_ratio_map(column, title, visualmap) -> Map:
    """Build a "济南" map of ``df_last[column]`` keyed by the names in ``city``."""
    pairs = [[name, value] for name, value in zip(city, df_last[column])]
    chart = Map()
    chart.add("", pairs, "济南")
    chart.set_global_opts(
        title_opts=opts.TitleOpts(title=title, pos_left="center"),
        visualmap_opts=visualmap,
        # NOTE(review): the tooltip appends "%" to the raw column value —
        # confirm the column is already expressed in percent.
        tooltip_opts=opts.TooltipOpts(formatter="{b}:{c} %"),
    )
    return chart


def map_left() -> Map:
    """Map of the "RSRP>=-110 采样点占比" column, using the custom colour range."""
    return _jinan_ratio_map(
        "RSRP>=-110 采样点占比",
        "济南各区县4G网络良好覆盖(RSRP>=-110)比例分布图",
        opts.VisualMapOpts(min_=val_min_rsrp, max_=val_max_rsrp, range_color=visual_color),
    )


def map_right() -> Map:
    """Map of the "SINR>0 采样点占比" column, using the default colour range."""
    return _jinan_ratio_map(
        "SINR>0 采样点占比",
        "济南各区县4G网络良好质量(SINR>0)比例分布图",
        opts.VisualMapOpts(min_=val_min_sinr, max_=val_max_sinr),
    )


# Put both maps on one page.
page = Page(interval=0)
page.add(map_left(), map_right())
page.render_notebook()
对指定列进行处理(正则)
具体过程:首先将其转化成str,然后进行正则表达式匹配
import re

# Cast to str so that every value can be regex-matched.
# The builtin str is used: the np.str alias was removed in NumPy 1.24.
data_Base['USER-ID'] = data_Base['USER-ID'].astype(str)

# Numeric-looking text: optional sign, then either "digits.digits" or an
# integer / leading-dot decimal. The backslashes of \d and \. are restored;
# without them "d*" matched literal 'd' characters and "." matched any
# character, so arbitrary text passed the check.
# NOTE(review): the first alternative is not anchored at the end, so a value
# such as "1.5abc" still matches — tighten if strict validation is wanted.
pattern = re.compile(r'^[-+]?[-0-9]\d*\.\d*|[-+]?\.?[0-9]\d*$')

# Values that do not match are replaced with NaN.
data_Base["USER-ID"] = data_Base["USER-ID"].apply(lambda x: x if pattern.match(x) else np.nan)
zip转化指定格式:
将三列(名称、经度、纬度)转换为以名称为键、[经度, 纬度] 为值的字典:
# # a=[list (z) for z in zip(grid_no["longitude"],grid_no["latitude"])] # list( zip(grid_no["longitude"],grid_no["latitude"]) ) # list( grid_no["grid_no"])
# zip() packs the corresponding elements of several iterables into tuples;
# here it walks the three columns in lockstep to build
# {grid_no: [longitude, latitude]}.
base = {
    grid: [lon, lat]
    for grid, lon, lat in zip(
        grid_no['grid_no'], grid_no['longitude'], grid_no['latitude']
    )
}
调用百度地图BMAP
# json is imported here because this snippet runs before the later
# "import json" line; without it json.dump raises NameError.
import json

# Write the {grid_no: [longitude, latitude]} mapping to ./data.json.
with open('./data.json', 'w') as outfile:
    json.dump(base, outfile)
from pyecharts.charts import BMap
import json

# NOTE(review): the Baidu Maps API key is committed in source — consider
# loading it from configuration instead.
BAIDU_AK = "GbQ806nWqGFMjuiGjTm6jPgcVGWICGA1"


def bmap_base() -> BMap:
    """Build a Baidu-map effect-scatter chart of ``data`` by grid.

    Grid coordinates are loaded from ``./data.json``; each point pairs a
    "grid_no" with its "覆盖好质量差的质差样本占比" value from ``data``.

    Returns:
        The configured ``BMap`` chart (not yet rendered).
    """
    grid_values = [
        [grid, ratio]
        for grid, ratio in zip(data["grid_no"], data["覆盖好质量差的质差样本占比"])
    ]

    chart = BMap(init_opts=opts.InitOpts(height='615px', width='1350px'))
    chart.add_schema(
        baidu_ak=BAIDU_AK,
        center=[117.064366, 36.646401],
        zoom=15,
    )
    chart.add_coordinate_json(json_file='./data.json')
    chart.add(
        "",
        data_pair=grid_values,
        label_opts=opts.LabelOpts(is_show=False),
        symbol_size=6,
        type_='effectScatter',
    )
    # Navigation, scale and overview-map controls.
    chart.add_control_panel(
        navigation_control_opts=opts.BMapNavigationControlOpts(),
        scale_control_opts=opts.BMapScaleControlOpts(),
        overview_map_opts=opts.BMapOverviewMapControlOpts(
            is_open=True, offset_width=0, offset_height=0
        ),
    )
    chart.set_global_opts(
        title_opts=opts.TitleOpts(title="济南覆盖好质量差SINR质差栅格分布图", pos_left='center')
    )
    chart.set_series_opts(
        effect_opts=opts.EffectOpts(symbol='circle', scale=5, brush_type="stroke")
    )
    return chart


bmap = bmap_base()
bmap.render("济南质差栅格分布图.html")
bmap.render_notebook()
堆叠柱状图
# Stacked bar chart: one stacked series per prb_label, one bar per City1.
bar = Bar(init_opts=opts.InitOpts(height='350px'))

city_names = list(prb_label_city_count["City1"].unique())
bar.add_xaxis(city_names)

# NOTE(review): each series is appended in row order; this assumes every
# label has one row per city, in the same city order as the x axis — confirm.
for label in prb_label_city_count["prb_label"].unique():
    has_label = prb_label_city_count["prb_label"] == label
    share = prb_label_city_count.loc[has_label, "prb_label_per"]
    percent_values = list((share * 100).round(2))
    bar.add_yaxis(label, percent_values, stack="stack1")

bar.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
bar.set_global_opts(
    title_opts=opts.TitleOpts(title="各地市PRB利用率分区间段分布图", pos_left="center"),
    legend_opts=opts.LegendOpts(pos_top="8%"),
    yaxis_opts=opts.AxisOpts(
        axislabel_opts=opts.LabelOpts(formatter="{value} %"),
        max_=100,
    ),
    tooltip_opts=opts.TooltipOpts(formatter="{b}:{c} %"),
)
bar.render_notebook()
根据省信息标注市区地图:
# Append "市" to the names in City1 before passing them to the "山东" map.
city = avg_traffic['City1'] + "市"

# Colour-scale bounds: min and max of the downlink-traffic column.
val_min = avg_traffic["Downlink traffic at the PDCP Layer"].min()
val_max = avg_traffic["Downlink traffic at the PDCP Layer"].max()


def map_shandong() -> Map:
    """Build a "山东" map of the downlink-traffic column, rounded to 2 decimals.

    Returns:
        The configured ``Map`` chart (not yet rendered).
    """
    traffic = avg_traffic["Downlink traffic at the PDCP Layer"].round(2)
    pairs = [[name, value] for name, value in zip(city, traffic)]

    chart = Map()
    chart.add("", pairs, "山东")
    chart.set_global_opts(
        title_opts=opts.TitleOpts(title="2月份各地市平均单小区忙时业务量", pos_left="center"),
        visualmap_opts=opts.VisualMapOpts(min_=val_min, max_=val_max),
    )
    return chart


mymap = map_shandong()
mymap.render()
mymap.render_notebook()
根据特定关键字数据进行分段操作:
# Bucket the average downlink PRB usage into four labelled load bands:
# [0, 0.2], (0.2, 0.5], (0.5, 0.8], (0.8, 1]. include_lowest=True makes the
# first band include 0 itself.
newtable["prb_label"] = pd.cut(
    newtable["Average downlink PRB usage"],
    bins=[0, 0.2, 0.5, 0.8, 1],
    labels=["低负荷", "中等负荷", "高负荷", "超高负荷"],
    include_lowest=True,
)