zoukankan      html  css  js  c++  java
  • python pandas相关知识点(练习)

    首先引入库文件,并进行数据读取

    import pandas as pd
    import numpy as np
    # Raw string literal: in a normal string "\u" starts a unicode escape, so
    # the original Windows path was a SyntaxError on Python 3.
    data_Base = pd.read_csv(r"D:\Exam_Test\unicomapp_r0_201904_jinan.csv")
    # data_Ite = pd.read_csv(r"D:\Exam_Test\lte_cm_jinan.csv", encoding="gbk")
    # NOTE(review): this expression was fused onto the comment line above;
    # presumably it was a separate notebook statement — confirm.
    data_Base.shape

    显示行与列:

    # Report the frame's dimensions: shape is (rows, columns).
    row_count, column_count = data_Base.shape
    print(f"行数{row_count},列数{column_count}")

    查看字段空值数量:

    # Count missing values per column (isna is the same check as isnull).
    data_Base.isna().sum()

    删除空值行:

    # Discard the rows that have no value in the "L-CELLID" column.
    data_Base = data_Base.dropna(subset=["L-CELLID"])

    每个 value 数值的采样点个数:

    # Number of samples per distinct L-SINR value, ordered by the value.
    # The column names are pinned explicitly: pandas >= 2.0 names the output of
    # value_counts().reset_index() "L-SINR"/"count" instead of "index"/"L-SINR",
    # which would break the Df_SINR["index"] / Df_SINR["L-SINR"] lookups.
    Df_SINR = (
        data_Base["L-SINR"]
        .value_counts()
        .sort_index()
        .rename_axis("index")          # distinct SINR values
        .reset_index(name="L-SINR")    # sample count of each value
    )
    Df_SINR
    

     柱状图:

    #SINR 样本分布柱状图
    from example.commons import Faker
    from pyecharts import options as opts
    from pyecharts.charts import Bar
    def bar_base() -> Bar:
        """Build the bar chart of sample counts per L-SINR value.

        Reads the module-level ``Df_SINR`` frame: its "index" column supplies
        the x axis and its "L-SINR" column the series values.

        Returns:
            The configured ``Bar`` chart (not yet rendered).
        """
        x_values = list(Df_SINR["index"])
        sample_counts = list(Df_SINR["L-SINR"])

        chart = Bar()
        chart.add_xaxis(x_values)
        chart.add_yaxis(
            "SINR样本",
            sample_counts,
            label_opts=opts.LabelOpts(is_show=False),  # no value label on the bars
        )
        chart.set_global_opts(
            title_opts=opts.TitleOpts(title="SINR 样本分布", pos_left="center"),
            legend_opts=opts.LegendOpts(is_show=True, pos_left="right"),
        )
        return chart
    bar_base().render("L_SINR 分布.html")

    获取不等于某些值的行:

    # Keep only the rows whose L-SINR value is NOT in the exclusion list.
    # NOTE(review): "1" is a string; if L-SINR was parsed as a number this
    # probably excludes nothing — confirm the column dtype.
    excluded_rows = data_Base["L-SINR"].isin(["1"])
    data_Base = data_Base[~excluded_rows]
    data_Base.shape

    设置最大显示列:

    # Let pandas display up to 3000 columns before truncating the output.
    pd.options.display.max_columns = 3000

    设置索引列:

    # Use RECTIME as the index. The CSV is read without parse_dates, so the
    # column is converted to datetime first; resample() needs a DatetimeIndex.
    # to_datetime is a no-op when the column is already datetime.
    data_Base = data_Base.assign(
        RECTIME=pd.to_datetime(data_Base["RECTIME"])
    ).set_index("RECTIME")

    重采样:

    # Daily mean of every numeric column, with RECTIME restored as a column.
    # numeric_only=True: since pandas 2.0 mean() raises on non-numeric columns
    # instead of silently dropping them.
    data_Apr = data_Base.resample("D").mean(numeric_only=True).reset_index()
    data_Apr

    根据指定列生成新的DataFrame:

    # New frame holding only the timestamp and the two KPI columns.
    trend_columns = ["RECTIME", "L-RSRP", "L-SINR"]
    data_Apr_new = data_Apr.reindex(columns=trend_columns)

    根据时间获取哪天(Day):

    # Day-of-month of each RECTIME value (the .dt accessor needs a datetime column).
    data_Apr_new["RECTIME"].dt.day

    双Y轴,趋势图:

    #使用 pyecharts 或其他可视化工具,将每天平均 RSRP 和平均 SINR 趋势作图
    import pyecharts.options as opts
    from example.commons import  Faker
    from pyecharts.charts import Line
    
    
    def line_base() -> Line:
        """Build the dual-axis daily trend chart of mean SINR and mean RSRP.

        Reads the module-level ``data_Apr_new`` frame. SINR is plotted on the
        primary y axis, RSRP on a second y axis added via ``extend_axis``; the
        x axis is the day of month taken from "RECTIME".

        Returns:
            The SINR line chart with the RSRP line overlapped onto it.
        """
        days = list(data_Apr_new["RECTIME"].dt.day)
        sinr = data_Apr_new["L-SINR"]
        rsrp = data_Apr_new["L-RSRP"]

        # Series.min()/max() skip NaN. The original took the SINR bounds from
        # the raw ndarray, whose min()/max() return NaN as soon as one row is
        # NaN, while the RSRP bounds already used the NaN-skipping methods.
        sinr_low, sinr_high = round(sinr.min(), 2), round(sinr.max(), 2)
        rsrp_low, rsrp_high = round(rsrp.min(), 2), round(rsrp.max(), 2)

        c = (
            Line()
            .add_xaxis(days)
            .add_yaxis("SINR", sinr.round(2), is_smooth=True, is_symbol_show=False)
            .extend_axis(
                yaxis=opts.AxisOpts(name="RSRP", min_=rsrp_low, max_=rsrp_high)
            )
            .set_global_opts(
                title_opts=opts.TitleOpts(title="4 月份济南 RSRP 和 SINR 趋势图", pos_left="center"),
                legend_opts=opts.LegendOpts(pos_left="right"),
                datazoom_opts=opts.DataZoomOpts(is_show=True),
                yaxis_opts=opts.AxisOpts(name="SINR", min_=sinr_low, max_=sinr_high),
            )
        )
        d = (
            Line()
            .add_xaxis(list(days))
            # yaxis_index=1 binds this series to the axis added by extend_axis.
            .add_yaxis("RSRP", rsrp.round(2), yaxis_index=1, is_smooth=True, is_symbol_show=False)
        )
        return c.overlap(d)
    line_base().render_notebook()

    条件判断:

    # Keep the rows with more than 100 samples, an RSRP>=-110 share above 0.8
    # and a SINR>0 share below 0.7.
    condition1 = df_cm_new["样本量"].gt(100)
    condition2 = df_cm_new["RSRP>=-110 采样点占比"].gt(0.8)
    condition3 = df_cm_new["SINR>0 采样点占比"].lt(0.7)
    df_cm_new = df_cm_new.loc[condition1 & condition2 & condition3]
    df_cm_new.head()

    发送邮件:

    #发送邮件
    import smtplib
    from email.mime.text import MIMEText    #MIME (Multipurpose Internet Mail Extensions) 是描述消息内容类型的因特网标准。MIME 消息能包含文本、图像、音频、视频以及其他应用程序专用的数据。
    from email.mime.multipart import MIMEMultipart
    from email.header import Header
    from email.mime.application import MIMEApplication
    # Third-party SMTP service settings.
    import os

    mail_host = "smtp.qq.com"  # SMTP server
    mail_user = "597945025@qq.com"    # login user name
    # The SMTP authorization code must not live in source code; it is read
    # from the MAIL_PASS environment variable (KeyError if it is not set).
    mail_pass = os.environ["MAIL_PASS"]
    sender = '597945025@qq.com'
    receivers = ['625645840@qq.com']  # recipient mailbox(es)
    # Build a multipart message so attachments can sit next to the text body.
    message = MIMEMultipart()
    message['From'] = Header(sender, 'utf-8')
    # Join the addresses: str(receivers) put the list repr ("['a@b.com']")
    # into the To header.
    message['To'] = Header(", ".join(receivers), 'utf-8')
    subject = 'mail test'
    message['Subject'] = Header(subject, 'utf-8')

    # Plain-text mail body.
    message.attach(MIMEText('这是邮箱测试,请查收', 'plain', 'utf-8'))
    # Attach one "<city>.xls" file for every entry of City2.
    for city in City2:
        attachment_name = city + ".xls"
        # The context manager closes the file handle, which the original leaked.
        with open(attachment_name, 'rb') as attachment_file:
            xlsxpart = MIMEApplication(attachment_file.read())
        # A (charset, language, value) tuple makes add_header encode the
        # non-ASCII file name.
        xlsxpart.add_header('Content-Disposition', 'attachment', filename=('gbk', '', attachment_name))
        message.attach(xlsxpart)
    try:
        # Leaving the with-block sends QUIT and closes the socket, also when
        # login or sendmail fails; the original never closed the connection.
        with smtplib.SMTP() as smtpObj:
            smtpObj.connect(mail_host, 25)    # 25 is the SMTP port
            smtpObj.login(mail_user, mail_pass)
            smtpObj.sendmail(sender, receivers, message.as_string())
        print("邮件发送成功")
    except smtplib.SMTPException as exc:
        # Show the SMTP error as well instead of hiding the cause.
        print("Error: 无法发送邮件", exc)

    地图:

    from pyecharts.charts import Map
    from pyecharts.charts import Page
    from pyecharts import options as opts
    # Region names plus the value range of both ratio columns; the ranges feed
    # the visual-map scales of the two maps below.
    city = df_last["City2"]
    rsrp_share = df_last["RSRP>=-110 采样点占比"]
    sinr_share = df_last["SINR>0 采样点占比"]
    val_min_rsrp = rsrp_share.min().round(2)
    val_max_rsrp = rsrp_share.max().round(2)
    val_min_sinr = sinr_share.min().round(2)
    val_max_sinr = sinr_share.max().round(2)
    # Custom colour stops, used by map_left's visual map.
    visual_color = ['#df2f48', '#dfa59b', '#1c39ca', '#80d327']
    def map_left() -> Map:
        """Build the 济南 map coloured by the "RSRP>=-110 采样点占比" column.

        Reads the module-level ``city``, ``df_last``, ``val_min_rsrp``,
        ``val_max_rsrp`` and ``visual_color``.

        Returns:
            The configured ``Map`` chart.
        """
        # One [region name, value] pair per row.
        pairs = [
            [region, share]
            for region, share in zip(city, df_last["RSRP>=-110 采样点占比"])
        ]
        chart = Map()
        chart.add("", pairs, "济南")
        chart.set_global_opts(
            title_opts=opts.TitleOpts(title="济南各区县4G网络良好覆盖(RSRP>=-110)比例分布图", pos_left="center"),
            visualmap_opts=opts.VisualMapOpts(min_=val_min_rsrp, max_=val_max_rsrp, range_color=visual_color),
            tooltip_opts=opts.TooltipOpts(formatter="{b}:{c} %"),
        )
        return chart
    def map_right() -> Map:
        """Build the 济南 map coloured by the "SINR>0 采样点占比" column.

        Reads the module-level ``city``, ``df_last``, ``val_min_sinr`` and
        ``val_max_sinr``; the visual map keeps its default colours.

        Returns:
            The configured ``Map`` chart.
        """
        # One [region name, value] pair per row.
        pairs = [
            [region, share]
            for region, share in zip(city, df_last["SINR>0 采样点占比"])
        ]
        chart = Map()
        chart.add("", pairs, "济南")
        chart.set_global_opts(
            title_opts=opts.TitleOpts(title="济南各区县4G网络良好质量(SINR>0)比例分布图", pos_left="center"),
            visualmap_opts=opts.VisualMapOpts(min_=val_min_sinr, max_=val_max_sinr),
            tooltip_opts=opts.TooltipOpts(formatter="{b}:{c} %"),
        )
        return chart
    
    # Put both maps on one page and show it inline in the notebook.
    page = Page(interval=0)
    page.add(map_left(), map_right())
    page.render_notebook()

    对指定列进行处理(正则)

    具体过程:首先将其转化成str,然后进行正则表达式匹配

    # Builtin str: the np.str alias was deprecated in NumPy 1.20 and removed
    # in 1.24.
    data_Base['USER-ID'] = data_Base['USER-ID'].astype(str)
    import re
    # Optionally signed integer or decimal such as "12", "-3.5" or "+.7".
    # The original pattern had lost its backslashes ("d*" matched a literal
    # 'd', "." matched any character) and, used with match(), accepted values
    # such as "1abc". fullmatch() requires the whole string to be numeric.
    pattern = re.compile(r'[-+]?(?:[0-9]+(?:\.[0-9]*)?|\.[0-9]+)')
    # Non-numeric ids (including the "nan" text produced by astype) become NaN.
    data_Base["USER-ID"] = data_Base["USER-ID"].apply(lambda x: x if pattern.fullmatch(x) else np.nan)

    zip转化指定格式:

    将三列(名称,经度,纬度)转化为 {名称: [经度, 纬度]} 格式的字典:

    # Map every grid_no to its [longitude, latitude] pair. zip() walks the
    # three columns in lockstep and yields one tuple per row.
    base = {
        grid_id: [lon, lat]
        for grid_id, lon, lat in zip(
            grid_no['grid_no'], grid_no['longitude'], grid_no['latitude']
        )
    }

    调用百度地图BMAP

    # json has to be imported before json.dump runs; the original imported it
    # only after the with-block and so raised NameError in a fresh session.
    import json
    from pyecharts.charts import BMap

    # Write the coordinate mapping to the JSON file that bmap_base() loads
    # through add_coordinate_json.
    with open('./data.json', 'w') as outfile:
        json.dump(base, outfile)
    BAIDU_AK = "GbQ806nWqGFMjuiGjTm6jPgcVGWICGA1"
    def bmap_base() -> BMap:
        """Build the Baidu-map effect-scatter chart of the grid values.

        Reads the module-level ``BAIDU_AK`` and ``data``; point coordinates are
        loaded from './data.json'.

        Returns:
            The configured ``BMap`` chart.
        """
        # One [grid_no, value] pair per row of ``data``.
        grid_values = [
            [grid_id, share]
            for grid_id, share in zip(data["grid_no"], data["覆盖好质量差的质差样本占比"])
        ]

        chart = BMap(init_opts=opts.InitOpts(height='615px', width='1350px'))
        chart.add_schema(
            baidu_ak=BAIDU_AK,
            center=[117.064366, 36.646401],
            zoom=15,
        )
        # Registers the coordinates that the series names are resolved against.
        chart.add_coordinate_json(json_file='./data.json')
        chart.add(
            "",
            data_pair=grid_values,
            label_opts=opts.LabelOpts(is_show=False),
            symbol_size=6,
            type_='effectScatter',
        )
        chart.add_control_panel(
            navigation_control_opts=opts.BMapNavigationControlOpts(),
            scale_control_opts=opts.BMapScaleControlOpts(),
            overview_map_opts=opts.BMapOverviewMapControlOpts(is_open=True, offset_width=0, offset_height=0),
        )
        chart.set_global_opts(
            title_opts=opts.TitleOpts(title="济南覆盖好质量差SINR质差栅格分布图", pos_left='center')
        )
        chart.set_series_opts(
            effect_opts=opts.EffectOpts(symbol='circle', scale=5, brush_type="stroke")
        )
        return chart
    bmap =  bmap_base()

    # Write a standalone HTML file and also show the chart in the notebook.
    bmap.render("济南质差栅格分布图.html")
    bmap.render_notebook()

     堆叠柱状图

    # Stacked bar chart: one stacked series per prb_label, cities on the x axis.
    bar = Bar(init_opts=opts.InitOpts(height='350px'))
    city_names = list(prb_label_city_count["City1"].unique())
    bar.add_xaxis(city_names)
    # NOTE(review): values are matched to the x axis by position; this assumes
    # every label has exactly one row per city, in the x-axis order — verify.
    for label in prb_label_city_count["prb_label"].unique():
        rows_for_label = prb_label_city_count[prb_label_city_count["prb_label"] == label]
        percentages = (rows_for_label["prb_label_per"] * 100).round(2)
        bar.add_yaxis(label, list(percentages), stack="stack1")

    bar.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
    bar.set_global_opts(
        title_opts=opts.TitleOpts(title="各地市PRB利用率分区间段分布图", pos_left="center"),
        legend_opts=opts.LegendOpts(pos_top="8%"),
        yaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(formatter="{value} %"), max_=100),
        tooltip_opts=opts.TooltipOpts(formatter="{b}:{c} %"),
    )
    bar.render_notebook()
    

     根据省信息标注市区地图:

    # NOTE(review): appending "" leaves the names unchanged; presumably a
    # suffix was meant so the names match the map's region names — confirm.
    city = avg_traffic['City1'] + ""
    pdcp_traffic = avg_traffic["Downlink traffic at the PDCP Layer"]
    val_min, val_max = pdcp_traffic.min(), pdcp_traffic.max()
    def map_shandong() -> Map:
        """Build the 山东 map coloured by the PDCP downlink traffic column.

        Reads the module-level ``city``, ``avg_traffic``, ``val_min`` and
        ``val_max``.

        Returns:
            The configured ``Map`` chart.
        """
        rounded_traffic = avg_traffic["Downlink traffic at the PDCP Layer"].round(2)
        # One [city name, value] pair per row.
        pairs = [[name, value] for name, value in zip(city, rounded_traffic)]
        chart = Map()
        chart.add("", pairs, "山东")
        chart.set_global_opts(
            title_opts=opts.TitleOpts(title="2月份各地市平均单小区忙时业务量", pos_left="center"),
            visualmap_opts=opts.VisualMapOpts(min_=val_min, max_=val_max),
        )
        return chart


    mymap = map_shandong()
    mymap.render()
    mymap.render_notebook()

    根据特定关键字数据进行分段操作:

    # Bucket the PRB usage into four labelled intervals; include_lowest=True
    # makes the first interval include its left edge, 0.
    prb_bin_edges = [0, 0.2, 0.5, 0.8, 1]
    prb_bin_labels = ["低负荷", "中等负荷", "高负荷", "超高负荷"]
    newtable["prb_label"] = pd.cut(
        newtable["Average downlink PRB usage"],
        prb_bin_edges,
        labels=prb_bin_labels,
        include_lowest=True,
    )
    

      

  • 相关阅读:
    git介绍
    Oracle '26-2月 -19 03.34.47.000000 下午' 字符串日期解析
    ProceedingJoinPoint 某些方法记录一下
    SpringBoot 2.X以上集成redis
    SpringBoot文件上传配置
    editmd输出到前端显示
    Thymeleaf Shiro标签
    Springboot 添加druid监控
    基于SpringBoot的博客项目
    SpringBoot默认首页配置
  • 原文地址:https://www.cnblogs.com/wangzhenghua/p/11081100.html
Copyright © 2011-2022 走看看