zoukankan      html  css  js  c++  java
  • Scala【需求二:求各省市的各个指标】

    需求处理步骤

    原始数据->json->过滤->列裁剪

    需求二:求各省市的各个指标

    原始数据

    文本文件 pmt.json,每一行都是一个 JSON 字符串,其中包含 ip 等信息。

    {"sessionid":"4KT69Su8FavGfydclctzpUBQwYfRT0KW","advertisersid":19,"adorderid":188182,"adcreativeid":2123233,"adplatformproviderid":353466,"sdkversion":"Android 5.0","adplatformkey":"hUHtJfmzYgkKQmBgS1XnmIwT1lwWJZis","putinmodeltype":1,"requestmode":1,"adprice":4295.0,"adppprice":5153.0,"requestdate":"2018-10-06","ip":"106.82.41.165","appid":"XRX1000057","appname":"Face++","uuid":"PXUw6oNaBbOLgE4qHzy1eRR0AP6zl0LC","device":"BLACK BARRY","client":4,"osversion":"","density":"","pw":960,"ph":640,"lang":"","lat":"","provincename":"","cityname":"","ispid":46003,"ispname":"电信","networkmannerid":0,"networkmannername":"WIFI","iseffective":1,"isbilling":1,"adspacetype":3,"adspacetypename":"全屏","devicetype":1,"processnode":1,"apptype":0,"district":"district","paymode":1,"isbid":1,"bidprice":4884.0,"winprice":74754.0,"iswin":1,"cur":"rmb","rate":0.0,"cnywinprice":0.0,"imei":"","mac":"52:54:00:b4:e6:10","idfa":"JOQYVMIIPWAEKPHZRDZNCDLJIUZFSBLZ","openudid":"","androidid":"","rtbprovince":"","rtbcity":"","rtbdistrict":"","rtbstreet":"","storeurl":"","realip":"222.89.26.142","isqualityapp":0,"bidfloor":0.0,"aw":0,"ah":0,"imeimd5":"","macmd5":"","idfamd5":"","openudidmd5":"","androididmd5":"","imeisha1":"","macsha1":"","idfasha1":"","openudidsha1":"","androididsha1":"","uuidunknow":"","userid":"YRNo94gOpa3hCANOpFhUBUpQKWfkDblZ","reqdate":null,"reqhour":null,"iptype":1,"initbidprice":0.0,"adpayment":171671.0,"agentrate":0.0,"lomarkrate":0.0,"adxrate":0.0,"title":"非常经典的句子:没有改变不了的未来,只有不想改变的过去","keywords":"莲藕,甘蔗,辣椒,美文","tagid":"bCvpm912U8soUBaF6QxAIC0PXn4E0KD3","callbackdate":"2018-10-06","channelid":"123489","mediatype":1,"email":"2ki0i@hotmail.com","tel":"13404821298","age":"54","sex":"1"}
    {"sessionid":"retXIU76Vpp8VZzc7uQvDtObLjRHLtRe","advertisersid":81,"adorderid":140687,"adcreativeid":2321312,"adplatformproviderid":1036820,"sdkversion":"IOS 11.2","adplatformkey":"U8oDbQfH66KCkAtU092evNM1OLvlIQcK","putinmodeltype":1,"requestmode":2,"adprice":7402.0,"adppprice":3552.0,"requestdate":"2018-10-06","ip":"123.234.117.194","appid":"XRX1000033","appname":"蝉大师","uuid":"xExEur14ellSeYq1wbsDzmw9aMTcW6BU","device":"IPHONE6","client":2,"osversion":"","density":"","pw":1334,"ph":750,"lang":"","lat":"","provincename":"","cityname":"","ispid":46000,"ispname":"移动","networkmannerid":3,"networkmannername":"2G","iseffective":1,"isbilling":1,"adspacetype":2,"adspacetypename":"插屏","devicetype":1,"processnode":3,"apptype":0,"district":"district","paymode":1,"isbid":1,"bidprice":9514.0,"winprice":48180.0,"iswin":1,"cur":"rmb","rate":0.0,"cnywinprice":0.0,"imei":"778207196118215","mac":"52:54:00:a0:6b:b1","idfa":"","openudid":"","androidid":"","rtbprovince":"","rtbcity":"","rtbdistrict":"","rtbstreet":"","storeurl":"","realip":"210.41.145.252","isqualityapp":0,"bidfloor":0.0,"aw":0,"ah":0,"imeimd5":"","macmd5":"","idfamd5":"","openudidmd5":"","androididmd5":"","imeisha1":"","macsha1":"","idfasha1":"","openudidsha1":"","androididsha1":"","uuidunknow":"","userid":"G2KTkDDjamgwbP5uFngqzZPplfesjRQ4","reqdate":null,"reqhour":null,"iptype":1,"initbidprice":0.0,"adpayment":69642.0,"agentrate":0.0,"lomarkrate":0.0,"adxrate":0.0,"title":"非常经典的句子:生活中的点点滴滴,都是因果关系","keywords":"美文","tagid":"lvcFc7R4YuaOzOPZ0W3QDBgClZVCIkWk","callbackdate":"2018-10-06","channelid":"123500","mediatype":2,"email":"lgd8554@hotmail.com","tel":"13704892122","age":"35","sex":"0"}
    
    

    需求说明

    期望数据

    按照省市为单位,输出各个指标信息

    1.导入依赖

        <!-- Third-party libraries for the homework program. -->
        <!-- NOTE(review): no Scala library or Scala compiler plugin is declared in this
             fragment; presumably they are configured elsewhere in the POM. TODO confirm. -->
        <dependencies>
            <!-- fastjson: provides JSON.parseObject, used to parse each input line and the HTTP response. -->
            <dependency>
                <groupId>com.alibaba</groupId>
                <artifactId>fastjson</artifactId>
                <version>1.2.62</version>
            </dependency>
            <!-- commons-httpclient: provides HttpClient / GetMethod, used for the IP-location request. -->
            <dependency>
                <groupId>commons-httpclient</groupId>
                <artifactId>commons-httpclient</artifactId>
                <version>3.0.1</version>
            </dependency>
            <!-- junit: not referenced by the program shown in section 2. -->
            <dependency>
                <groupId>junit</groupId>
                <artifactId>junit</artifactId>
                <version>4.12</version>
            </dependency>
        </dependencies>
        <build>
            <plugins>
                <!-- Compile for Java 8 source and bytecode level. -->
                <plugin>
                    <artifactId>maven-compiler-plugin</artifactId>
                    <version>2.3.2</version>
                    <configuration>
                        <source>1.8</source>
                        <target>1.8</target>
                    </configuration>
                </plugin>
                <!-- Build a single "jar-with-dependencies" archive during the package phase. -->
                <plugin>
                    <artifactId>maven-assembly-plugin</artifactId>
                    <configuration>
                        <descriptorRefs>
                            <descriptorRef>jar-with-dependencies</descriptorRef>
                        </descriptorRefs>
                    </configuration>
                    <executions>
                        <execution>
                            <id>make-assembly</id>
                            <phase>package</phase>
                            <goals>
                                <goal>single</goal>
                            </goals>
                        </execution>
                    </executions>
                </plugin>
            </plugins>
        </build>
    

    2.代码

    package com.bigdata.scala.homework
    
    import scala.io.Source
    import com.alibaba.fastjson.{JSON, JSONObject}
    import org.apache.commons.httpclient.HttpClient
    import org.apache.commons.httpclient.methods.GetMethod
    
    /**
     * Homework, requirement 2: advertising metrics per province and city.
     *
     * Reads a text file holding one JSON object per line, keeps the rows that carry an `ip`,
     * resolves every IP to a (province, city) pair through the AMap IP-location REST endpoint,
     * drops the rows whose location could not be resolved, and prints, for each
     * (province, city) pair, the original request count and the advertising consumption.
     *
     * @author HaoWu
     * @since 2020-07-25
     */
    object HomeWork {

      /** Input file used when no path is passed on the command line. */
      private val DefaultInputPath = "C:\\Users\\HaoWu\\Desktop\\pmt.json"

      /** Text fastjson returns for the empty JSON array sent back for an unresolved IP. */
      private val EmptyJsonArray = "[]"

      /** The columns of one input row that the metrics below actually read. */
      private final case class AdRecord(
        ip: String,
        adplatformproviderid: Long,
        requestmode: Int,
        processnode: Int,
        iseffective: Int,
        isbilling: Int,
        iswin: Int,
        adorderid: Long,
        adcreativeid: Long,
        adpayment: Double
      )

      /**
       * Program entry point.
       *
       * @param args optional; `args(0)` overrides the input file path
       */
      def main(args: Array[String]): Unit = {
        val inputPath = args.headOption.getOrElse(DefaultInputPath)
        // One client is enough for all lookups; the original created one per row.
        val client = new HttpClient()

        // Steps 1-3: read the file, parse each line, keep rows that have an ip.
        val records = readRecords(inputPath)

        // Steps 4-5: resolve the location over HTTP and drop rows without province/city.
        val located = records.flatMap { record =>
          locate(client, record.ip).map(provinceCity => provinceCity -> record)
        }

        // Step 6: group by (province, city) and compute the metrics of every group.
        val result = located.groupBy(_._1).map { case (provinceCity, rows) =>
          val ads = rows.map(_._2)
          val requestCount = ads.count(isOriginalRequest)
          // NOTE(review): the original sums adpayment for "advert consumption"; confirm
          // whether this metric was meant to be based on winprice instead.
          val advertConsume = ads.filter(isBilledWin).map(_.adpayment / 1000).sum
          (provinceCity, requestCount, advertConsume)
        }
        println(result)
      }

      /** Reads `path` as UTF-8 and returns the parsed rows, always closing the file. */
      private def readRecords(path: String): List[AdRecord] = {
        val source = Source.fromFile(path, "utf-8")
        try {
          source.getLines()
            .filter(_.trim.nonEmpty) // a blank line would otherwise parse to null
            .flatMap(line => parseRecord(line))
            .toList // materialise before the finally block closes the source
        } finally {
          source.close()
        }
      }

      /**
       * Parses one JSON line.
       *
       * @return `None` when the line holds no JSON object or its `ip` is missing or empty
       */
      private def parseRecord(line: String): Option[AdRecord] =
        for {
          json <- Option(JSON.parseObject(line))
          ip <- Option(json.getString("ip")) if ip.nonEmpty
        } yield AdRecord(
          ip = ip,
          // The *Value getters yield 0 for an absent field instead of a null box.
          adplatformproviderid = json.getLongValue("adplatformproviderid"),
          requestmode = json.getIntValue("requestmode"),
          processnode = json.getIntValue("processnode"),
          iseffective = json.getIntValue("iseffective"),
          isbilling = json.getIntValue("isbilling"),
          iswin = json.getIntValue("iswin"),
          adorderid = json.getLongValue("adorderid"),
          // Fixed: the original read this value from the "adplatformproviderid" key.
          adcreativeid = json.getLongValue("adcreativeid"),
          adpayment = json.getDoubleValue("adpayment")
        )

      /**
       * Asks the IP-location endpoint for the province and city of `ip`.
       *
       * @return `None` when the status is not 200 or either field is missing, empty or `[]`
       */
      private def locate(client: HttpClient, ip: String): Option[(String, String)] = {
        val url = s"https://restapi.amap.com/v3/ip?ip=${ip}&key=f75418e64363b8a96d3565108638c5f1"
        val method = new GetMethod(url)
        try {
          if (client.executeMethod(method) != 200) None
          else
            for {
              body <- Option(JSON.parseObject(method.getResponseBodyAsString))
              province <- resolved(body.getString("province"))
              city <- resolved(body.getString("city"))
            } yield (province, city)
        } finally {
          // Hand the connection back so it is not leaked once per row.
          method.releaseConnection()
        }
      }

      /** Keeps a location field only when it carries a real value. */
      private def resolved(value: String): Option[String] =
        Option(value).filter(text => text.nonEmpty && text != EmptyJsonArray)

      /** Row counts as an original request. */
      private def isOriginalRequest(ad: AdRecord): Boolean =
        ad.requestmode == 1 && ad.processnode >= 1

      /** Row contributes to the advertising consumption. */
      private def isBilledWin(ad: AdRecord): Boolean =
        ad.adplatformproviderid >= 100000 &&
          ad.iseffective == 1 &&
          ad.isbilling == 1 &&
          ad.iswin == 1 &&
          ad.adorderid > 200000 &&
          ad.adcreativeid > 2000000
    }
    
    
  • 相关阅读:
    297.白盒测试
    301.多媒体讲台使用事项
    289.南信大知网登录
    296.deepin下载安装、root改密、cpu不降频、修改快捷键、创建启动器快捷方式、win文件访问、直接进win无deepin启动项
    295.博客园win&苹果PC客户端开源项目整理
    利用loganalyzer展示MySQL中rsyslog日志
    利用inotify和rsync实现数据的实时同步
    samba服务配置实践
    NFS服务配置实践
    FTP服务配置实践
  • 原文地址:https://www.cnblogs.com/wh984763176/p/13387479.html
Copyright © 2011-2022 走看看