zoukankan      html  css  js  c++  java
  • spark geoip

    import java.io.File
    import scala.io.Source
    import com.sanoma.cda.geoip.MaxMindIpGeo
    import com.sanoma.cda.geo.Point
    import java.io.PrintWriter
    
    // Shared GeoIP lookup handle, built from the GeoIP2 City database file at the given path.
    // NOTE(review): the `1000` argument is presumably a lookup-cache size and `synchronized = true`
    // presumably requests thread-safe lookups — confirm against the MaxMindIpGeo API.
    val geoIp = MaxMindIpGeo("/data/elas-input/GeoIP2-City.mmdb", 1000,synchronized = true)
    
    /**
     * Enriches every regular file directly under `srcDir` with GeoIP data and writes the
     * result to a file of the same name under `dstDir`.
     *
     * Each input line is split on tab characters; column 0 is passed to `geoIp.getLocation`.
     * Output lines are comma-separated:
     *  - lookup hit:  columns 0-4, countryCode, countryName, region, city, latitude,
     *                 longitude, postalCode, continent, column 5
     *  - lookup miss: columns 0-4 followed by one empty field
     *
     * Sub-directories of `srcDir` are ignored. A missing or unreadable `srcDir` is treated
     * as containing no files.
     *
     * @param srcDir directory holding the tab-separated input files
     * @param dstDir directory that receives the enriched output files (must already exist)
     */
    def iter_dir(srcDir:String,dstDir:String): Unit ={
      // File.listFiles() returns null (not an empty array) when srcDir is missing or not a directory.
      val files = Option(new File(srcDir).listFiles()).getOrElse(Array.empty[File]).filter(_.isFile)
      for (item <- files) {
        println(item.getName)
        val out = new PrintWriter(s"${dstDir}/${item.getName}")
        try {
          val in = Source.fromFile(item)
          try {
            for (line <- in.getLines()) {
              // limit = -1 keeps trailing empty columns, so a row whose last field is empty
              // still exposes it as "" instead of throwing on access.
              val it = line.split("\t", -1)
              geoIp.getLocation(it(0)) match {
                case None =>
                  // One %s per argument: the previous 4-specifier format silently dropped
                  // it(4) and the trailing empty field.
                  out.printf("%s,%s,%s,%s,%s,%s\n", it(0), it(1), it(2), it(3), it(4), "")
                case Some(loc) =>
                  val countryCode = loc.countryCode.getOrElse("")
                  val countryName = loc.countryName.getOrElse("")
                  val region = loc.region.getOrElse("")
                  val city = loc.city.getOrElse("")
                  val latitude = loc.geoPoint.map(_.latitude.toString).getOrElse("")
                  val longitude = loc.geoPoint.map(_.longitude.toString).getOrElse("")
                  val postalCode = loc.postalCode.getOrElse("")
                  val continent = loc.continent.getOrElse("")
                  out.printf("%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n",
                    it(0), it(1), it(2), it(3), it(4),
                    countryCode, countryName, region, city, latitude, longitude, postalCode, continent,
                    it(5))
              }
            }
          } finally {
            // Release the input file handle even if a row fails to parse.
            in.close()
          }
        } finally {
          // Flush and close the output file even when processing aborts midway.
          out.close()
        }
      }
    }
    // Run the enrichment: read the files under uniqServiceDir and write the results under tsoutput.
    iter_dir("/data/elas-input/uniqServiceDir","/data/elas-input/tsoutput")
    
    // Extract the text that follows the continent marker inside `str`.
    // NOTE(review): `str` is not defined in this snippet; it is assumed to be a String already
    // in scope (e.g. one enriched output line) — confirm.
    // The marker was misspelled ("North Amercia") and therefore could never match.
    val str2 = "North America"
    val index = str.indexOf(str2)
    // Skip the marker itself plus one separator character. The previous code added
    // str.length here, which always pointed at or past the end of `str`.
    val index2 = index + str2.length + 1
    // Fall back to "" when the marker is absent or nothing follows it.
    val content = if (index >= 0 && index2 <= str.length) str.substring(index2) else ""
    
  • 相关阅读:
    假期学习2
    假期学习1
    读《需求工程--软件建模和分析》一
    数据清洗
    Mapreduce实例——WordCount
    SEVEN python环境jieba分词的安装 以及热词索引
    SIX Spark Streaming 编程初级实践
    FIVE Spark SQL 编程初级实践
    FOUR spark-shell 交互式编程
    THREE SPARK
  • 原文地址:https://www.cnblogs.com/mayidudu/p/5909215.html
Copyright © 2011-2022 走看看