zoukankan      html  css  js  c++  java
  • spark读写Sequoiadb

    最近经常被客户问到 Spark 如何读写 SequoiaDB,在这里记录一下。

    Spark读Sequoiadb数据:

    package marketing
    
    import com.sequoiadb.hadoop.io.BSONWritable
    import com.sequoiadb.hadoop.mapreduce.SequoiadbInputFormat
    import org.apache.hadoop.conf.Configuration
    import org.apache.spark.{SparkContext, SparkConf}
    
    /**
      * Reads the records of one SequoiaDB collection through the Hadoop
      * connector's `SequoiadbInputFormat` and prints every BSON document to
      * standard output.
      *
      * Created by joy on 2015/12/15.
      */
    object Read {
        /**
          * Program entry point. An explicit `main` is used instead of
          * `extends App`, whose deferred initialization the Spark
          * documentation warns may not work correctly.
          *
          * @param args command-line arguments (unused)
          */
        def main(args: Array[String]): Unit = {
            val conf = new SparkConf()
              .setAppName("cgbdata")
              .setMaster("local")
              .registerKryoClasses(Array(classOf[BSONWritable]))

            val sc = new SparkContext(conf)
            try {
                // Connector settings: host:port list, collection space and
                // collection to read from (key names as expected by the
                // SequoiaDB Hadoop connector).
                val hadoopConfig = new Configuration()
                hadoopConfig.set("sequoiadb.input.url", "master:11810,slave1:11810,slave2:11810")
                hadoopConfig.set("sequoiadb.in.collectionspace", "default")
                hadoopConfig.set("sequoiadb.in.collection", "bar")

                val sdbRDD = sc.newAPIHadoopRDD[Object, BSONWritable, SequoiadbInputFormat](
                  hadoopConfig,
                  classOf[SequoiadbInputFormat],
                  classOf[Object],
                  classOf[BSONWritable])

                // Only the value of each (key, value) pair is used. collect()
                // brings all documents to the driver before printing, so this
                // is suitable for small collections only.
                sdbRDD.map(_._2.getBson).collect().foreach(println)
            } finally {
                // Always release the context, even when the job fails.
                sc.stop()
            }
        }
    }

    Spark写Sequoiadb数据:

    package marketing
    
    import com.sequoiadb.hadoop.io.BSONWritable
    import com.sequoiadb.hadoop.mapreduce.SequoiadbOutputFormat
    import org.apache.hadoop.conf.Configuration
    import org.apache.hadoop.io.{NullWritable, IntWritable}
    import org.apache.spark.{SparkConf, SparkContext}
    import org.bson.BasicBSONObject
    import org.bson.types.ObjectId
    
    /**
     * Writes one sample BSON document (`name -> "gaoxing"`) to a SequoiaDB
     * collection through the Hadoop connector's `SequoiadbOutputFormat`.
     */
    object Save {
      /**
       * Program entry point. An explicit `main` is used instead of
       * `extends App`, whose deferred initialization the Spark documentation
       * warns may not work correctly.
       *
       * @param args command-line arguments (unused)
       */
      def main(args: Array[String]): Unit = {
        val sparkconf = new SparkConf()
          .setMaster("local[2]")
          .setAppName("save")
          .registerKryoClasses(Array(classOf[BSONWritable]))
        val sc = new SparkContext(sparkconf)

        try {
          // A one-element pair RDD: the key is a NullWritable placeholder and
          // the value carries the document to store.
          val data = sc.parallelize(
            List((NullWritable.get(), new BSONWritable(new BasicBSONObject("name", "gaoxing")))))

          // Connector settings: host:port, collection space and collection to
          // write to (key names as expected by the SequoiaDB Hadoop connector).
          val config = new Configuration()
          config.set("sequoiadb.output.url", "master:11810")
          config.set("sequoiadb.out.collectionspace", "foo")
          config.set("sequoiadb.out.collection", "bar")

          // The path argument is left empty as in the original example; the
          // destination is presumably taken from `config` by the output
          // format - NOTE(review): confirm against the connector version in use.
          data.saveAsNewAPIHadoopFile(
            "",
            classOf[NullWritable],
            classOf[BSONWritable],
            classOf[SequoiadbOutputFormat],
            config)
        } finally {
          // Always release the context, even when the job fails.
          sc.stop()
        }
      }
    }
    

      

  • 相关阅读:
    Python split()方法分割字符串
    Python创建线程
    Python find()方法
    webpack中‘vant’全局引入和按需引入【vue-cli】
    webpack中‘mint-ui’全局引入和按需引入【vue-cli】
    nginx中 处理post方式打开页面的报错405
    nginx中 vue路由去掉#后的配置问题
    webpack中 VUE使用搜狐ip库查询设备ip地址
    webpack中 VUE使用百度地图获取地理位置
    VUE动态设置网页head中的title
  • 原文地址:https://www.cnblogs.com/gaoxing/p/5048826.html
Copyright © 2011-2022 走看看