zoukankan      html  css  js  c++  java
  • spark读写Sequoiadb

    Spark 如何读写 SequoiaDB,最近被客户问得比较多,在这里记录一下。

    Spark读Sequoiadb数据:

    package marketing
    
    import com.sequoiadb.hadoop.io.BSONWritable
    import com.sequoiadb.hadoop.mapreduce.SequoiadbInputFormat
    import org.apache.hadoop.conf.Configuration
    import org.apache.spark.{SparkContext, SparkConf}
    
    /**
      * Example Spark job that loads a SequoiaDB collection as an RDD and prints
      * every record on the driver.
      *
      * Created by joy on 2015/12/15.
      */
    object Read {
        /**
          * Entry point. An explicit `main` is used instead of `extends App`
          * because Spark documents that subclasses of `scala.App` may not
          * work correctly.
          *
          * @param args command-line arguments (unused)
          */
        def main(args: Array[String]): Unit = {
            // BSONWritable is registered with Kryo so that records can be serialized by Spark.
            val conf = new SparkConf().setAppName("cgbdata").
              setMaster("local").registerKryoClasses(Array(classOf[BSONWritable]))

            val sc = new SparkContext(conf)
            try {
                // Settings handed to SequoiadbInputFormat; presumably the node list and the
                // collection space / collection to scan -- verify against the connector docs.
                val hadoopConfig = new Configuration()
                hadoopConfig.set("sequoiadb.input.url","master:11810,slave1:11810,slave2:11810")
                hadoopConfig.set("sequoiadb.in.collectionspace","default")
                hadoopConfig.set("sequoiadb.in.collection","bar")

                val sdbRDD = sc.newAPIHadoopRDD[Object,BSONWritable,SequoiadbInputFormat](hadoopConfig,classOf[SequoiadbInputFormat],classOf[Object], classOf[BSONWritable])

                // collect() pulls all records to the driver; foreach (not map) because
                // println is executed only for its side effect.
                sdbRDD.map(_._2.getBson).collect().foreach(println)
            } finally {
                // Always release the SparkContext, even when the read fails.
                sc.stop()
            }
        }
    }

    Spark写Sequoiadb数据:

    package marketing
    
    import com.sequoiadb.hadoop.io.BSONWritable
    import com.sequoiadb.hadoop.mapreduce.SequoiadbOutputFormat
    import org.apache.hadoop.conf.Configuration
    import org.apache.hadoop.io.{NullWritable, IntWritable}
    import org.apache.spark.{SparkConf, SparkContext}
    import org.bson.BasicBSONObject
    import org.bson.types.ObjectId
    
    /**
     * Example Spark job that writes one sample BSON document
     * (`{"name": "gaoxing"}`) to SequoiaDB through SequoiadbOutputFormat.
     */
    object Save {
      /**
       * Entry point. An explicit `main` is used instead of `extends App`
       * because Spark documents that subclasses of `scala.App` may not
       * work correctly.
       *
       * @param args command-line arguments (unused)
       */
      def main(args: Array[String]): Unit = {
        // BSONWritable is registered with Kryo so that records can be serialized by Spark.
        val sparkconf = new SparkConf().setMaster("local[2]").setAppName("save").registerKryoClasses(Array(classOf[BSONWritable]))
        val sc = new SparkContext(sparkconf)
        try {
          // Key/value pairs as required by the Hadoop output API; the key carries no data.
          val data = sc.parallelize(List((NullWritable.get(),new BSONWritable(new BasicBSONObject("name","gaoxing")))))

          // Settings handed to SequoiadbOutputFormat; presumably the target node and the
          // collection space / collection to write to -- verify against the connector docs.
          val config = new Configuration()
          config.set("sequoiadb.output.url","master:11810")
          config.set("sequoiadb.out.collectionspace","foo")
          config.set("sequoiadb.out.collection","bar")

          // The path argument is left empty; NOTE(review): the destination appears to be
          // taken from `config` rather than from a file path -- confirm with the connector.
          data.saveAsNewAPIHadoopFile("",classOf[NullWritable],classOf[BSONWritable],classOf[SequoiadbOutputFormat],config)
        } finally {
          // Always release the SparkContext, even when the write fails.
          sc.stop()
        }
      }
    }
    

      

  • 相关阅读:
    原型模式
    单例模式-2(注册式单例)
    单例模式-1(懒汉式、饿汉式)
    PBKDF2加密的实现
    vue中获取客户端IP地址
    微信公众平台,监听用户输入及事件
    详解二叉树的遍历-前中后序遍历/层序遍历-递归/迭代
    联想小新pro关机变慢的解决办法
    MySQL自学笔记系列
    MySQL入门-首先搞清楚【数据类型】
  • 原文地址:https://www.cnblogs.com/gaoxing/p/5048826.html
Copyright © 2011-2022 走看看