  • Writing data from Spark to HBase in IDEA

    import org.apache.hadoop.hbase.HBaseConfiguration
    import org.apache.hadoop.hbase.io.ImmutableBytesWritable
    import org.apache.hadoop.hbase.mapred.TableOutputFormat
    import org.apache.spark.{SparkConf, SparkContext}
    import org.apache.hadoop.hbase.client.{Put, Result}
    import org.apache.hadoop.hbase.util.Bytes
    import org.apache.hadoop.mapred.JobConf
    
    object 写Hbase数据 {
      def main(args: Array[String]): Unit = {
        val sparkConf = new SparkConf().setAppName("Write data to HBase").setMaster("local[2]")
        val sc = new SparkContext(sparkConf)
        val tableName = "student"
        // sc.hadoopConfiguration.set(TableOutputFormat.OUTPUT_TABLE, tableName)

        val conf = HBaseConfiguration.create()

        // The old (mapred) TableOutputFormat is configured through a JobConf
        val jobConf = new JobConf(conf)
        jobConf.setOutputFormat(classOf[TableOutputFormat])
        jobConf.set(TableOutputFormat.OUTPUT_TABLE, tableName)

        // Build the new records
        val dataRDD = sc.makeRDD(Array("5,hadoop,B,29", "6,spark,G,56"))
        val rdd = dataRDD.map(_.split(",")).map { x =>
          val put = new Put(Bytes.toBytes(x(0))) // row key; addColumn takes column family, column qualifier and value
          put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes(x(1)))   // info:name
          put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("gender"), Bytes.toBytes(x(2))) // info:gender
          put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("age"), Bytes.toBytes(x(3)))    // info:age
          // pair up as (ImmutableBytesWritable, Put) so the RDD can be written with saveAsHadoopDataset
          (new ImmutableBytesWritable, put)
        }
        rdd.saveAsHadoopDataset(jobConf)
        sc.stop()
      }
    }
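
    For comparison, the same write can also be expressed with the newer org.apache.hadoop.hbase.mapreduce.TableOutputFormat and saveAsNewAPIHadoopDataset instead of the mapred API used above. The following is only a minimal sketch: it assumes the same student table with an info column family already exists, and the object name WriteHBaseNewAPI as well as the sample rows ("7,flink,B,31", "8,kafka,G,42") are made up for illustration.

    import org.apache.hadoop.hbase.HBaseConfiguration
    import org.apache.hadoop.hbase.client.Put
    import org.apache.hadoop.hbase.io.ImmutableBytesWritable
    import org.apache.hadoop.hbase.mapreduce.TableOutputFormat
    import org.apache.hadoop.hbase.util.Bytes
    import org.apache.hadoop.mapreduce.Job
    import org.apache.spark.{SparkConf, SparkContext}

    // Hypothetical object name; writes the same kind of rows via the new (mapreduce) API
    object WriteHBaseNewAPI {
      def main(args: Array[String]): Unit = {
        val sc = new SparkContext(new SparkConf().setAppName("Write data to HBase (new API)").setMaster("local[2]"))

        // Target table is set on the configuration; the Job only carries output key/value/format classes
        val hbaseConf = HBaseConfiguration.create()
        hbaseConf.set(TableOutputFormat.OUTPUT_TABLE, "student")
        val job = Job.getInstance(hbaseConf)
        job.setOutputKeyClass(classOf[ImmutableBytesWritable])
        job.setOutputValueClass(classOf[Put])
        job.setOutputFormatClass(classOf[TableOutputFormat[ImmutableBytesWritable]])

        // Sample rows are made up for this sketch
        val rdd = sc.makeRDD(Array("7,flink,B,31", "8,kafka,G,42"))
          .map(_.split(","))
          .map { x =>
            val put = new Put(Bytes.toBytes(x(0))) // row key
            put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes(x(1)))
            put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("gender"), Bytes.toBytes(x(2)))
            put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("age"), Bytes.toBytes(x(3)))
            (new ImmutableBytesWritable, put)
          }

        rdd.saveAsNewAPIHadoopDataset(job.getConfiguration)
        sc.stop()
      }
    }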

    Result:

    hbase(main):021:0> scan 'student'
    ROW                                    COLUMN+CELL                                                                                                     
     3                                     column=info:age, timestamp=1511079380185, value=29                                                              
     3                                     column=info:gender, timestamp=1511079380185, value=B                                                            
     3                                     column=info:name, timestamp=1511079380185, value=hadoop                                                         
     4                                     column=info:age, timestamp=1511079380185, value=56                                                              
     4                                     column=info:gender, timestamp=1511079380185, value=G                                                            
     4                                     column=info:name, timestamp=1511079380185, value=spark                                                          
     5                                     column=info:age, timestamp=1511079414301, value=29                                                              
     5                                     column=info:gender, timestamp=1511079414301, value=B                                                            
     5                                     column=info:name, timestamp=1511079414301, value=hadoop                                                         
     6                                     column=info:age, timestamp=1511079414301, value=56                                                              
     6                                     column=info:gender, timestamp=1511079414301, value=G                                                            
     6                                     column=info:name, timestamp=1511079414301, value=spark 
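
    The scan above was run in the HBase shell. If it is more convenient to verify the write from Spark itself, the table can be read back as an RDD with newAPIHadoopRDD and TableInputFormat. This is only a minimal sketch under the same local setup; the object name ReadStudentCheck is hypothetical.

    import org.apache.hadoop.hbase.HBaseConfiguration
    import org.apache.hadoop.hbase.client.Result
    import org.apache.hadoop.hbase.io.ImmutableBytesWritable
    import org.apache.hadoop.hbase.mapreduce.TableInputFormat
    import org.apache.hadoop.hbase.util.Bytes
    import org.apache.spark.{SparkConf, SparkContext}

    // Hypothetical object name; reads the student table back and prints the info columns
    object ReadStudentCheck {
      def main(args: Array[String]): Unit = {
        val sc = new SparkContext(new SparkConf().setAppName("Read student table").setMaster("local[2]"))

        val conf = HBaseConfiguration.create()
        conf.set(TableInputFormat.INPUT_TABLE, "student")

        // Each element is (row key wrapper, Result holding all cells of that row)
        val hbaseRDD = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat],
          classOf[ImmutableBytesWritable], classOf[Result])

        // Extract plain strings on the executors before collecting,
        // since ImmutableBytesWritable/Result are awkward to ship to the driver
        val rows = hbaseRDD.map { case (_, result) =>
          val row    = Bytes.toString(result.getRow)
          val name   = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("name")))
          val gender = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("gender")))
          val age    = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("age")))
          s"$row  info:name=$name  info:gender=$gender  info:age=$age"
        }

        rows.collect().foreach(println)
        sc.stop()
      }
    }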
  • Original post: https://www.cnblogs.com/soyo/p/7860152.html