zoukankan      html  css  js  c++  java
  • Hbase实践

    读取Hive中的数据并写入HBase

    package hbase
    
    import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
    import org.apache.hadoop.hbase.client.{HTable, Put}
    import org.apache.hadoop.hbase.mapred.TableOutputFormat
    import org.apache.hadoop.hbase.util.Bytes
    import org.apache.hadoop.mapred.JobConf
    import org.apache.spark.sql.SparkSession
    
    /**
      * Spark job that copies a sample of Hive order rows into the HBase table `orders`.
      *
      * It runs `select order_id,user_id,order_dow from badou.orders limit 300` through a
      * Hive-enabled SparkSession, turns every row into one HBase `Put` keyed by `user_id`,
      * and writes each partition's puts with a single batched `HTable.put` call.
      */
    object SparkHbase {
      /**
        * Entry point of the job.
        *
        * @param args command-line arguments; not used
        */
      def main(args: Array[String]): Unit = {
        // ZooKeeper quorum handed to the HBase client configuration below.
        val zookeeperQuorum = "192.168.174.134,192.168.174.135,192.168.174.129"

        // Hive support is required because the source data is read from a Hive table.
        val spark = SparkSession.builder()
          .appName("spark to hbase")
          .enableHiveSupport()
          .getOrCreate()

        val rdd = spark.sql("select order_id,user_id,order_dow from badou.orders limit 300").rdd

        // One Put object is one HBase row; the row key is passed to the constructor.
        // Every value written to HBase has to be encoded with
        // org.apache.hadoop.hbase.util.Bytes.
        rdd.map { row =>
          // NOTE(review): these casts assume the Hive columns are string / bigint / int
          // respectively; a different schema fails here with ClassCastException — confirm.
          val orderId = row(0).asInstanceOf[String]
          val userId = row(1).asInstanceOf[Long]
          val orderDow = row(2).asInstanceOf[Int]

          // user_id is the row key.
          // NOTE(review): rows sharing a user_id target the same HBase row — confirm intended.
          val put = new Put(Bytes.toBytes(userId))
          // Column family "id", qualifier "order": holds order_id.
          put.addColumn(Bytes.toBytes("id"), Bytes.toBytes("order"), Bytes.toBytes(orderId))
          // Column family "num", qualifier "dow": holds order_dow.
          put.addColumn(Bytes.toBytes("num"), Bytes.toBytes("dow"), Bytes.toBytes(orderDow))
          put
        }.foreachPartition { partition =>
          // Materialise the iterator once so the batch can be size-checked and sent together.
          val puts = partition.toList
          // Skip empty partitions so no HBase table handle is opened for nothing.
          if (puts.nonEmpty) {
            val jobconf = new JobConf(HBaseConfiguration.create())
            jobconf.set("hbase.zookeeper.quorum", zookeeperQuorum)
            jobconf.set("hbase.zookeeper.property.clientPort", "2181")
            jobconf.set("zookeeper.znode.parent", "/hbase")
            jobconf.setOutputFormat(classOf[TableOutputFormat])

            // Target table name.
            val table = new HTable(jobconf, TableName.valueOf("orders"))
            try {
              // Explicit converters replace the deprecated implicit JavaConversions.
              import scala.collection.JavaConverters._
              table.put(puts.asJava)
            } finally {
              // Always release the table handle, even when the put fails.
              table.close()
            }
          }
        }
      }

    }
    View Code

    创建Hive外部表,用于分析HBase表中的数据

    -- Creates the external Hive table h_table on top of the HBase table "table1",
    -- using the Hive HBase storage handler.
    -- Column mapping (positional, per hbase.columns.mapping):
    --   rowkey -> :key      (the HBase row key)
    --   col1   -> f1:col1   (column family f1, qualifier col1)
    --   col2   -> f1:col2   (column family f1, qualifier col2)
    --   age    -> f1:age    (column family f1, qualifier age)
    -- NOTE(review): this example targets table1 / family f1, not the "orders" table
    -- (families "id" and "num") written by the Spark job above; adjust before reuse.
    create external table h_table(rowkey string,col1 string, col2 string, age int) 
    STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
    WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,f1:col1,f1:col2,f1:age") 
    TBLPROPERTIES("hbase.table.name" = "table1");
  • 相关阅读:
    Description Resource Path Location Type Java compiler level does not match the version of the instal
    myeclipse导入项目后,项目类中报Base64错
    Oracle中查询一个字符串的长度的函数
    异常QueryTimeoutException和for input String
    myeclipse中的内存溢出PermGen space
    SecureCRT--下重启服务器
    清除tomcat的缓存
    oracle 定时任务 job 调用存储过程有回到输出参数(含out参数)
    Spring之AOP
    @RequestParam、@PathVariable、@RequestBody区别
  • 原文地址:https://www.cnblogs.com/xumaomao/p/12743404.html
Copyright © 2011-2022 走看看