zoukankan      html  css  js  c++  java
  • spark实现分页查询hbase

    第一种:

      import org.apache.hadoop.hbase.io.ImmutableBytesWritable
      import org.apache.spark.{SparkConf, SparkContext}
      import org.apache.hadoop.hbase.mapreduce.TableInputFormat
      import org.apache.hadoop.hbase.protobuf.ProtobufUtil
      import org.apache.hadoop.hbase.util.{Base64, Bytes}
      import org.apache.spark.rdd.RDD
      import org.apache.hadoop.hbase.HBaseConfiguration
     import org.apache.hadoop.hbase.client.Result
     import org.apache.hadoop.hbase.client.Scan
     import org.apache.hadoop.hbase.filter._
     import org.apache.hadoop.hbase.util.Bytes    
     // Approach 1: page through an HBase table from Spark by scanning forward from the
     // last row key of the previous page and capping the rows returned with a PageFilter.
     val sparkConf = new SparkConf().setAppName("HbaseTest").setMaster("local[1]")
     val sc = new SparkContext(sparkConf)
     val conf = HBaseConfiguration.create()
     // NOTE(review): the ZooKeeper quorum is read from a property named "bootstrap.servers";
     // confirm that this property really holds the ZooKeeper hosts.
     conf.set("hbase.zookeeper.quorum", Spark_HbaseUtil.getProperties("bootstrap.servers"))
     val tableName = "sinldo:hos_index"
     conf.set(TableInputFormat.INPUT_TABLE, tableName)
     // Using the same start and stop rowKey would select exactly one row; here only the
     // start row is set, so the scan runs forward from it.
     // NOTE(review): the scan start row is inclusive, so if lastRowKey is the final row of the
     // previous page it is returned again -- confirm that callers account for this.
     val startRowkey = lastRowKey
     // Build the scan; startRowkey (and a stop rowKey) could be turned into parameters.
     val scan = new Scan(Bytes.toBytes(startRowkey))
     // Controls whether blocks read by this scan are kept in the region server block cache;
     // it does not limit how much of the table is scanned.
     scan.setCacheBlocks(true)
     // Number of rows fetched per scanner RPC.
     scan.setCaching(9)
     val filterList: FilterList = new FilterList(FilterList.Operator.MUST_PASS_ALL)
     val filter = new PageFilter(10)
     // Attach the page filter to the scan. Previously the filter objects were created but
     // never registered, so the page size had no effect on the result.
     // PageFilter is evaluated separately on each region server, so a table spanning several
     // regions can still yield more than 10 rows in total.
     filterList.addFilter(filter)
     scan.setFilter(filterList)
     // TableInputFormat expects the scan as a Base64-encoded protobuf string in the job conf.
     val proto = ProtobufUtil.toScan(scan)
     val scanToString = Base64.encodeBytes(proto.toByteArray)
     conf.set(TableInputFormat.SCAN, scanToString)
     val hBaseRDD: RDD[(ImmutableBytesWritable, Result)] = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat], classOf[ImmutableBytesWritable], classOf[Result])
     // Count the rows returned by the scan.
     val count = hBaseRDD.count()
     println(count)

    第二种:将第一种的设置开始RowKey的地方换成

        import org.apache.hadoop.hbase.filter.RowFilter
        import org.apache.hadoop.hbase.filter.BinaryComparator;
         // Approach 2: instead of a scan start row, select the page with filters.
         // Caps the number of rows returned at 10.
         val pageLimit = new PageFilter(10)
         // "022e" is the boundary rowKey: with GREATER, only rows whose key sorts strictly
         // after it pass the filter.
         val afterBoundary: Filter =
           new RowFilter(CompareFilter.CompareOp.GREATER, new BinaryComparator(Bytes.toBytes("022e")))
         // MUST_PASS_ALL: a row is kept only when every filter accepts it. The filters are
         // registered in the same order as before (page filter first, then row filter).
         val filterList: FilterList =
           new FilterList(FilterList.Operator.MUST_PASS_ALL, pageLimit, afterBoundary)
         scan.setFilter(filterList)
  • 相关阅读:
    java日期格式化
    Map遍历方法
    Eclipse常用快捷键
    mysql事务块处理
    oracle事务块示例
    取得服务器或应用程序当前路径
    tomcat下运行war包
    java通过CLASSPATH读取包内文件
    Hive分析窗体函数之SUM,AVG,MIN和MAX
    LeetCode 145 Binary Tree Postorder Traversal(二叉树的后序遍历)+(二叉树、迭代)
  • 原文地址:https://www.cnblogs.com/xuesheng/p/9630217.html
Copyright © 2011-2022 走看看