  • 1. Spark: fetching HBase data in two ways, via the HBase API and via HFile (TableInputFormat), with simple examples

    pom.xml contents (the servlet-api exclusions below avoid classpath conflicts between hbase-server and Spark's own servlet/Jetty dependencies):

            <dependency>
                <groupId>org.apache.hbase</groupId>
                <artifactId>hbase-server</artifactId>
                <version>0.98.6-cdh5.2.0</version>
                <exclusions>
                    <exclusion>
                        <artifactId>javax.servlet-api</artifactId>
                        <groupId>javax.servlet</groupId>
                    </exclusion>
                    <exclusion>
                        <artifactId>javax.servlet</artifactId>
                        <groupId>org.eclipse.jetty.orbit</groupId>
                    </exclusion>
                    <exclusion>
                        <artifactId>servlet-api-2.5</artifactId>
                        <groupId>org.mortbay.jetty</groupId>
                    </exclusion>
                    <exclusion>
                        <artifactId>servlet-api</artifactId>
                        <groupId>javax.servlet</groupId>
                    </exclusion>
                </exclusions>
            </dependency>
            <dependency>
                <groupId>org.apache.spark</groupId>
                <artifactId>spark-core_2.10</artifactId>
                <version>1.5.2-hdh3.1.0</version>
                <exclusions>
                    <exclusion>
                        <artifactId>hadoop-client</artifactId>
                        <groupId>org.apache.hadoop</groupId>
                    </exclusion>
                </exclusions>
            </dependency>
            <dependency>
                <groupId>org.apache.spark</groupId>
                <artifactId>spark-sql_2.10</artifactId>
                <version>1.5.2-hdh3.1.0</version>
                <exclusions>
                    <exclusion>
                        <artifactId>javax.servlet-api</artifactId>
                        <groupId>javax.servlet</groupId>
                    </exclusion>
                </exclusions>
            </dependency>
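
    Both examples below set the ZooKeeper connection properties in code. A common alternative, sketched here under the assumption that the cluster's hbase-site.xml is available at a known path (the path below is hypothetical), is to load that file into the configuration instead of hard-coding the quorum:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hbase.HBaseConfiguration;

    public class HBaseConfFromSiteFile {
        public static Configuration create() {
            // Start from the HBase defaults, then overlay the cluster's site file.
            Configuration conf = HBaseConfiguration.create();
            // Hypothetical location; point this at the hbase-site.xml shipped with your cluster.
            conf.addResource(new Path("/etc/hbase/conf/hbase-site.xml"));
            return conf;
        }
    }

    If hbase-site.xml is already on the application classpath, HBaseConfiguration.create() picks it up automatically and the addResource call is unnecessary.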

    I. Fetching HBase table data with the HBase API

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.HColumnDescriptor;
    import org.apache.hadoop.hbase.client.HTable;
    import org.apache.hadoop.hbase.client.Result;
    import org.apache.hadoop.hbase.client.ResultScanner;
    import org.apache.hadoop.hbase.client.Scan;
    import org.apache.hadoop.hbase.util.Bytes;

    import java.io.IOException;

    /**
     * Fetch data through the HBase API.
     */
    public class DataAchieveFromHbaseApi {
        public static void main(String[] args) throws IOException {
            // HBase configuration
            Configuration conf = HBaseConfiguration.create();
            conf.set("hbase.zookeeper.property.clientPort", "2181"); // ZooKeeper client port
            conf.set("hbase.zookeeper.quorum", "hdh1,hdh2,hdh3");    // HBase ZooKeeper quorum
            // Scan configuration
            Scan scan = new Scan();
            scan.addFamily(Bytes.toBytes("cf")); // column family; more can be added
            // HBase table
            HTable hTable = new HTable(conf, Bytes.toBytes("test")); // table name
            // Fetch the scan results
            ResultScanner rs = hTable.getScanner(scan);
            // Column families of the table
            HColumnDescriptor[] hColDes = hTable.getTableDescriptor().getColumnFamilies();
            for (HColumnDescriptor hColDe : hColDes) {
                System.out.println(Bytes.toString(hColDe.getName()));
            }
            // Print the single column of each row (this table has only one)
            for (Result r : rs) {
                byte[] bytes = r.getValue(Bytes.toBytes("cf"), Bytes.toBytes("SSID")); // column family and qualifier
                if (bytes == null) {
                    continue; // row has no cf:SSID cell
                }
                String str = new String(bytes, "UTF-8");
                if (str.trim().length() > 0) {
                    System.out.println(str.trim());
                }
            }
            rs.close();
            hTable.close();
            System.out.println("end<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<");
        }
    }
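
    The example above scans the whole table and the whole "cf" family. As a sketch using the same 0.98-era API (the table and column are the sample ones from this post; the row keys are hypothetical), the Scan can be narrowed to one column and a row-key range, with scanner caching to cut down RPC round trips:

    import org.apache.hadoop.hbase.client.Scan;
    import org.apache.hadoop.hbase.util.Bytes;

    public class NarrowScan {
        public static Scan build() {
            Scan scan = new Scan();
            scan.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("SSID")); // a single column instead of the whole family
            scan.setStartRow(Bytes.toBytes("row-0000")); // hypothetical start row key (inclusive)
            scan.setStopRow(Bytes.toBytes("row-9999"));  // hypothetical stop row key (exclusive)
            scan.setCaching(500); // rows fetched per RPC round trip
            return scan;
        }
    }

    Passing this Scan to hTable.getScanner(...) changes only how much data comes back; the iteration code stays the same.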

    II. Fetching HBase table data through the interface Spark provides:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.client.Result;
    import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
    import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
    import org.apache.hadoop.hbase.util.Bytes;
    import org.apache.spark.SparkConf;
    import org.apache.spark.api.java.JavaPairRDD;
    import org.apache.spark.api.java.JavaSparkContext;
    import org.apache.spark.api.java.function.VoidFunction;
    import scala.Tuple2;

    import java.io.IOException;

    /**
     * Fetch data in HFile form: Spark's newAPIHadoopRDD with HBase's TableInputFormat.
     */
    public class DataAchieveFromHfile {
        public static void main(String[] args) throws IOException {
            Configuration conf = HBaseConfiguration.create();
            conf.set("hbase.zookeeper.property.clientPort", "2181");
            conf.set("hbase.zookeeper.quorum", "hdh1,hdh2,hdh3");
            conf.set(TableInputFormat.INPUT_TABLE, "test");
            SparkConf conf1 = new SparkConf().setAppName("test").setMaster("local"); // Spark app name and master (local mode here)
            JavaSparkContext sc = new JavaSparkContext(conf1);
            // Load the data
            JavaPairRDD<ImmutableBytesWritable, Result> rdd =
                    sc.newAPIHadoopRDD(conf, TableInputFormat.class, ImmutableBytesWritable.class, Result.class);
            System.out.println("number of rows read: " + rdd.count());
            rdd.foreach(new VoidFunction<Tuple2<ImmutableBytesWritable, Result>>() {
                @Override
                public void call(Tuple2<ImmutableBytesWritable, Result> result) throws Exception {
                    byte[] bytes = result._2().getValue(Bytes.toBytes("cf"), Bytes.toBytes("SSID")); // column family and qualifier
                    if (bytes == null) {
                        return; // row has no cf:SSID cell
                    }
                    String str = new String(bytes, "UTF-8");
                    if (str.trim().length() > 0) {
                        System.out.println(str.trim());
                    }
                }
            });
            sc.stop();
        }
    }
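
    Printing inside foreach runs on the executors and is only useful for a local smoke test. A minimal sketch (same assumed cf:SSID column as above) that instead maps the pair RDD into a plain JavaRDD<String> of cell values, which can then be counted, collected, or written out with saveAsTextFile:

    import org.apache.hadoop.hbase.client.Result;
    import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
    import org.apache.hadoop.hbase.util.Bytes;
    import org.apache.spark.api.java.JavaPairRDD;
    import org.apache.spark.api.java.JavaRDD;
    import org.apache.spark.api.java.function.Function;
    import scala.Tuple2;

    public class HfileRddToValues {
        // Extracts the cf:SSID cell from every row; rows without the cell are dropped.
        public static JavaRDD<String> ssidValues(JavaPairRDD<ImmutableBytesWritable, Result> rdd) {
            return rdd.map(new Function<Tuple2<ImmutableBytesWritable, Result>, String>() {
                @Override
                public String call(Tuple2<ImmutableBytesWritable, Result> t) throws Exception {
                    byte[] bytes = t._2().getValue(Bytes.toBytes("cf"), Bytes.toBytes("SSID"));
                    return bytes == null ? null : new String(bytes, "UTF-8");
                }
            }).filter(new Function<String, Boolean>() {
                @Override
                public Boolean call(String s) {
                    return s != null && s.trim().length() > 0;
                }
            });
        }
    }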