zoukankan      html  css  js  c++  java
  • Spark读取Es写入Hdfs

    https://blog.csdn.net/qq_39481696/article/details/82597912

    hbase 入门
    https://blog.csdn.net/guolindonggld/article/details/82767620

    package org.bathkafka.com;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.client.Result;
    import org.apache.hadoop.hbase.client.Scan;
    import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
    import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
    import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
    import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
    import org.apache.hadoop.hbase.util.Base64;
    import org.apache.hadoop.hbase.util.Bytes;
    import org.apache.spark.SparkConf;
    import org.apache.spark.api.java.JavaPairRDD;
    import org.apache.spark.api.java.JavaSparkContext;
    import org.apache.spark.api.java.function.PairFunction;
    import org.apache.spark.api.java.function.VoidFunction;
    import scala.Tuple2;

    public class SparkToHbase {
    public static void main(String[] args) {
    // 集群启用了kerberos认证
    //Configuration configuration = kerberos();//集群启用了kerberos认证,没有认证的话,将这行注释掉即可
    Configuration configuration = new Configuration();
    String tableName = "gld:student"; // 命名空间下的表

    String FAMILY = "cf1"; 列族名称
    String COLUM_NAME = "name"; 列名
    String COLUM_SEX = "sex"; 列名
    String COLUM_AGE = "age"; 列名

    SparkConf sparkConf = new SparkConf().setAppName("SparkDataFromHbase").setMaster("local[*]");
    JavaSparkContext sc = new JavaSparkContext(sparkConf);

    // Hbase配置
    Configuration hconf = HBaseConfiguration.create(configuration);// kerberos认证集群必须传递已经认证过的conf
    hconf.set("hbase.zookeeper.quorum","node1,node2,node3");
    hconf.set("hbase.zookeeper.property.clientPort", "2181");
    hconf.set(TableInputFormat.INPUT_TABLE, tableName);

    Scan scan = new Scan();
    scan.addFamily(Bytes.toBytes(FAMILY));
    scan.addColumn(Bytes.toBytes(FAMILY), Bytes.toBytes(COLUM_AGE));
    scan.addColumn(Bytes.toBytes(FAMILY), Bytes.toBytes(COLUM_SEX));
    scan.addColumn(Bytes.toBytes(FAMILY), Bytes.toBytes(COLUM_NAME));
    try {
    //添加scan
    ClientProtos.Scan proto = ProtobufUtil.toScan(scan);
    String ScanToString = Base64.encodeBytes(proto.toByteArray());
    hconf.set(TableInputFormat.SCAN, ScanToString);
    //读HBase数据转化成RDD
    JavaPairRDD<ImmutableBytesWritable, Result> hbaseRDD = sc.newAPIHadoopRDD(hconf,
    TableInputFormat.class, ImmutableBytesWritable.class, Result.class);
    hbaseRDD.cache();// 对myRDD进行缓存
    System.out.println("数据总条数:" + hbaseRDD.count());

    //将Hbase数据转换成PairRDD,年龄:姓名
    JavaPairRDD<Integer, String> mapToPair = hbaseRDD.mapToPair(new PairFunction<Tuple2<ImmutableBytesWritable,
    Result>, Integer, String>() {
    private static final long serialVersionUID = -2437063503351644147L;

    @Override
    public Tuple2<Integer, String> call(
    Tuple2<ImmutableBytesWritable, Result> resultTuple2) throws Exception {
    byte[] o1 = resultTuple2._2.getValue(Bytes.toBytes(FAMILY), Bytes.toBytes(COLUM_NAME));//取列的值
    byte[] o2 = resultTuple2._2.getValue(Bytes.toBytes(FAMILY), Bytes.toBytes(COLUM_AGE));//取列的值
    byte[] o3 = resultTuple2._2.getValue(Bytes.toBytes(FAMILY), Bytes.toBytes(COLUM_SEX));//取列的值
    return new Tuple2<Integer, String>(new Integer(new String(o2)), new String(o1));
    }
    });

    //按第几列降序排序
    JavaPairRDD<Integer, String> sortByKey = mapToPair.sortByKey(false);
    sortByKey.foreach(new VoidFunction<Tuple2<Integer, String>>() {
    @Override
    public void call(Tuple2<Integer, String> integerStringTuple2) throws Exception {
    System.out.println(integerStringTuple2._2);
    }
    });
    //写入数据到hdfs系统
    // sortByKey.saveAsTextFile("hdfs://********:8020/tmp/test");

    hbaseRDD.unpersist();
    } catch (Exception e) {
    e.printStackTrace();
    } finally {
    }
    }
    }

  • 相关阅读:
    【设计模式】——抽象工厂模式
    【设计模式】——观察者模式
    Candy
    Two Sum
    Interleaving String
    Longest Valid Parentheses
    【设计模式】——建造者模式
    【设计模式】——外观模式
    Simplify Path
    Word Search
  • 原文地址:https://www.cnblogs.com/Mr--zhao/p/12212757.html
Copyright © 2011-2022 走看看