zoukankan      html  css  js  c++  java
  • Spark通过原生API连接es

    /**
    pom依赖
    <dependency>
        <groupId>org.elasticsearch</groupId>
        <artifactId>elasticsearch-hadoop</artifactId>
        <version>2.2.0-m1</version>
    </dependency>
    **/
    
    import data.spark.batch.cardbin.util.CardBinFields;
    import java.util.Map;
    import org.apache.spark.SparkConf;
    import org.apache.spark.api.java.JavaRDD;
    import org.apache.spark.api.java.JavaSparkContext;
    import org.apache.spark.api.java.function.Function;
    import org.apache.spark.sql.DataFrame;
    import org.apache.spark.sql.SQLContext;
    import org.elasticsearch.spark.rdd.api.java.JavaEsSpark;
    import scala.Tuple2;
    //import ....
    
    public class SparkConnectionEs{
        //spark直连es并通过CardBinFields实体转为sparkRdd从而注册成table
        private static String sourceIP = "192.168.23.23";
        private static String esPath = "ybs_cardbin_info_bak/cardbin";//es_index/es_type
        public static void main(String[] args) throws Exception {
        JavaRDD<CardBinFields> esdataRdd = JavaEsSpark.esRDD(sparkContext, esPath).map(new Function<Tuple2<String, Map<String, Object>>, CardBinFields>() {
    			private static final long serialVersionUID = 1L;
    
    			public CardBinFields call(Tuple2<String, Map<String, Object>> v1) throws Exception {
    				CardBinFields cardbin = new CardBinFields();
    				cardbin.setId(v1._1);
    				cardbin.setBank_no(v1._2.get("bank_no").toString());
    				return cardbin;
    			}
    		});
            DataFrame tfcardnoDF = sqlContext.createDataFrame(esdataRdd, CardBinFields.class).select("id", "bank_no");
    		tfcardnoDF.registerTempTable("ES_FIELDS");
        }
    }
    
    
    狭路相逢勇者胜!
  • 相关阅读:
    腾讯云CDN python SDK
    GLFW初体验
    Mac使用Xcode配置openGL
    sklearn神经网络分类
    sklearn LDA降维算法
    sklearn CART决策树分类
    sklearn逻辑回归
    抢占式内核与非抢占式内核
    操作系统原理学习笔记--进程管理
    操作系统原理4——存储管理
  • 原文地址:https://www.cnblogs.com/amcoder/p/13919494.html
Copyright © 2011-2022 走看看