zoukankan      html  css  js  c++  java
  • Spark Streaming 实例

    使用 Spark Streaming 读取 HDFS 目录中的文件,统计其中每个单词出现的次数,并将结果写入 MySQL

    package com.yeliang;
    
    import java.sql.Connection;
    import java.sql.PreparedStatement;
    import java.sql.Statement;
    import java.util.Arrays;
    import java.util.Iterator;
    
    import org.apache.spark.SparkConf;
    import org.apache.spark.api.java.JavaPairRDD;
    import org.apache.spark.api.java.function.FlatMapFunction;
    import org.apache.spark.api.java.function.Function2;
    import org.apache.spark.api.java.function.PairFunction;
    import org.apache.spark.api.java.function.VoidFunction;
    import org.apache.spark.streaming.Durations;
    import org.apache.spark.streaming.Time;
    import org.apache.spark.streaming.api.java.JavaDStream;
    import org.apache.spark.streaming.api.java.JavaPairDStream;
    import org.apache.spark.streaming.api.java.JavaStreamingContext;
    
    import scala.Tuple2;
    import scala.collection.generic.BitOperations.Int;
    
    /**
     * Spark Streaming word-count example.
     *
     * <p>Reads text lines from an HDFS directory, splits each line on single
     * spaces, counts the occurrences of every word within each batch, prints the
     * counts and inserts them into the {@code wordcount} table through a JDBC
     * connection obtained from {@code ConnectionPool}.
     */
    public class SparkStreamTest {
    	/**
    	 * Parameterized insert statement. Binding the word as a parameter (instead
    	 * of concatenating it into the SQL text) keeps words that contain quotes
    	 * from breaking the statement or injecting SQL.
    	 */
    	private static final String INSERT_SQL = "insert into wordcount(word,count) values(?,?)";
    
    	/**
    	 * Builds the streaming pipeline, starts it and blocks until it terminates.
    	 *
    	 * @param args command-line arguments (unused)
    	 */
    	public static void main(String[] args) {
    		// Run locally.
    		SparkConf conf = new SparkConf().setMaster("local[1]").setAppName("xxzx");
    		// Use a 5-second batch interval.
    		JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(5));
    		// Read text lines from HDFS.
    		JavaDStream<String> lines = jssc.textFileStream("hdfs://n1:9000/wordcount_dir");
    
    		// Split every line into words on single spaces.
    		JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
    			private static final long serialVersionUID = 1L;
    
    			public Iterable<String> call(String line) throws Exception {
    				return Arrays.asList(line.split(" "));
    			}
    		});
    
    		// Pair each word with an initial count of 1.
    		JavaPairDStream<String, Integer> ones = words.mapToPair(new PairFunction<String, String, Integer>() {
    			private static final long serialVersionUID = 1L;
    
    			public Tuple2<String, Integer> call(String word) throws Exception {
    				return new Tuple2<String, Integer>(word, 1);
    			}
    		});
    
    		// Sum the counts per word within each batch.
    		JavaPairDStream<String, Integer> result = ones.reduceByKey(new Function2<Integer, Integer, Integer>() {
    			private static final long serialVersionUID = 1L;
    
    			@Override
    			public Integer call(Integer left, Integer right) throws Exception {
    				return left + right;
    			}
    		});
    
    		result.print();
    
    		// Persist every batch, using one connection and one JDBC batch per partition.
    		result.foreachRDD(new VoidFunction<JavaPairRDD<String, Integer>>() {
    			private static final long serialVersionUID = 1L;
    
    			@Override
    			public void call(JavaPairRDD<String, Integer> rdd) throws Exception {
    				rdd.foreachPartition(new VoidFunction<Iterator<Tuple2<String, Integer>>>() {
    					private static final long serialVersionUID = 1L;
    
    					@Override
    					public void call(Iterator<Tuple2<String, Integer>> records) throws Exception {
    						// Nothing to write: do not borrow a connection for an empty partition.
    						if (!records.hasNext()) {
    							return;
    						}
    						Connection conn = ConnectionPool.getConnection();
    						try {
    							PreparedStatement insert = conn.prepareStatement(INSERT_SQL);
    							try {
    								while (records.hasNext()) {
    									Tuple2<String, Integer> wordcount = records.next();
    									insert.setString(1, wordcount._1());
    									insert.setInt(2, wordcount._2());
    									insert.addBatch();
    								}
    								insert.executeBatch();
    							} finally {
    								// Release the statement even when the batch fails.
    								insert.close();
    							}
    						} finally {
    							// Always hand the connection back, otherwise a failed
    							// batch would leave it checked out.
    							ConnectionPool.returnConnection(conn);
    						}
    					}
    				});
    			}
    		});
    
    		jssc.start();
    		try {
    			jssc.awaitTermination();
    		} finally {
    			// Close the context even if awaitTermination throws.
    			jssc.close();
    		}
    	}
    }
    

      

  • 相关阅读:
    经典傻逼题
    谈谈Spring的IoC之注解扫描
    吴裕雄 python 神经网络——TensorFlow 花瓣分类与迁移学习(1)
    吴裕雄 python 神经网络——TensorFlow 实现LeNet-5模型处理MNIST手写数据集
    吴裕雄 python 神经网络——TensorFlow 花瓣识别2
    吴裕雄 python 神经网络——TensorFlow训练神经网络:花瓣识别
    吴裕雄 python 神经网络——TensorFlow训练神经网络:卷积层、池化层样例
    吴裕雄 python 神经网络——TensorFlow训练神经网络:MNIST最佳实践
    吴裕雄 python 神经网络——TensorFlow训练神经网络:不使用滑动平均
    吴裕雄 python 神经网络——TensorFlow训练神经网络:不使用隐藏层
  • 原文地址:https://www.cnblogs.com/mowei/p/6876868.html
Copyright © 2011-2022 走看看