1.maven依赖
<!-- Flink core Java API -->
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-java</artifactId>
    <version>1.10.1</version>
</dependency>
<!-- Flink DataStream API -->
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-streaming-java_2.12</artifactId>
    <version>1.10.1</version>
</dependency>
<!-- Kafka 0.11 connector -->
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-kafka-0.11_2.12</artifactId>
    <version>1.10.1</version>
</dependency>
<!-- Redis connector (Apache Bahir) -->
<dependency>
    <groupId>org.apache.bahir</groupId>
    <artifactId>flink-connector-redis_2.11</artifactId>
    <version>1.0</version>
</dependency>
<!-- Elasticsearch 6 connector -->
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-elasticsearch6_2.12</artifactId>
    <version>1.10.1</version>
</dependency>
<!-- MySQL JDBC driver -->
<dependency>
    <groupId>mysql</groupId>
    <artifactId>mysql-connector-java</artifactId>
    <version>5.1.44</version>
</dependency>
<!-- RocksDB state backend -->
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-statebackend-rocksdb_2.12</artifactId>
    <version>1.10.1</version>
</dependency>
<!-- Table API planner -->
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-table-planner_2.12</artifactId>
    <version>1.10.1</version>
</dependency>
<!-- Table API Blink planner -->
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-table-planner-blink_2.12</artifactId>
    <version>1.10.1</version>
</dependency>
<!-- CSV format -->
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-csv</artifactId>
    <version>1.10.1</version>
</dependency>
2.sensor.txt
sensor_1,1547718199,35.8 sensor_6,1547718201,15.4 sensor_7,1547718202,6.7 sensor_10,1547718205,38.1 sensor_1,1547718207,36.3 sensor_1,1547718209,32.8 sensor_1,1547718212,37.1
3.bean
/**
 * Data type for a single sensor temperature reading.
 *
 * <p>A plain mutable bean: a no-argument constructor, an all-fields constructor,
 * and a getter/setter pair for every field.
 */
public class SensorReading {

    /** Sensor identifier. */
    private String id;

    /** Timestamp of the reading. */
    private Long timestamp;

    /** Temperature value of the reading. */
    private Double temperature;

    /** Creates an empty reading; all fields start as {@code null}. */
    public SensorReading() {
    }

    /**
     * Creates a fully populated reading.
     *
     * @param id          sensor identifier
     * @param timestamp   timestamp of the reading
     * @param temperature temperature value
     */
    public SensorReading(String id, Long timestamp, Double temperature) {
        this.id = id;
        this.timestamp = timestamp;
        this.temperature = temperature;
    }

    /** @return the sensor identifier */
    public String getId() {
        return this.id;
    }

    /** @param id the sensor identifier to store */
    public void setId(String id) {
        this.id = id;
    }

    /** @return the timestamp of the reading */
    public Long getTimestamp() {
        return this.timestamp;
    }

    /** @param timestamp the timestamp to store */
    public void setTimestamp(Long timestamp) {
        this.timestamp = timestamp;
    }

    /** @return the temperature value */
    public Double getTemperature() {
        return this.temperature;
    }

    /** @param temperature the temperature value to store */
    public void setTemperature(Double temperature) {
        this.temperature = temperature;
    }

    /**
     * Renders the reading as {@code SensorReading{id='...', timestamp=..., temperature=...}}.
     *
     * @return the textual form of this reading; unset fields are printed as {@code null}
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("SensorReading{");
        text.append("id='").append(id).append('\'');
        text.append(", timestamp=").append(timestamp);
        text.append(", temperature=").append(temperature);
        text.append('}');
        return text.toString();
    }
}
4.source
public class SourceTest1_Collection { public static void main(String[] args) throws Exception { //创建执行环境 StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(1); //从集合中读取数据 DataStream<SensorReading> dataStream = env.fromCollection(Arrays.asList( new SensorReading("sensor_1", 1547718199L, 35.8), new SensorReading("sensor_6", 1547718201L, 15.4), new SensorReading("sensor_7", 1547718202L, 6.7), new SensorReading("sensor_10", 1547718205L, 38.1) )); DataStream<Integer> integerDataStream = env.fromElements(1,2,4,67,189); //打印输出 dataStream.print("data"); integerDataStream.print("int"); //执行 env.execute(); } }
public class SourceTest2_File { public static void main(String[] args) throws Exception{ StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(1); // 从文件读取数据 DataStream<String> dataStream = env.readTextFile("D:\\workspace\\flinkworld\\src\\main\\resources\\sensor.txt"); // 打印输出 dataStream.print(); env.execute(); } }
public class SourceTest3_Kafka { public static void main(String[] args) throws Exception{ StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(1); Properties properties = new Properties(); properties.setProperty("bootstrap.servers", "localhost:9092"); properties.setProperty("group.id", "consumer-group"); properties.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); properties.setProperty("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); properties.setProperty("auto.offset.reset", "latest"); // 从文件读取数据 DataStream<String> dataStream = env.addSource( new FlinkKafkaConsumer011<String>("sensor", new SimpleStringSchema(), properties)); // 打印输出 dataStream.print(); env.execute(); } }
public class SourceTest4_UDF { public static void main(String[] args) throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(1); // 从文件读取数据 DataStream<SensorReading> dataStream = env.addSource( new MySensorSource() ); // 打印输出 dataStream.print(); env.execute(); } //实现自定义的SourceFunction public static class MySensorSource implements SourceFunction<SensorReading> { //定义一个标识位,用来控制数据的产生 private boolean running = true; @Override public void run(SourceContext<SensorReading> ctx) throws Exception { //定义一个随机数发生器 Random random = new Random(); //设置10个传感器的初始温度 HashMap<String, Double> sensorTempMap = new HashMap<>(); for(int i = 0; i< 10; i++) { sensorTempMap.put("sensor_"+(i+1),60+random.nextGaussian()*20); } while (running) { for(String sensorId : sensorTempMap.keySet()) { // 在当前温度基础上随机波动 Double newtemp = sensorTempMap.get(sensorId) + random.nextGaussian(); sensorTempMap.put(sensorId,newtemp); ctx.collect(new SensorReading(sensorId,System.currentTimeMillis(),newtemp)); } //控制输出频率 Thread.sleep(2000L); } } @Override public void cancel() { running = false; } } }
5.transform
public class TransformTest1_Base { public static void main(String[] args) throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(1); // 从文件读取数据 DataStream<String> inputStream = env.readTextFile("D:\\workspace\\flinkworld\\src\\main\\resources\\sensor.txt"); //1.map,把String转换成长度输出 DataStream<Integer> mapStream = inputStream.map(new MapFunction<String, Integer>() { @Override public Integer map(String value) throws Exception { return value.length(); } }); //2. flatmap,按逗号分字段 DataStream<String> flatMapStream = inputStream.flatMap(new FlatMapFunction<String, String>() { @Override public void flatMap(String value, Collector<String> out) throws Exception { String[] fields = value.split(","); for(String field : fields) { out.collect(field); } } }); // 3. filter, 筛选sensor_1开头的id对应的数据 DataStream<String> filterStream = inputStream.filter(new FilterFunction<String>() { @Override public boolean filter(String value) throws Exception { return value.startsWith("sensor_1"); } }); // 打印输出 mapStream.print("map"); flatMapStream.print("flatMap"); filterStream.print("filter"); env.execute(); } }
public class TransformTest2_RollingAggregation { public static void main(String[] args) throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(4); // 从文件读取数据 DataStream<String> inputStream = env.readTextFile("D:\\workspace\\flinkworld\\src\\main\\resources\\sensor.txt"); // 转换成SensorReading类型 // DataStream<SensorReading> dataStream = inputStream.map(new MapFunction<String, SensorReading>() { // @Override // public SensorReading map(String value) throws Exception { // String[] fields = value.split(","); // return new SensorReading(fields[0], new Long(fields[1]), new Double(fields[2])); // } // }); DataStream<SensorReading> dataStream = inputStream.map(line -> { String[] fields = line.split(","); return new SensorReading(fields[0], new Long(fields[1]), new Double(fields[2])); } ); // 分组 KeyedStream<SensorReading, Tuple> keyedStream = dataStream.keyBy("id"); KeyedStream<SensorReading, String> keyedStream1 = dataStream.keyBy(data -> data.getId()); DataStream<Long> dataStream1 = env.fromElements(1L, 34L, 4L, 657L, 23L); KeyedStream<Long, Integer> keyedStream2 = dataStream1.keyBy(new KeySelector<Long, Integer>() { @Override public Integer getKey(Long value) throws Exception { return value.intValue() % 2; } }); // KeyedStream<SensorReading, String> keyedStream1 = dataStream.keyBy(SensorReading::getId); // 滚动聚合,取当前最大的温度值 DataStream<SensorReading> resultStream = keyedStream.maxBy("temperature"); resultStream.print("result"); keyedStream1.print("key1"); keyedStream2.sum(0).print("key2"); env.execute(); } }
public class TransformTest3_Reduce { public static void main(String[] args) throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(1); // 从文件读取数据 DataStream<String> inputStream = env.readTextFile("D:\\workspace\\flinkworld\\src\\main\\resources\\sensor.txt"); // 转换成SensorReading类型 DataStream<SensorReading> dataStream = inputStream.map(line -> { String[] fields = line.split(","); return new SensorReading(fields[0], new Long(fields[1]), new Double(fields[2])); }); // 分组 KeyedStream<SensorReading, Tuple> keyedStream = dataStream.keyBy("id"); // reduce聚合,取最大的温度值,以及当前最新的时间戳 SingleOutputStreamOperator<SensorReading> resultStream = keyedStream.reduce(new ReduceFunction<SensorReading>() { @Override public SensorReading reduce(SensorReading value1, SensorReading value2) throws Exception { return new SensorReading(value1.getId(), value2.getTimestamp(), Math.max(value1.getTemperature(), value2.getTemperature())); } }); keyedStream.reduce( (curState, newData) -> { return new SensorReading(curState.getId(), newData.getTimestamp(), Math.max(curState.getTemperature(), newData.getTemperature())); }); resultStream.print(); env.execute(); } }
/**
 * Transform demo: multi-stream operators {@code split}/{@code select},
 * {@code connect} with a {@code CoMapFunction}, and {@code union}.
 */
public class TransformTest4_MultipleStreams {

    /**
     * Builds and runs the job.
     *
     * @param args unused
     * @throws Exception if job execution fails
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // Read data from a text file
        DataStream<String> inputStream =
                env.readTextFile("D:\\workspace\\flinkworld\\src\\main\\resources\\sensor.txt");

        // Convert each comma-separated line into a SensorReading
        DataStream<SensorReading> readings = inputStream.map(text -> {
            String[] parts = text.split(",");
            return new SensorReading(parts[0], Long.valueOf(parts[1]), Double.valueOf(parts[2]));
        });

        // 1. Split: tag readings above 30 degrees as "high", everything else as "low"
        SplitStream<SensorReading> splitStream = readings.split(new OutputSelector<SensorReading>() {
            @Override
            public Iterable<String> select(SensorReading reading) {
                if (reading.getTemperature() > 30) {
                    return Collections.singletonList("high");
                }
                return Collections.singletonList("low");
            }
        });

        DataStream<SensorReading> highTempStream = splitStream.select("high");
        DataStream<SensorReading> lowTempStream = splitStream.select("low");
        DataStream<SensorReading> allTempStream = splitStream.select("high", "low");

        highTempStream.print("high");
        lowTempStream.print("low");
        allTempStream.print("all");

        // 2. Connect: turn the high-temperature stream into (id, temperature) pairs,
        //    connect it with the low-temperature stream, and emit a status record
        DataStream<Tuple2<String, Double>> warningStream =
                highTempStream.map(new MapFunction<SensorReading, Tuple2<String, Double>>() {
                    @Override
                    public Tuple2<String, Double> map(SensorReading reading) throws Exception {
                        return new Tuple2<>(reading.getId(), reading.getTemperature());
                    }
                });

        ConnectedStreams<Tuple2<String, Double>, SensorReading> connectedStreams =
                warningStream.connect(lowTempStream);

        DataStream<Object> resultStream = connectedStreams.map(
                new CoMapFunction<Tuple2<String, Double>, SensorReading, Object>() {
                    // Elements of the first (high-temperature) input
                    @Override
                    public Object map1(Tuple2<String, Double> warning) throws Exception {
                        return new Tuple3<>(warning.f0, warning.f1, "high temp warning");
                    }

                    // Elements of the second (low-temperature) input
                    @Override
                    public Object map2(SensorReading reading) throws Exception {
                        return new Tuple2<>(reading.getId(), "normal");
                    }
                });

        resultStream.print();

        // 3. Union: combines several streams of the same element type.
        //    warningStream.union(lowTempStream) would not compile: the element types differ.
        highTempStream.union(lowTempStream, allTempStream);

        env.execute();
    }
}
public class TransformTest5_RichFunction { public static void main(String[] args) throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(4); // 从文件读取数据 DataStream<String> inputStream = env.readTextFile("D:\\workspace\\flinkworld\\src\\main\\resources\\sensor.txt"); // 转换成SensorReading类型 DataStream<SensorReading> dataStream = inputStream.map(line -> { String[] fields = line.split(","); return new SensorReading(fields[0], new Long(fields[1]), new Double(fields[2])); }); DataStream<Tuple2<String, Integer>> resultStream = dataStream.map( new MyMapper() ); resultStream.print(); env.execute(); } public static class MyMapper0 implements MapFunction<SensorReading, Tuple2<String, Integer>>{ @Override public Tuple2<String, Integer> map(SensorReading value) throws Exception { return new Tuple2<>(value.getId(), value.getId().length()); } } // 实现自定义富函数类 public static class MyMapper extends RichMapFunction<SensorReading, Tuple2<String, Integer>>{ @Override public Tuple2<String, Integer> map(SensorReading value) throws Exception { // getRuntimeContext().getState(); return new Tuple2<>(value.getId(), getRuntimeContext().getIndexOfThisSubtask()); } @Override public void open(Configuration parameters) throws Exception { // 初始化工作,一般是定义状态,或者建立数据库连接 System.out.println("open"); } @Override public void close() throws Exception { // 一般是关闭连接和清空状态的收尾操作 System.out.println("close"); } } }
public class TransformTest6_Partition { public static void main(String[] args) throws Exception{ StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(4); // 从文件读取数据 DataStream<String> inputStream = env.readTextFile("D:\\workspace\\flinkworld\\src\\main\\resources\\sensor.txt"); // 转换成SensorReading类型 DataStream<SensorReading> dataStream = inputStream.map(line -> { String[] fields = line.split(","); return new SensorReading(fields[0], new Long(fields[1]), new Double(fields[2])); }); dataStream.print("input"); // 1. shuffle DataStream<String> shuffleStream = inputStream.shuffle(); // shuffleStream.print("shuffle"); // 2. keyBy // dataStream.keyBy("id").print("keyBy"); // 3. global dataStream.global().print("global"); env.execute(); } }
6.sink
public class SinkTest1_Kafka { public static void main(String[] args) throws Exception{ StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(1); // // 从文件读取数据 // DataStream<String> inputStream = env.readTextFile("D:\workspace\flinkworld\src\main\resources\sensor.txt"); Properties properties = new Properties(); properties.setProperty("bootstrap.servers", "localhost:9092"); properties.setProperty("group.id", "consumer-group"); properties.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); properties.setProperty("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); properties.setProperty("auto.offset.reset", "latest"); // 从文件读取数据 DataStream<String> inputStream = env.addSource( new FlinkKafkaConsumer011<String>("sensor", new SimpleStringSchema(), properties)); // 转换成SensorReading类型 DataStream<String> dataStream = inputStream.map(line -> { String[] fields = line.split(","); return new SensorReading(fields[0], new Long(fields[1]), new Double(fields[2])).toString(); }); dataStream.addSink( new FlinkKafkaProducer011<String>("localhost:9092", "sinktest", new SimpleStringSchema())); env.execute(); } }
public class SinkTest2_Redis { public static void main(String[] args) throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(1); // 从文件读取数据 DataStream<String> inputStream = env.readTextFile("D:\\workspace\\flinkworld\\src\\main\\resources\\sensor.txt"); // 转换成SensorReading类型 DataStream<SensorReading> dataStream = inputStream.map(line -> { String[] fields = line.split(","); return new SensorReading(fields[0], new Long(fields[1]), new Double(fields[2])); }); // 定义jedis连接配置 FlinkJedisPoolConfig config = new FlinkJedisPoolConfig.Builder() .setHost("localhost") .setPort(6379) .build(); dataStream.addSink( new RedisSink<>(config, new MyRedisMapper())); env.execute(); } // 自定义RedisMapper public static class MyRedisMapper implements RedisMapper<SensorReading>{ // 定义保存数据到redis的命令,存成Hash表,hset sensor_temp id temperature @Override public RedisCommandDescription getCommandDescription() { return new RedisCommandDescription(RedisCommand.HSET, "sensor_temp"); } @Override public String getKeyFromData(SensorReading data) { return data.getId(); } @Override public String getValueFromData(SensorReading data) { return data.getTemperature().toString(); } } }
public class SinkTest3_Es { public static void main(String[] args) throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(1); // 从文件读取数据 DataStream<String> inputStream = env.readTextFile("D:\\workspace\\flinkworld\\src\\main\\resources\\sensor.txt"); // 转换成SensorReading类型 DataStream<SensorReading> dataStream = inputStream.map(line -> { String[] fields = line.split(","); return new SensorReading(fields[0], new Long(fields[1]), new Double(fields[2])); }); // 定义es的连接配置 ArrayList<HttpHost> httpHosts = new ArrayList<>(); httpHosts.add(new HttpHost("localhost", 9200)); dataStream.addSink(new ElasticsearchSink.Builder<SensorReading>(httpHosts, new MyEsSinkFunction()).build()); env.execute(); } // 实现自定义的ES写入操作 public static class MyEsSinkFunction implements ElasticsearchSinkFunction<SensorReading>{ @Override public void process(SensorReading element, RuntimeContext ctx, RequestIndexer indexer) { // 定义写入的数据source HashMap<String, String> dataSource = new HashMap<>(); dataSource.put("id", element.getId()); dataSource.put("temp", element.getTemperature().toString()); dataSource.put("ts", element.getTimestamp().toString()); // 创建请求,作为向es发起的写入命令 IndexRequest indexRequest = Requests.indexRequest() .index("sensor") .type("readingdata") .source(dataSource); // 用index发送请求 indexer.add(indexRequest); } } }
public class SinkTest4_Jdbc { public static void main(String[] args) throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(1); // 从文件读取数据 // DataStream<String> inputStream = env.readTextFile("D:\workspace\flinkworld\src\main\resources\sensor.txt"); // // // 转换成SensorReading类型 // DataStream<SensorReading> dataStream = inputStream.map(line -> { // String[] fields = line.split(","); // return new SensorReading(fields[0], new Long(fields[1]), new Double(fields[2])); // }); DataStream<SensorReading> dataStream = env.addSource(new SourceTest4_UDF.MySensorSource()); dataStream.addSink(new MyJdbcSink()); env.execute(); } // 实现自定义的SinkFunction public static class MyJdbcSink extends RichSinkFunction<SensorReading> { // 声明连接和预编译语句 Connection connection = null; PreparedStatement insertStmt = null; PreparedStatement updateStmt = null; @Override public void open(Configuration parameters) throws Exception { connection = DriverManager.getConnection("jdbc:mysql://localhost:3306/test", "root", "123456"); insertStmt = connection.prepareStatement("insert into sensor_temp (id, temp) values (?, ?)"); updateStmt = connection.prepareStatement("update sensor_temp set temp = ? where id = ?"); } // 每来一条数据,调用连接,执行sql @Override public void invoke(SensorReading value, Context context) throws Exception { // 直接执行更新语句,如果没有更新那么就插入 updateStmt.setDouble(1, value.getTemperature()); updateStmt.setString(2, value.getId()); updateStmt.execute(); if( updateStmt.getUpdateCount() == 0 ){ insertStmt.setString(1, value.getId()); insertStmt.setDouble(2, value.getTemperature()); insertStmt.execute(); } } @Override public void close() throws Exception { insertStmt.close(); updateStmt.close(); connection.close(); } } }