zoukankan      html  css  js  c++  java
  • flink batch wordcount

    1、POJO方式

    public class WordCountPojo {
        public static class Word{
            private String word;
            private int frequency;
    
            public Word() {
            }
    
            public Word(String word, int frequency) {
                this.word = word;
                this.frequency = frequency;
            }
    
            public String getWord() {
                return word;
            }
    
            public void setWord(String word) {
                this.word = word;
            }
    
            public int getFrequency() {
                return frequency;
            }
    
            public void setFrequency(int frequency) {
                this.frequency = frequency;
            }
    
            @Override
            public String toString() {
                return "Word=" + word + " freq=" + frequency;
            }
        }
    
        /**
         * Implements the string tokenizer that splits sentences into words as a user-defined
         * FlatMapFunction. The function takes a line (String) and splits it into
         * multiple Word objects.
         */
        public static final class Tokenizer implements FlatMapFunction<String, Word> {
    
            @Override
            public void flatMap(String value, Collector<Word> out) {
                // normalize and split the line
                String[] tokens = value.toLowerCase().split("\W+");
    
                // emit the pairs
                for (String token : tokens) {
                    if (token.length() > 0) {
                        out.collect(new Word(token, 1));
                    }
                }
            }
        }
    
        public static void main(String args[]) throws Exception {
            final ParameterTool params = ParameterTool.fromArgs(args);
    
            // set up the execution environment
            final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    
            // make parameters available in the web interface
            env.getConfig().setGlobalJobParameters(params);
    
            // get input data
            DataSet<String> text;
            if (params.has("input")) {
                // read the text file from given input path
                text = env.readTextFile(params.get("input"));
            } else {
                // get default test text data
                System.out.println("Executing WordCount example with default input data set.");
                System.out.println("Use --input to specify file input.");
                text = WordCountData.getDefaultTextLineDataSet(env);
            }
    
            DataSet<Word> counts = text
                    // split up the lines into Word objects (with frequency = 1)
                    .flatMap(new Tokenizer())
                    // group by the field word and sum up the frequency
                    .groupBy("word")
                    .reduce(new ReduceFunction<Word>() {
                        @Override
                        public Word reduce(Word value1, Word value2) throws Exception {
                            return new Word(value1.word, value1.frequency + value2.frequency);
                        }
                    });
            if (params.has("output")) {
                counts.writeAsText(params.get("output"), FileSystem.WriteMode.OVERWRITE);
                // execute program
                env.execute("WordCount-Pojo Example");
            } else {
                System.out.println("Printing result to stdout. Use --output to specify output path.");
                counts.print();
            }
        }
    
    
    }

    2、元组方式

    public class WordCount {
    
        /**
         * Implements the string tokenizer that splits sentences into words as a user-defined
         * FlatMapFunction. The function takes a line (String) and splits it into
         * multiple pairs in the form of "(word,1)" ({@code Tuple2<String, Integer>}).
         */
        public static final class Tokenizer implements FlatMapFunction<String, Tuple2<String, Integer>> {
            @Override
            public void flatMap(String value, Collector<Tuple2<String, Integer>> out) throws Exception {
                // normalize and split the line
                String[] tokens = value.toLowerCase().split("\W+");
    
                // emit the pairs
                for (String token : tokens) {
                    if (token.length() > 0) {
                        out.collect(new Tuple2<>(token, 1));
                    }
                }
            }
        }
    
        public static void main(String args[]) throws Exception {
            final ParameterTool params = ParameterTool.fromArgs(args);
    
            // set up the execution environment
            final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    
            // make parameters available in the web interface
            env.getConfig().setGlobalJobParameters(params);
    
            // get input data
            DataSet<String> text;
            if (params.has("input")) {
                // read the text file from given input path
                text = env.readTextFile(params.get("input"));
            } else {
                // get default test text data
                System.out.println("Executing WordCount example with default input data set.");
                System.out.println("Use --input to specify file input.");
                text = WordCountData.getDefaultTextLineDataSet(env);
            }
    
            DataSet<Tuple2<String,Integer>> counts = text
                    // split up the lines in pairs (2-tuples) containing: (word,1)
                    .flatMap(new Tokenizer())
                    // group by the tuple field "0" and sum up tuple field "1"
                    .groupBy(0)
                    .reduce(new ReduceFunction<Tuple2<String, Integer>>() {
                        @Override
                        public Tuple2<String, Integer> reduce(Tuple2<String, Integer> value1, Tuple2<String, Integer> value2) throws Exception {
                            return new Tuple2<>(value1.f0,value1.f1+value2.f1);
                        }
                    }); //等效于sum(1)
    //                .sum(1);
            // emit result
            if(params.has("output")){
                counts.writeAsCsv(params.get("output"),"
    "," ");
                // execute program
                env.execute("WordCount batch");
            }else {
                System.out.println("Printing result to stdout. Use --output to specify output path.");
                counts.print();
            }
    
        }
    }
  • 相关阅读:
    点击对应不同name的button,显示不同name的弹窗(弹窗功能)
    点击添加本地图片的前端效果制作
    巧用margin/padding的百分比值实现高度自适应(多用于占位,避免闪烁)
    移动端取消touch高亮效果
    手机网站的几点注意
    图片自动切换+链接
    使用DOM的方法获取所有li元素,然后使用jQuery()构造函数把它封装为jQuery对象
    使用jQuery匹配文档中所有的li元素,返回一个jQuery对象,然后通过数组下标的方式读取jQuery集合中第1个DOM元素,此时返回的是DOM对象,然后调用DOM属性innerHTML,读取该元素 包含的文本信息
    利用jQuery扩展接口为jQuery框架定义了两个自定义函数,然后调用这两个函数
    jQuery链式语法演示
  • 原文地址:https://www.cnblogs.com/asker009/p/10952588.html
Copyright © 2011-2022 走看看