zoukankan      html  css  js  c++  java
  • Operator_countByValue

    package com.bjsxt.spark.actions;

    import java.util.Arrays;
    import java.util.Map;
    import java.util.Map.Entry;

    import org.apache.spark.SparkConf;
    import org.apache.spark.api.java.JavaPairRDD;
    import org.apache.spark.api.java.JavaRDD;
    import org.apache.spark.api.java.JavaSparkContext;

    import scala.Tuple2;
    /**
     * Demonstrates the Spark {@code countByValue} action.
     *
     * <p>{@code countByValue} counts the elements of a dataset by identical content and
     * returns, for each distinct element, the number of times it occurs. On a pair RDD
     * the "element" is the whole {@code (key, value)} tuple, so the resulting map is
     * keyed by {@code Tuple2<Integer, String>}.
     *
     * @author root
     */
    public class Operator_countByValue {

        /**
         * Builds a small in-memory pair RDD, counts identical tuples with
         * {@code countByValue}, and prints one {@code key:...,value:...} line per
         * distinct tuple to standard output.
         *
         * <p>With the sample data below, the tuple {@code (4,d)} occurs twice and every
         * other tuple occurs once. The order of the printed lines follows the iteration
         * order of the returned map, which this code does not control.
         *
         * @param args command-line arguments; not used
         */
        public static void main(String[] args) {
            SparkConf conf = new SparkConf();
            // The application name matches the operator being demonstrated so the job is
            // labelled correctly wherever Spark reports it.
            conf.setMaster("local").setAppName("countByValue");
            JavaSparkContext sc = new JavaSparkContext(conf);
            try {
                JavaPairRDD<Integer, String> parallelizePairs = sc.parallelizePairs(Arrays.asList(
                        new Tuple2<Integer, String>(1, "a"),
                        new Tuple2<Integer, String>(2, "b"),
                        new Tuple2<Integer, String>(2, "c"),
                        new Tuple2<Integer, String>(3, "c"),
                        new Tuple2<Integer, String>(4, "d"),
                        new Tuple2<Integer, String>(4, "d")
                ));

                // countByValue is an action: the counts are returned to the driver as a Map.
                Map<Tuple2<Integer, String>, Long> countByValue = parallelizePairs.countByValue();

                for (Entry<Tuple2<Integer, String>, Long> entry : countByValue.entrySet()) {
                    System.out.println("key:" + entry.getKey() + ",value:" + entry.getValue());
                }
            } finally {
                // Always stop the context so it is shut down even if the job above throws.
                sc.stop();
            }
        }
    }

  • 相关阅读:
    开放6379端口
    synchronized 实现同步的基础
    pythoning—— 5:实战篇(购物车)
    pythoning ——3、数据类型(字符串)
    pythoning ——2、数据类型(元组、序列)
    pythoning ——1、基础篇
    暗链/黑链
    什么是webshell
    获取当前设备IP
    生成公钥
  • 原文地址:https://www.cnblogs.com/huiandong/p/9194537.html
Copyright © 2011-2022 走看看