zoukankan      html  css  js  c++  java
  • java spark sql 计算各个省份广告点击数的top3

    同这个需求一样,用spark sql的方式实现(相对来说简单一点)

    https://www.cnblogs.com/7749ha/p/12909115.html

    package sparksql;
    import org.apache.spark.SparkContext;
    import org.apache.spark.api.java.JavaRDD;
    import org.apache.spark.sql.Dataset;
    import org.apache.spark.sql.Row;
    import org.apache.spark.sql.RowFactory;
    import org.apache.spark.sql.SparkSession;
    import org.apache.spark.sql.types.DataTypes;
    import org.apache.spark.sql.types.StructField;
    import org.apache.spark.sql.types.StructType;
    import org.apache.spark.api.java.function.Function;
    
    import java.util.ArrayList;
    import java.util.List;
    
    /**
     * Computes, for every province, the three most-clicked ads using Spark SQL.
     *
     * <p>Each input line is split on single spaces into five string columns:
     * {@code ts province city user ad}. The rows are registered as the temporary
     * view {@code people}, clicks are counted per (province, ad) pair, and the
     * pairs are ranked inside each province with {@code row_number()}.
     *
     * <p>Author: xiaoshubiao, 2020/5/15 16:44
     */
    public class sparksql_test {

        /** Input file used when no path is passed on the command line. */
        private static final String DEFAULT_INPUT_PATH = "D:/tmp/rizhi.txt";

        /** Space-separated column names of the log schema, in file order. */
        private static final String SCHEMA_STRING = "ts province city user ad";

        /** Number of columns a well-formed log line must provide. */
        private static final int FIELD_COUNT = SCHEMA_STRING.split(" ").length;

        /** How many ads to keep per province. */
        private static final int TOP_N = 3;

        /**
         * Runs the job: loads the click log, shows it, and shows the top ads per province.
         *
         * @param args optional; {@code args[0]} overrides the input file path
         *             (defaults to {@value #DEFAULT_INPUT_PATH})
         * @throws Exception if Spark fails to read the input or to execute the query
         */
        public static void main(String[] args) throws Exception{
            String inputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_PATH;

            // No master is configured here; NOTE(review): presumably supplied by
            // spark-submit or spark.master in the environment - confirm.
            SparkSession spark = SparkSession
                    .builder()
                    .appName("Java Spark SQL basic example")
                    .getOrCreate();
            try {
                SparkContext sc = spark.sparkContext();
                JavaRDD<String> stringJavaRDD = sc.textFile(inputPath, 1).toJavaRDD();

                // Build the schema: every column is a nullable string.
                List<StructField> fields = new ArrayList<>();
                for (String fieldname : SCHEMA_STRING.split(" ")) {
                    fields.add(DataTypes.createStructField(fieldname, DataTypes.StringType, true));
                }
                StructType structType = DataTypes.createStructType(fields);

                // Split each line once, drop lines that do not carry all columns
                // (e.g. blank lines) so they cannot fail the job with an
                // ArrayIndexOutOfBoundsException, then convert to rows.
                JavaRDD<Row> rowRDD = stringJavaRDD
                        .map((Function<String, String[]>) record -> record.split(" "))
                        .filter((Function<String[], Boolean>) attributes -> attributes.length >= FIELD_COUNT)
                        .map((Function<String[], Row>) attributes -> RowFactory.create(
                                attributes[0],
                                attributes[1].trim(),
                                attributes[2],
                                attributes[3],
                                attributes[4]));

                Dataset<Row> dataFrame = spark.createDataFrame(rowRDD, structType);
                // Show the parsed data for inspection.
                dataFrame.show();

                // getOrCreate() may hand back an existing session in which the view is
                // already registered; replace it instead of failing.
                dataFrame.createOrReplaceTempView("people");

                // Inner query: clicks per (province, ad). Middle query: rank ads inside
                // each province by click count, descending. Outer query: keep ranks
                // 1..TOP_N (the original "ind <3" returned only the top two).
                String sql = "select * from (select province,ad,c,row_number() over(partition by province order by c desc) as ind from (select province,ad,count(*) as c from people group by province,ad)t)t where ind <= " + TOP_N;
                spark.sql(sql).show();
            } finally {
                // Release the Spark context even when the job fails.
                spark.stop();
            }
        }
    }
  • 相关阅读:
    mysql 安全
    选择年份 php的写法要比js简洁一些
    PHP for 循环
    vb和php 基于socket通信
    PHP 数组和字符串互相转换实现方法
    php中对2个数组相加的函数
    开启mysql sql追踪
    幸运码
    系统管理模块_岗位管理_改进_使用ModelDroven方案_套用美工写好的页面效果_添加功能与修改功能使用同一个页面
    系统管理模块_岗位管理_实现CRUD功能的具体步骤并设计Role实体
  • 原文地址:https://www.cnblogs.com/7749ha/p/12910407.html
Copyright © 2011-2022 走看看