zoukankan      html  css  js  c++  java
  • spark sql01

    package sql;
    
    
    
    import org.apache.spark.SparkConf;
    import org.apache.spark.api.java.JavaSparkContext;
    import org.apache.spark.sql.DataFrame;
    import org.apache.spark.sql.SQLContext;
    
    /**
     *
     */
    /**
     * Demonstrates basic Spark SQL DataFrame operations (Spark 1.x API):
     * reading a JSON file into a {@link DataFrame}, then showing it,
     * printing its schema, selecting/projecting columns, filtering rows,
     * and grouping with a count.
     */
    public class DataFrameReadJsonOps2 {
    
        /**
         * Entry point: runs the DataFrame demo against c:/resources/people.json.
         *
         * @param args command-line arguments (unused)
         */
        public static void main(String[] args) {
            // SparkConf reads system configuration and names this application.
            SparkConf conf = new SparkConf().setAppName("DataFrameOps").setMaster("local");
            // JavaSparkContext is the core entry point of the Driver.
            JavaSparkContext sc = new JavaSparkContext(conf);
            try {
                // Reduce console noise from Spark's INFO logging.
                sc.setLogLevel("WARN");
                // SQLContext enables SQL-style analysis on top of the context.
                SQLContext sqlContext = new SQLContext(sc);
                // A DataFrame can be thought of as a table.
                DataFrame df = sqlContext.read().json("c:/resources/people.json");
                // select * from table
                df.show();
                // desc table
                df.printSchema();
                // select name from table
                df.select(df.col("name")).show();
                // select name, age+10 from table
                df.select(df.col("name"), df.col("age").plus(10)).show();
                // select * from table where age > 21
                df.filter(df.col("age").gt(21)).show();
                // select age, count(1) from table group by age
                df.groupBy("age").count().show(); //df.groupBy(df.col("age")).count().show();
            } finally {
                // Fix: the original never shut the context down, leaking the
                // Spark driver's resources (UI server, block manager, temp dirs).
                sc.stop();
            }
        }
    
    }
    //
    //SLF4J: Class path contains multiple SLF4J bindings.
    //SLF4J: Found binding in [jar:file:/E:/bigdata/spark-1.4.0-bin-hadoop2.6/lib/spark-assembly-1.4.0-hadoop2.6.0.jar!/org/slf4j/impl/StaticLoggerBinder.class]
    //SLF4J: Found binding in [jar:file:/E:/bigdata/spark-1.4.0-bin-hadoop2.6/lib/spark-examples-1.4.0-hadoop2.6.0.jar!/org/slf4j/impl/StaticLoggerBinder.class]
    //SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
    //SLF4J: Actual binding is of type [org.slf4j.impl.Log4jLoggerFactory]
    //Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
    //17/12/29 14:15:10 INFO SparkContext: Running Spark version 1.4.0
    //17/12/29 14:15:24 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
    //17/12/29 14:15:28 INFO SecurityManager: Changing view acls to: alamps
    //17/12/29 14:15:28 INFO SecurityManager: Changing modify acls to: alamps
    //17/12/29 14:15:28 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(alamps); users with modify permissions: Set(alamps)
    //17/12/29 14:15:37 INFO Slf4jLogger: Slf4jLogger started
    //17/12/29 14:15:39 INFO Remoting: Starting remoting
    //17/12/29 14:15:44 INFO Remoting: Remoting started; listening on addresses :[akka.tcp://sparkDriver@172.18.3.7:55458]
    //17/12/29 14:15:44 INFO Utils: Successfully started service 'sparkDriver' on port 55458.
    //17/12/29 14:15:45 INFO SparkEnv: Registering MapOutputTracker
    //17/12/29 14:15:46 INFO SparkEnv: Registering BlockManagerMaster
    //17/12/29 14:15:46 INFO DiskBlockManager: Created local directory at C:\Users\alamps\AppData\Local\Temp\spark-cd3ecbc3-41b5-4d8b-8e78-8c2c368ce80b\blockmgr-660894dd-39d3-4c8a-bf25-ae1d3850953d
    //17/12/29 14:15:46 INFO MemoryStore: MemoryStore started with capacity 467.6 MB
    //17/12/29 14:15:47 INFO HttpFileServer: HTTP File server directory is C:\Users\alamps\AppData\Local\Temp\spark-cd3ecbc3-41b5-4d8b-8e78-8c2c368ce80b\httpd-106ce90e-d496-4e96-a383-b471aeb5a224
    //17/12/29 14:15:47 INFO HttpServer: Starting HTTP Server
    //17/12/29 14:15:48 INFO Utils: Successfully started service 'HTTP file server' on port 55464.
    //17/12/29 14:15:48 INFO SparkEnv: Registering OutputCommitCoordinator
    //17/12/29 14:15:49 INFO Utils: Successfully started service 'SparkUI' on port 4040.
    //17/12/29 14:15:49 INFO SparkUI: Started SparkUI at http://172.18.3.7:4040
    //17/12/29 14:15:49 INFO Executor: Starting executor ID driver on host localhost
    //17/12/29 14:15:50 INFO Utils: Successfully started service 'org.apache.spark.network.netty.NettyBlockTransferService' on port 55483.
    //17/12/29 14:15:50 INFO NettyBlockTransferService: Server created on 55483
    //17/12/29 14:15:50 INFO BlockManagerMaster: Trying to register BlockManager
    //17/12/29 14:15:50 INFO BlockManagerMasterEndpoint: Registering block manager localhost:55483 with 467.6 MB RAM, BlockManagerId(driver, localhost, 55483)
    //17/12/29 14:15:50 INFO BlockManagerMaster: Registered BlockManager
    //+----+-------+
    //| age|   name|
    //+----+-------+
    //|null|Michael|
    //|  30|   Andy|
    //|  19| Justin|
    //+----+-------+
    //
    //root
    // |-- age: long (nullable = true)
    // |-- name: string (nullable = true)
    //
    //+-------+
    //|   name|
    //+-------+
    //|Michael|
    //|   Andy|
    //| Justin|
    //+-------+
    //
    //+-------+----------+
    //|   name|(age + 10)|
    //+-------+----------+
    //|Michael|      null|
    //|   Andy|        40|
    //| Justin|        29|
    //+-------+----------+
    //
    //+---+----+
    //|age|name|
    //+---+----+
    //| 30|Andy|
    //+---+----+
    //
    //+----+-----+
    //| age|count|
    //+----+-----+
    //|null|    1|
    //|  19|    1|
    //|  30|    1|
    //+----+-----+
  • Related reading:
    python : matplotlib does not work in Eclipse
    在线代码生成器的设计和使用
    Hama——BSP、Graph教程
    oozie:hadoop中的工作流引擎
    oracle命令建库全过程
    IntelliJ IDEA 自动生成方法注释(含参数及返回值)转+亲测IDEA2018.3
    转:Can't connect to MySQL server on 'XXXX' (10055) 解决方案
    资源的释放
    java中的break、continue、return的区别
    解析xml文件的方式
  • Original article: https://www.cnblogs.com/alamps/p/8144298.html
Copyright © 2011-2022 走看看