  • Spark reads Excel through the third-party library spark-excel; on large files this runs out of memory (OOM)

    import org.apache.spark.sql.{DataFrame, SparkSession}

    /**
     * Read an Excel file into a DataFrame with the spark-excel library.
     * https://github.com/crealytics/spark-excel
     * @param sparkSession the active SparkSession
     * @param filePath     path to the Excel file to read
     * @param header       whether the first row contains column names
     * @return the loaded DataFrame
     */
    def sparkExcel(sparkSession: SparkSession, filePath: String, header: Boolean): DataFrame = {
      println(s"--------------------sparkExcel-----: $filePath")
      import com.crealytics.spark.excel._
      val df = sparkSession.read.excel(
        header = header,  // Required
        //  dataAddress = "'My Sheet'!B3:C35",  // Optional, default: "A1"
        treatEmptyValuesAsNulls = true,  // Optional, default: true
        inferSchema = false,  // Optional, default: false
        addColorColumns = false,  // Optional, default: false
        //  timestampFormat = "MM-dd-yyyy HH:mm:ss",  // Optional, default: yyyy-mm-dd hh:mm:ss[.fffffffff]
        maxRowsInMemory = 20  // Optional, default None. If set, uses a streaming reader which can help with big files
        //  excerptSize = 10,  // Optional, default: 10. If set and schema is inferred, number of rows to infer it from
        //  workbookPassword = "pass"  // Optional, default None. Requires unlimited strength JCE for older JVMs
      ).load(filePath)

      df.show(5)

      df
    }
    

      Fix: set maxRowsInMemory. With this option spark-excel switches to a streaming reader instead of loading the whole workbook into memory, which avoids the OOM on large files.
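      For reference, a minimal end-to-end sketch of the same fix using spark-excel's generic DataFrameReader option API (option names follow the spark-excel README; the SparkSession settings and file path here are hypothetical):

    import org.apache.spark.sql.{DataFrame, SparkSession}

    object SparkExcelStreamingExample {
      def main(args: Array[String]): Unit = {
        // Hypothetical local SparkSession, for illustration only.
        val spark = SparkSession.builder()
          .appName("spark-excel-streaming")
          .master("local[*]")
          .getOrCreate()

        // Same read as sparkExcel above, expressed via the generic option API.
        // maxRowsInMemory makes spark-excel use a streaming reader, so the whole
        // workbook is never held in memory at once; this is what avoids the OOM.
        val df: DataFrame = spark.read
          .format("com.crealytics.spark.excel")
          .option("header", "true")
          .option("inferSchema", "false")
          .option("maxRowsInMemory", "20")   // streaming reader, roughly 20 rows kept in memory
          .load("/path/to/big-file.xlsx")    // hypothetical path

        df.show(5)
        spark.stop()
      }
    }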

  • Original article: https://www.cnblogs.com/long-yuan/p/13477372.html