import org.apache.spark.sql.{DataFrame, SparkSession}

/**
 * Reads an Excel file into a DataFrame using the spark-excel library.
 * https://github.com/crealytics/spark-excel
 *
 * @param sparkSession the active SparkSession
 * @param filePath     path to the .xlsx/.xls file to read
 * @param header       whether the first row of the sheet contains column names
 * @return the loaded DataFrame
 */
def sparkExcel(sparkSession: SparkSession, filePath: String, header: Boolean): DataFrame = {
  println(s"--------------------sparkExcel-----: $filePath")
  import com.crealytics.spark.excel._
  val df = sparkSession.read.excel(
    header = header,                    // Required
    // dataAddress = "'My Sheet'!B3:C35",           // Optional, default: "A1"
    treatEmptyValuesAsNulls = true,     // Optional, default: true
    inferSchema = false,                // Optional, default: false
    addColorColumns = false,            // Optional, default: false
    // timestampFormat = "MM-dd-yyyy HH:mm:ss",     // Optional, default: yyyy-mm-dd hh:mm:ss[.fffffffff]
    maxRowsInMemory = 20                // Optional, default None. If set, uses a streaming reader which can help with big files
    // excerptSize = 10,                // Optional, default: 10. If set and schema is inferred, number of rows used to infer it
    // workbookPassword = "pass"        // Optional, default None. Requires unlimited strength JCE for older JVMs
  ).load(filePath)
  df.show(5)
  df
}
Note the maxRowsInMemory setting: when it is set, spark-excel uses a streaming reader, which helps when loading large files.
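Below is a minimal usage sketch, assuming the sparkExcel helper above is in scope (e.g. defined in the same object) and that the spark-excel artifact is on the classpath. The application name, the local master URL, and the file path /tmp/sample.xlsx are placeholders for illustration, not values from the original.

import org.apache.spark.sql.SparkSession

object SparkExcelDemo {
  def main(args: Array[String]): Unit = {
    // Local SparkSession for demonstration purposes only
    val spark = SparkSession.builder()
      .appName("spark-excel-demo")
      .master("local[*]")
      .getOrCreate()

    // header = true: treat the first row of the sheet as column names
    val df = sparkExcel(spark, "/tmp/sample.xlsx", header = true)
    println(s"rows read: ${df.count()}")

    spark.stop()
  }
}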