import java.util.concurrent.TimeUnit
import org.apache.spark.sql.SparkSession
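// Reads a text file from HDFS, times repeated passes over the cached line
// lengths, and prints a short sample of the contents.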
object SparkDemo {
  /** Usage: SparkDemo [file] */
  def main(args: Array[String]): Unit = {
    // Use the file given on the command line, or fall back to a default HDFS path.
    val path =
      if (args.length >= 1) args(0)
      else "hdfs://localhost:9000/user/Administrator/tmp/a.txt"
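
    // Run Spark locally in this JVM ("local" = a single worker thread).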
    val spark = SparkSession
      .builder
      .master("local")
      .appName("SparkDemo")
      .getOrCreate()
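
    // Read the file as an RDD of Rows; each Row wraps one line of text.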
    val file = spark.read.text(path).rdd
    // Compute each line's length; cache() keeps the results in memory.
    // (Row.length would return the Row's field count, not the line length,
    // so the line text is extracted with getString(0) first.)
    val mapped = file.map(s => s.getString(0).length).cache()
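
    // Time ten full passes over the cached RDD. The first pass computes and
    // caches the lengths; later iterations should be noticeably faster.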
    for (iter <- 1 to 10) {
      val startTimeNs = System.nanoTime()
      // A do-nothing pass that forces every partition to be evaluated.
      for (x <- mapped) { x + 2 }
      val durationMs = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startTimeNs)
      println(s"Iteration $iter took $durationMs ms")
    }
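
    // Sample the data: the first 10 characters of the first line, and the
    // total number of characters across all lines.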
println(s"File contents: ${file.map(_.toString).take(1).mkString(",").slice(0, 10)}")
println(s"Returned length(s) of: ${file.map(_.length).sum().toString}")

    spark.stop()
  }
}