Dataset的数据构建
构建Dataset数据
代码示例
`
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoder;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import java.io.Serializable;
import java.util.Arrays;
import java.util.List;
/**
 * Minimal example of building a typed Spark {@code Dataset} from a list of Java beans.
 *
 * <p>{@link #main(String[])} starts a local Spark session, encodes two
 * {@link EmployeeSpark} instances with a bean encoder, prints the schema and the rows,
 * and finally prints the fields of the first row.
 */
public class WeSpark implements Serializable {

    /**
     * Java bean with a name and a salary, used as the element type of the Dataset.
     *
     * <p>The class is public and static, and exposes a public no-argument constructor plus
     * public getters/setters, which is the shape {@code Encoders.bean} is used with below.
     */
    public static class EmployeeSpark implements Serializable {
        private String name;
        private long salary;

        /** No-argument constructor; the bean encoder instantiates rows through it. */
        public EmployeeSpark() {
        }

        /** Convenience constructor used only by the enclosing example to build sample rows. */
        private EmployeeSpark(String name, long salary) {
            this.name = name;
            this.salary = salary;
        }

        /** @return the employee name */
        public String getName() {
            return name;
        }

        /** @param name the employee name */
        public void setName(String name) {
            this.name = name;
        }

        /** @return the employee salary */
        public long getSalary() {
            return salary;
        }

        /** @param salary the employee salary */
        public void setSalary(long salary) {
            this.salary = salary;
        }
    }

    /**
     * Runs the example against a local two-thread Spark master.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        SparkSession spark = SparkSession
                .builder()
                .appName("Java Spark SQL data sources example")
                .config("spark.some.config.option", "some-value")
                .master("local[2]")
                .getOrCreate();
        try {
            // Build the source data as a plain list of bean instances.
            List<EmployeeSpark> employees = Arrays.asList(
                    new EmployeeSpark("CFF", 300L),
                    new EmployeeSpark("ADD", 200L)
            );

            // The bean encoder maps the bean's properties to Dataset columns.
            Encoder<EmployeeSpark> employeeEncoder = Encoders.bean(EmployeeSpark.class);
            Dataset<EmployeeSpark> employeeDataset = spark.createDataset(employees, employeeEncoder);

            employeeDataset.printSchema();
            employeeDataset.show();

            // Fetch the first row once and reuse it, instead of running head() twice.
            EmployeeSpark first = employeeDataset.head();
            System.out.println(first.getName());
            System.out.println(first.getSalary());
        } finally {
            // Always release the Spark context, even if one of the actions above fails.
            spark.stop();
        }
    }
}
`
报错解决
caused by: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 24, Column 87:
出现动态代码生成报错
1.把 age/depID的类型改为
2.权限:Bean 类及其 getter/setter 必须是 public(嵌套类需声明为 public static),否则动态生成的代码无法访问该类
3.构造函数:Bean 类必须提供 public 的无参构造函数
参考
https://www.jianshu.com/p/dc166fdfe840