- Java로 Map List(List<Map<String,Object>>)를 Spark Dataset으로 변환하여 Parquet으로 저장하는 방법
/**
 * Converts a list of log records (one {@code Map} per record) into a Spark
 * {@code Dataset<Row>} and writes it as Parquet, overwriting any existing output at
 * {@code /home/info/programlogs/<year>/<year>-<month>-<date>}.
 *
 * <p>Each map is read with the keys {@code server_dt}, {@code user_nm}, {@code user_id},
 * {@code group_nm}, {@code client_dt}, {@code sub_category}, {@code category},
 * {@code remote_port} and {@code @timestamp}. Every column is declared nullable in the
 * schema, so a missing or {@code null} value is written as a null cell.
 *
 * @param list  records to write; must not be {@code null} (an empty list writes an empty dataset)
 * @param year  year component of the output path
 * @param month month component of the output path
 * @param date  day component of the output path
 * @throws NullPointerException if {@code list} is {@code null}
 * @throws RuntimeException     if a {@code server_dt} value cannot be parsed by {@code queryFormat}
 */
public void writeListToParquet(List<Map<String,Object>> list,String year,String month,String date){
    if (list == null) {
        throw new NullPointerException("list must not be null");
    }

    // Built once so the write target and the log message can never diverge.
    String outputPath = "/home/info/programlogs/"+year+"/"+year+"-"+month+"-"+date;

    // Column layout of the output table; all columns are nullable.
    StructType schema = DataTypes.createStructType(new StructField[]{
        DataTypes.createStructField("server_dt", DataTypes.TimestampType,true),
        DataTypes.createStructField("user_nm", DataTypes.StringType,true),
        DataTypes.createStructField("user_id", DataTypes.StringType,true),
        DataTypes.createStructField("group_nm", DataTypes.StringType,true),
        DataTypes.createStructField("client_dt", DataTypes.StringType,true),
        DataTypes.createStructField("sub_category", DataTypes.IntegerType,true),
        DataTypes.createStructField("category", DataTypes.IntegerType,true),
        DataTypes.createStructField("remote_port", DataTypes.IntegerType,true),
        DataTypes.createStructField("@timestamp", DataTypes.StringType,true)
    });

    // Convert the input before starting Spark so malformed records fail fast
    // without spinning up (and then having to tear down) a session.
    List<Row> rowList = new ArrayList<>(list.size());
    for (Map<String,Object> map : list) {
        rowList.add(toParquetLogRow(map));
    }

    SparkSession spark =
        SparkSession.builder().appName("Spark")
            // The key was previously misspelled ("executer"), so Spark ignored the setting.
            .config("spark.executor.memory","4g")
            .config("spark.driver.bindAddress","10.10.~.~")
            .master("local")
            .getOrCreate();
    try {
        Dataset<Row> data = spark.createDataFrame(rowList,schema);
        data.write()
            .option("maxRecordsPerFile",5000000)
            .mode(SaveMode.Overwrite)
            .parquet(outputPath);
        System.out.println("CREATED parquet : "+outputPath);
    } finally {
        // Always release the session, even when the write fails.
        spark.close();
    }
}

/**
 * Builds one {@code Row} from a record map, with values in the same order as the
 * schema columns declared in {@code writeListToParquet}.
 */
private Row toParquetLogRow(Map<String,Object> map) {
    String serverDtText = (String) map.get("server_dt");
    Timestamp serverDt = null;
    if (serverDtText != null) {
        try {
            // queryFormat is declared outside this block; it is used here exactly as before.
            Date parsedDate = queryFormat.parse(serverDtText);
            serverDt = new Timestamp(parsedDate.getTime());
        } catch (ParseException e) {
            throw new RuntimeException("Unparseable server_dt: " + serverDtText, e);
        }
    }

    Object[] values = {
        serverDt,
        (String) map.get("user_nm"),
        (String) map.get("user_id"),
        (String) map.get("group_nm"),
        (String) map.get("client_dt"),
        toNullableInteger(map.get("sub_category")),
        toNullableInteger(map.get("category")),
        toNullableInteger(map.get("remote_port")),
        (String) map.get("@timestamp")
    };
    return RowFactory.create(values);
}

/**
 * Converts a map value to a boxed {@code Integer} for an IntegerType column.
 * {@code null} stays {@code null}; any other {@code Number} is narrowed via its
 * {@code long} value, throwing {@code ArithmeticException} if that does not fit in an int.
 * A non-numeric value fails with {@code ClassCastException}, as the original cast did.
 */
private static Integer toNullableInteger(Object value) {
    if (value == null) {
        return null;
    }
    if (value instanceof Integer) {
        return (Integer) value;
    }
    return Math.toIntExact(((Number) value).longValue());
}