parquetjs 是一个纯 JavaScript 实现的 Parquet 文件创建与读取工具,以下是一个简单的使用示例。
参考代码
var parquet = require('parquetjs');
// Column definitions for the sample fruit records, keyed by column name.
// Each entry names the Parquet type passed to ParquetSchema for that column.
const fruitColumns = {
  name: { type: 'UTF8' },
  quantity: { type: 'INT64' },
  price: { type: 'DOUBLE' },
  date: { type: 'TIMESTAMP_MILLIS' },
  in_stock: { type: 'BOOLEAN' },
};

// Schema object handed to the writer when the file is created.
var schema = new parquet.ParquetSchema(fruitColumns);
/**
 * Opens 'fruits.parquet' for writing with the module-level `schema` and
 * appends two sample rows to it.
 *
 * The appends run inside try/finally so that `writer.close()` is still
 * awaited when an append rejects; the original rejection then propagates
 * to the caller.
 *
 * @returns {Promise<void>} settles once the writer has been closed.
 */
let write = async function () {
  const writer = await parquet.ParquetWriter.openFile(schema, 'fruits.parquet');
  try {
    await writer.appendRow({ name: 'apples', quantity: 10, price: 2.5, date: new Date(), in_stock: true });
    await writer.appendRow({ name: 'oranges', quantity: 10, price: 2.5, date: new Date(), in_stock: true });
  } finally {
    // Always close the writer, even if one of the appends failed.
    await writer.close();
  }
};
/**
 * Opens 'fruits.parquet', walks a cursor over it and prints every record
 * with console.log.
 *
 * Iteration stops at the first falsy value resolved by `cursor.next()`.
 * The reader is closed in a finally block so it is released both after a
 * complete read and when reading fails part-way through.
 *
 * @returns {Promise<void>} settles once the reader has been closed.
 */
let read = async function () {
  const reader = await parquet.ParquetReader.openFile('fruits.parquet');
  try {
    // create a new cursor
    const cursor = reader.getCursor();
    // read all records from the file and print them
    for (let record = await cursor.next(); record; record = await cursor.next()) {
      console.log(record);
    }
  } finally {
    await reader.close();
  }
};
// Write the sample file first, then read it back. The promise from read()
// is returned into the chain so that a failure in either step reaches the
// catch handler instead of becoming an unhandled rejection.
write()
  .then(() => read())
  .catch((err) => {
    console.error(err);
    // Report the failure through the exit status without cutting off pending I/O.
    process.exitCode = 1;
  });
参考资料
https://github.com/ironSource/parquetjs
https://parquet.apache.org/documentation/latest/