zoukankan      html  css  js  c++  java
  • parquet文件的一些操作

    /**
     * Reads the footer of the sample Parquet file on HDFS and prints its schema
     * to standard output.
     *
     * @throws IOException if the footer cannot be read
     */
    @Test
    public void testGetSchema() throws IOException {
        Configuration conf = new Configuration(true);
        conf.set("fs.defaultFS","hdfs://10.0.1.xx:9000");

        // NO_FILTER: read the footer metadata without applying a metadata filter.
        ParquetMetadata footer = ParquetFileReader.readFooter(
                conf,
                new Path("/user/yanglei/parquet/douban.parquet"),
                ParquetMetadataConverter.NO_FILTER);

        MessageType fileSchema = footer.getFileMetaData().getSchema();
        System.out.println(fileSchema.toString());
    }

    /**
     * Parquet message schema named {@code douban} with three required primitive
     * fields: {@code user_name} (BINARY), {@code bookid} (INT64) and
     * {@code bookscore} (INT32).
     */
    public static final MessageType FILE_SCHEMA = Types.buildMessage()
    .required(PrimitiveType.PrimitiveTypeName.BINARY).named("user_name")
    .required(PrimitiveType.PrimitiveTypeName.INT64).named("bookid")
    .required(PrimitiveType.PrimitiveTypeName.INT32).named("bookscore")
    .named("douban");

    /**
     * Writes 1000 sample rows to a GZIP-compressed Parquet file on HDFS using
     * {@link #FILE_SCHEMA}.
     *
     * <p>Any file already present at the target path is deleted first. The writer
     * is managed by try-with-resources so that it is closed even when a write
     * fails part-way through.
     *
     * @throws Exception if the file system cannot be reached or a row cannot be written
     */
    @Test
    public void testWriteParquet() throws Exception {
        Configuration conf = new Configuration(true);
        conf.set("fs.defaultFS","hdfs://10.0.1.xx:9000");

        String file = "/user/yanglei/parquet/douban.parquet";
        Path path = new Path(file);
        FileSystem fs = path.getFileSystem(conf);
        // Remove a previous output so the run starts from a clean path
        // (presumably the writer refuses to overwrite an existing file — confirm).
        if (fs.exists(path)) {
            fs.delete(path, true);
        }

        // GroupWriteSupport reads the schema back from the configuration.
        GroupWriteSupport.setSchema(FILE_SCHEMA, conf);
        SimpleGroupFactory f = new SimpleGroupFactory(FILE_SCHEMA);

        // Positional arguments after the codec, per the ParquetWriter constructor
        // signature: block size, page size, dictionary page size,
        // enable dictionary, validating.
        try (ParquetWriter<Group> writer = new ParquetWriter<>(path, new GroupWriteSupport(),
                CompressionCodecName.GZIP, 1024, 1024, 512, true, false,
                ParquetProperties.WriterVersion.PARQUET_2_0, conf)) {
            for (int i = 0; i < 1000; i++) {
                writer.write(
                        f.newGroup()
                                .append("user_name", String.valueOf(i))
                                .append("bookid", 64L)
                                .append("bookscore", i));
            }
        }
    }
    http://www.cnblogs.com/ylcoder/
  • 相关阅读:
    根据label字数计算UILable高度
    使用brew安装软件
    NSSearchPathDomainMask 详解
    Swift 使用代理和闭包(closure)反向传值
    Swift 闭包(closure)
    Swift 协议和委托(代理)
    Swift 2.0 : 'enumerate' is unavailable: call the 'enumerate()' method on the sequence
    cocoapods安装以及使用,安装过程中ruby版本过低
    Mac环境下svn的使用
    静态库和动态库详解
  • 原文地址:https://www.cnblogs.com/ylcoder/p/6417744.html
Copyright © 2011-2022 走看看