
    Reading an ORC file

        @Test
        public void readOrc() throws IOException {
            Configuration conf = new Configuration();
            // Open the ORC file; the schema is read from the file footer.
            Reader reader = OrcFile.createReader(new Path("/tmp/Orc.orc"),
                    OrcFile.readerOptions(conf));
            RecordReader rows = reader.rows();
            // Reuse one batch; nextBatch() refills it until the file is exhausted.
            VectorizedRowBatch batch = reader.getSchema().createRowBatch();
            while (rows.nextBatch(batch)) {
                System.out.println(batch.toString());
            }
            rows.close();
        }
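
    The test above only dumps each batch with batch.toString(), which is fine as a smoke test but does not show how to get at individual values. The sketch below is one hedged way to do that, assuming the file was produced by the single-row writer further down (schema struct<x:int,y:int>, so both columns arrive as LongColumnVector); readOrcColumns is a name invented here, not part of the original post.

        @Test
        public void readOrcColumns() throws IOException {
            Configuration conf = new Configuration();
            Reader reader = OrcFile.createReader(new Path("/tmp/Orc.orc"),
                    OrcFile.readerOptions(conf));
            RecordReader rows = reader.rows();
            VectorizedRowBatch batch = reader.getSchema().createRowBatch();
            while (rows.nextBatch(batch)) {
                // Assumes struct<x:int,y:int>: int columns are exposed as LongColumnVector.
                LongColumnVector x = (LongColumnVector) batch.cols[0];
                LongColumnVector y = (LongColumnVector) batch.cols[1];
                // batch.size is the number of rows actually filled in this batch.
                for (int r = 0; r < batch.size; r++) {
                    System.out.println(x.vector[r] + "," + y.vector[r]);
                }
            }
            rows.close();
        }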

    Writing an ORC file (single row)

        @Test
        public void writeLine3() throws IOException {
            Configuration conf = new Configuration();
            // Two int columns; int maps to LongColumnVector in the vectorized API.
            TypeDescription schema = TypeDescription.fromString("struct<x:int,y:int>");
            Writer writer = OrcFile.createWriter(new Path("/tmp/Orc.orc"),
                    OrcFile.writerOptions(conf)
                            .setSchema(schema));
            VectorizedRowBatch batch = schema.createRowBatch();
            LongColumnVector x = (LongColumnVector) batch.cols[0];
            LongColumnVector y = (LongColumnVector) batch.cols[1];
            // Claim the next row slot in the batch and fill each column at that offset.
            int row = batch.size++;
            x.vector[row] = 2;
            y.vector[row] = 2 * 3;
            // Flush whatever is left in the batch before closing the writer.
            if (batch.size != 0) {
                writer.addRowBatch(batch);
                batch.reset();
            }
            writer.close();
        }
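
    Note that this writer does not set .overwrite(true), so rerunning the test against an existing /tmp/Orc.orc will typically fail with a file-already-exists error. A minimal sketch of one way around that, deleting any previous file through the Hadoop FileSystem API before creating the writer (or simply add .overwrite(true) to the writer options, as the multi-row example below does):

        // Hedged sketch: clean up an earlier output file so the test can be rerun.
        Configuration conf = new Configuration();
        Path path = new Path("/tmp/Orc.orc");
        FileSystem fs = path.getFileSystem(conf);
        if (fs.exists(path)) {
            fs.delete(path, false);   // false = non-recursive, it is a single file
        }
        // ...then build the Writer exactly as in writeLine3() above.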

    Writing an ORC file (multiple rows)

        @Test
        public void writeLine2() throws IOException {
            String[] lines = new String[]{"1,a,aa", "2,b,bb", "3,c,cc", "4,d,dd", "1,a,aa", "2,b,bb", "3,c,cc", "4,d,dd", "1,a,aa", "2,b,bb", "3,c,cc", "4,d,dd", "1,a,aa", "2,b,bb", "3,c,cc", "4,d,dd"};
            // String[] lines = new String[]{"1,2,4", "1,2,3", "1,2,3", "1,2,3", "1,2,3", "1,2,3", "1,2,3", "1,2,3"};

            Configuration conf = new Configuration();
            // ORC type names in a schema string are lowercase: string, int, bigint, decimal, double, timestamp, ...
            TypeDescription schema = TypeDescription.fromString("struct<field1:string,field2:string,field3:string>");
            // TypeDescription schema = TypeDescription.fromString("struct<field1:int,field2:int,field3:int>");
            Writer writer = OrcFile.createWriter(new Path("/tmp/Orc.orc"),
                    OrcFile.writerOptions(conf)
                            .setSchema(schema).overwrite(true));
            VectorizedRowBatch batch = schema.createRowBatch();
            List<ColumnVector> columnVectors = new ArrayList<>();

            for (int i = 0; i < batch.numCols; i++) {
                columnVectors.add(batch.cols[i]);
            }

            for (String line : lines) {
                String[] columns = line.split(",");
                System.out.println(batch.size);   // debug: rows currently buffered in the batch
                // Claim the next row slot in the batch.
                int row = batch.size++;
                for (int i = 0; i < columns.length; i++) {
                    // Dispatch on the concrete ColumnVector type of each column.
                    switch (columnVectors.get(i).getClass().getName()) {
                        case "org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector":
                            BytesColumnVector bytesColumnVector = BytesColumnVector.class.cast(columnVectors.get(i));
                            bytesColumnVector.setVal(row, columns[i].getBytes(), 0, columns[i].getBytes().length);
                            break;
                        case "org.apache.hadoop.hive.ql.exec.vector.LongColumnVector":
                            LongColumnVector longColumnVector = LongColumnVector.class.cast(columnVectors.get(i));
                            longColumnVector.vector[row] = Long.parseLong(columns[i]);
                            break;
                        case "org.apache.hadoop.hive.ql.exec.vector.Decimal64ColumnVector":
                            Decimal64ColumnVector decimal64ColumnVector = Decimal64ColumnVector.class.cast(columnVectors.get(i));
                            decimal64ColumnVector.set(row, HiveDecimal.create(columns[i]));
                            break;
                        case "org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector":
                            DecimalColumnVector decimalColumnVector = DecimalColumnVector.class.cast(columnVectors.get(i));
                            decimalColumnVector.set(row, HiveDecimal.create(columns[i]));
                            break;
                        case "org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector":
                            DoubleColumnVector doubleColumnVector = DoubleColumnVector.class.cast(columnVectors.get(i));
                            doubleColumnVector.vector[row] = Double.parseDouble(columns[i]);
                            break;
                        case "org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector":
                            TimestampColumnVector timestampColumnVector = TimestampColumnVector.class.cast(columnVectors.get(i));
                            timestampColumnVector.set(row, java.sql.Timestamp.valueOf(columns[i]));
                            break;
                    }
                }
                // Flush once per row, after every column has been filled, when the batch is full.
                // (In the original this check sat inside the column loop, which would flush and
                // reset the batch in the middle of a row once it filled up.)
                if (batch.size == batch.getMaxSize()) {
                    writer.addRowBatch(batch);
                    batch.reset();
                }
            }
            // Flush the final, partially filled batch.
            if (batch.size != 0) {
                writer.addRowBatch(batch);
                batch.reset();
            }
            writer.close();
        }
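
    The switch on getClass().getName() above works, but matching class names as strings is fragile: a subclass such as Decimal64ColumnVector (which extends LongColumnVector) is only handled if its exact name is listed. A hedged alternative sketch using instanceof; setCell is a helper name invented here, and the comment shows where it would slot into the row loop.

        // Usage inside the column loop above: setCell(columnVectors.get(i), row, columns[i]);
        private static void setCell(ColumnVector col, int row, String value) {
            if (col instanceof DecimalColumnVector) {
                ((DecimalColumnVector) col).set(row, HiveDecimal.create(value));
            } else if (col instanceof BytesColumnVector) {
                byte[] bytes = value.getBytes();
                ((BytesColumnVector) col).setVal(row, bytes, 0, bytes.length);
            } else if (col instanceof DoubleColumnVector) {
                ((DoubleColumnVector) col).vector[row] = Double.parseDouble(value);
            } else if (col instanceof TimestampColumnVector) {
                ((TimestampColumnVector) col).set(row, java.sql.Timestamp.valueOf(value));
            } else if (col instanceof LongColumnVector) {
                // Note: Decimal64ColumnVector extends LongColumnVector, so a dedicated
                // branch for it would have to come before this one.
                ((LongColumnVector) col).vector[row] = Long.parseLong(value);
            } else {
                throw new IllegalArgumentException("Unhandled column type: " + col.getClass().getName());
            }
        }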

    Required imports (from the ORC and Hadoop jars)

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hive.common.type.HiveDecimal;
    import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.Decimal64ColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
    import org.apache.orc.*;
    import org.junit.Test;

    import java.io.File;
    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;
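
    With those imports in place, one last hedged sketch: all three tests write to the same /tmp/Orc.orc path, so it is easy to lose track of what the file currently contains. The check below reads only the file footer, without scanning any rows; printOrcMetadata is a name invented here.

        @Test
        public void printOrcMetadata() throws IOException {
            Configuration conf = new Configuration();
            Reader reader = OrcFile.createReader(new Path("/tmp/Orc.orc"),
                    OrcFile.readerOptions(conf));
            // Schema, row count and compression all live in the file footer.
            System.out.println("schema:      " + reader.getSchema());
            System.out.println("rows:        " + reader.getNumberOfRows());
            System.out.println("compression: " + reader.getCompressionKind());
        }
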
    Original article: https://www.cnblogs.com/BigWrite/p/13651478.html