zoukankan      html  css  js  c++  java
  • phoenix二级索引源码阅读

    Phoenix二级索引建立源码

    Phoenix二级索引建立在HBase的coprocessor(协处理器)功能之上,建立索引时使用的就是coprocessor。

     

     

    二级索引建立过程中,索引rowkey通过一个数据流来构建:各个部分依次追加到流的末尾,最后生成最终的rowkey。

    public byte[] buildRowKey(ValueGetter valueGetter, ImmutableBytesWritable rowKeyPtr, byte[] regionStartKey, byte[] regionEndKey, long ts)  {
    public byte[] buildRowKey(ValueGetter valueGetter, ImmutableBytesWritable rowKeyPtr, byte[] regionStartKey, byte[] regionEndKey, long ts)  {
        ImmutableBytesWritable ptr = new ImmutableBytesWritable();

    //判断是否是构建本地索引,考虑两个条件:1.本地索引是否开启 2.startRK 是否传进来了
        boolean prependRegionStartKey = isLocalIndex && regionStartKey != null;
        boolean isIndexSalted = !isLocalIndex && nIndexSaltBuckets > 0;

    //如果开启本地索引,则在数据前面添加前缀,判断startRK是否是region起始startRK,如果是则使用该region的EndRK
        int prefixKeyLength =
                prependRegionStartKey ? (regionStartKey.length != 0 ? regionStartKey.length
                        : regionEndKey.length) : 0;
        TrustedByteArrayOutputStream stream = new TrustedByteArrayOutputStream(estimatedIndexRowKeyBytes + (prependRegionStartKey ? prefixKeyLength : 0));

    // 构建数据流对象,对数据进行put
        DataOutput output = new DataOutputStream(stream);

     

     

     

     

    如果是本地索引,则在rowkey前面加上region的start rowkey作为前缀:

    // For local indexes, we must prepend the row key with the start region key
    if (prependRegionStartKey) {
        if (regionStartKey.length == 0) {

    // 如果startRK长度为0(region的起始key为空),则写入与endRK等长的全0字节作为前缀
            output.write(new byte[prefixKeyLength]);
        } else {
            output.write(regionStartKey);
        }

    }

     

     

     

    判断索引是否加盐;如果加盐,则先写入一个占位字节,最后再把它改写为真正的盐值字节:

    if (isIndexSalted) {
        output.write(0); // will be set at end to index salt byte
    }

     

     

     

    如果视图索引id(viewIndexId)不为null,则在索引rowkey中加入该id:

    if (viewIndexId != null) {
        output.write(viewIndexId);
    }

     

     

    判断是否启用多租户;如果启用,则在索引rowkey中加入租户信息:

    if (isMultiTenant) {
        dataRowKeySchema.next(ptr, dataPosOffset, maxRowKeyOffset);
        output.write(ptr.get(), ptr.getOffset(), ptr.getLength());
        if (!dataRowKeySchema.getField(dataPosOffset).getDataType().isFixedWidth()) {
            output.writeByte(SchemaUtil.getSeparatorByte(rowKeyOrderOptimizable, ptr.getLength()==0, dataRowKeySchema.getField(dataPosOffset)));
        }
        dataPosOffset++;
    }

     

     

     

     

    dataRowKeySchema是数据表rowkey的schema。这里忽略视图中的常量列(view constants),并记录数据表各个pk列在rowkey中的offset和length,方便后面定位这些列的值并写入索引rowkey。

     

    for (int i = dataPosOffset; i < dataRowKeySchema.getFieldCount(); i++) {
        Boolean hasValue=dataRowKeySchema.next(ptr, i, maxRowKeyOffset);
        // Ignore view constants from the data table, as these
        // don't need to appear in the index (as they're the
        // same for all rows in this index)
        if (!viewConstantColumnBitSet.get(i)) {
            int pos = rowKeyMetaData.getIndexPkPosition(i-dataPosOffset);
            if (Boolean.TRUE.equals(hasValue)) {
                dataRowKeyLocator[0][pos] = ptr.getOffset();
                dataRowKeyLocator[1][pos] = ptr.getLength();
            } else {
                dataRowKeyLocator[0][pos] = 0;
                dataRowKeyLocator[1][pos] = 0;
            }
        }
    }

     

     

     

    依次写入各个索引列,并处理数据列与索引列的排序方向(升序/降序):

     

    // 获取表达式索引,表达式索引默认值都为1,未开启的时候isNullAble为true

    Iterator<Expression> expressionIterator = indexedExpressions.iterator();

    //  nIndexedColumns 的构成是索引列+主键 如果是组合索引,则循环多个索引列
    for (int i = 0; i < nIndexedColumns; i++) {
        PDataType dataColumnType;
        boolean isNullable;
        SortOrder dataSortOrder;

    // dataPkPosition为-1则表示为表达式索引,否则为属性索引
        if (dataPkPosition[i] == EXPRESSION_NOT_PRESENT) {
           Expression expression = expressionIterator.next();
           dataColumnType = expression.getDataType();
           dataSortOrder = expression.getSortOrder();
            isNullable = expression.isNullable();
           expression.evaluate(new ValueGetterTuple(valueGetter, ts), ptr);
        }

    //  主键pk 走这个分支
        else {
            Field field = dataRowKeySchema.getField(dataPkPosition[i]);
            dataColumnType = field.getDataType();
            ptr.set(rowKeyPtr.get(), dataRowKeyLocator[0][i], dataRowKeyLocator[1][i]);
            dataSortOrder = field.getSortOrder();
            isNullable = field.isNullable();
        }

    // 考虑列值的顺序,考虑字节的比较,考虑索引列的顺序

    // 判断数据列本身的排序方式是否不是ASC(即数据列按逆序存储)
        boolean isDataColumnInverted = dataSortOrder != SortOrder.ASC;

    // 获取索引列的的数据类型,详情看后面getIndexColumnDataType函数
        PDataType indexColumnType = IndexUtil.getIndexColumnDataType(isNullable, dataColumnType);

    // 判断数据列类型与索引列类型在字节层面是否可以直接比较;不可比较时,后面需要先用coerceBytes做类型转换
        boolean isBytesComparable = dataColumnType.isBytesComparableWith(indexColumnType);

    // 获取列是否是逆序的
        boolean isIndexColumnDesc = descIndexColumnBitSet.get(i);
        if (isBytesComparable && isDataColumnInverted == isIndexColumnDesc) {
            output.write(ptr.get(), ptr.getOffset(), ptr.getLength());
        } else {
            if (!isBytesComparable)  {

    // 让不可比较的类型具有可比性
                indexColumnType.coerceBytes(ptr, dataColumnType, dataSortOrder, SortOrder.getDefault());
            }

    // 按位取异或值,二进制数比较肯定是字典序,从最高位开始比较,直到遇到第一个不一样的位,这个位上哪个数等于1哪个数就较大。 
            if (isDataColumnInverted != isIndexColumnDesc) {
                writeInverted(ptr.get(), ptr.getOffset(), ptr.getLength(), output);
            } else {
                output.write(ptr.get(), ptr.getOffset(), ptr.getLength());
            }
        }

    // 判断数据是不是一个固定长度的字段,如果不是根据数据的正序逆序添加一个标志位
        if (!indexColumnType.isFixedWidth()) {
            output.writeByte(SchemaUtil.getSeparatorByte(rowKeyOrderOptimizable, ptr.getLength() == 0, isIndexColumnDesc ? SortOrder.DESC : SortOrder.ASC));

    }

    }

     

     

    //填充开始的加盐部分的字节位,规则是根据数据做hash,然后再对nIndexSaltBuckets取余

    if (isIndexSalted) {
        // Set salt byte
        byte saltByte = SaltingUtil.getSaltingByte(indexRowKey, SaltingUtil.NUM_SALTING_BYTES, length-SaltingUtil.NUM_SALTING_BYTES, nIndexSaltBuckets);
        indexRowKey[0] = saltByte;
    }

     

     

     

    返回最终生成的rowkey:

    return indexRowKey.length == length ? indexRowKey : Arrays.copyOf(indexRowKey, length);

     

     

     

    根据数据列的类型返回索引列使用的datatype:可为null的定长类型会被转换成对应的变长类型(PDecimal、PVarchar、PVarbinary),其余情况保持原类型不变。

    // Since we cannot have nullable fixed length in a row key
    // we need to translate to variable length. The verification that we have a valid index
    // row key was already done, so here we just need to convert from one built-in type to
    // another.
    public static PDataType getIndexColumnDataType(boolean isNullable, PDataType dataType) {
        if (dataType == null || !isNullable || !dataType.isFixedWidth()) {
            return dataType;
        }
        // for fixed length numeric types and boolean
        if (dataType.isCastableTo(PDecimal.INSTANCE)) {
            return PDecimal.INSTANCE;
        }
        // for CHAR
        if (dataType.isCoercibleTo(PVarchar.INSTANCE)) {
            return PVarchar.INSTANCE;
        }

        if (PBinary.INSTANCE.equals(dataType)) {
            return PVarbinary.INSTANCE;
        }
        throw new IllegalArgumentException("Unsupported non nullable type " + dataType);
    }

     

     

     

    让数据有可比性

    protected static int toBytes(BigDecimal v, byte[] result, final int offset, int length) {
        // From scale to exponent byte (if BigDecimal is positive): (-(scale+(scale % 2 == 0 : 0 : 1)) / 2 + 65) | 0x80
        // If scale % 2 is 1 (i.e. it's odd), then multiple last base-100 digit by 10
        // For example: new BigDecimal(BigInteger.valueOf(1), -4);
        // (byte)((-(-4+0) / 2 + 65) | 0x80) = -61
        // From scale to exponent byte (if BigDecimal is negative): ~(-(scale+1)/2 + 65 + 128) & 0x7F
        // For example: new BigDecimal(BigInteger.valueOf(1), 2);
        // ~(-2/2 + 65 + 128) & 0x7F = 63

  • 相关阅读:
    我的那些年(11)~去一家创业公司做架构师
    springboot~hazelcast缓存中间件
    我的那些年(10)~来到更大的团队,做了架构师
    springboot~maven制作底层公用库
    Vesions ignore & ld: library not found for -l...
    iOS利用单例实现不同界面间的数据传输
    UVA 10006
    VS2010打开就自动关闭问题解决
    模板方法模式(Template Method)
    HDU 4279
  • 原文地址:https://www.cnblogs.com/yankang/p/10617601.html
Copyright © 2011-2022 走看看