CharsRefIntHashMap并不比HashMap<String, Integer>快

zoukankan html css js c++ java

CharsRefIntHashMap并不比HashMap<String, Integer>快

我模仿lucene的BytesRef写了一个CharsRefIntHashMap,实測效果并不如HashMap<String, Integer>。代码例如以下:

package com.dp.arts.lucenex.utils;

import org.apache.lucene.util.CharsRef;

public interface CharsRefIntMap

{

public static abstract class CharsRefIntEntryAccessor {

public abstract void access(char[] arr, int offset, int length, int value);

}



public void incKey(CharsRef key);

public void incKey(CharsRef key, int add);

public void incKey(char[] arr, int offset, int length);

public void incKey(char[] arr, int offset, int length, int add);



public int get(CharsRef key);

public int get(CharsRef key, int no_entry_value);

public int get(char[] arr, int offset, int length);

public int get(char[] arr, int offset, int length, int no_entry_value);



public int size();



public void forEach(CharsRefIntEntryAccessor accesor);

}

package com.dp.arts.lucenex.utils;

import java.util.Arrays;

import org.apache.lucene.util.CharsRef;

import com.dp.arts.lucenex.utils.CharsRefIntMap.CharsRefIntEntryAccessor;

public class CharsRefIntHashMap implements CharsRefIntMap
{
public static final int DEFAULT_CAPACITY = 16;

private char[][] arrs;
private int[] offsets;
private int[] lengths;
private int[] ords;
private int[] values;

private int hashSize;
private int halfHashSize;
private int hashMask;
private int count;

public CharsRefIntHashMap() {
this(DEFAULT_CAPACITY);
}

public CharsRefIntHashMap(int capacity) {
assert capacity > 0 && ( (capacity & (capacity - 1)) == 0);

arrs = new char[capacity][];
offsets = new int[capacity];
lengths = new int[capacity];
ords = new int[capacity];
values = new int[capacity];

Arrays.fill(ords, -1);
hashSize = capacity;
halfHashSize = (capacity >>> 1);
hashMask = capacity - 1;
}

@Override
public void incKey(CharsRef key) {
int code = charsHashCode(key.chars, key.offset, key.length);
incKey(key.chars, key.offset, key.length, code, 1);
}

@Override
public void incKey(CharsRef key, int add) {
int code = charsHashCode(key.chars, key.offset, key.length);
incKey(key.chars, key.offset, key.length, code, add);
}

@Override
public void incKey(char[] arr, int offset, int length) {
int code = charsHashCode(arr, offset, length);
incKey(arr, offset, length, code, 1);
}

@Override
public void incKey(char[] arr, int offset, int length, int add) {
int code = charsHashCode(arr, offset, length);
incKey(arr, offset, length, code, add);
}

private void incKey(char[] arr, int offset, int length, int code, int add) {
int pos = (code & hashMask);
int e = ords[pos];
while (e != -1 && !charsEquals(arrs[e], offsets[e], lengths[e], arr, offset, length)) {
final int inc = ((code >> 8) + code) | 1;
code += inc;
pos = (code & hashMask);
e = ords[pos];
}
if (e == -1) {
// new entry.
arrs[count] = arr;
offsets[count] = offset;
lengths[count] = length;
values[count] = add;
ords[pos] = count;
++count;
if (count == halfHashSize) {
rehash((hashSize << 1));
}
} else {
values[e] += add;
}
}

private void rehash(int newSize) {
char[][] newArrs = new char[newSize][];
int[] newOffsets = new int[newSize];
int[] newLengths = new int[newSize];
int[] newValues = new int[newSize];
System.arraycopy(arrs, 0, newArrs, 0, halfHashSize);
System.arraycopy(offsets, 0, newOffsets, 0, halfHashSize);
System.arraycopy(lengths, 0, newLengths, 0, halfHashSize);
System.arraycopy(values, 0, newValues, 0, halfHashSize);

final int[] newOrds = new int[newSize];
Arrays.fill(newOrds, -1);
final int newHashMask = newSize - 1;
for (int i = 0; i < hashSize; ++i) {
int e0 = ords[i];
if (e0 != -1) {
char[] arr = newArrs[e0];
int offset = newOffsets[e0];
int length = newLengths[e0];
int code = charsHashCode(arr, offset, length);
int pos = code & newHashMask;
while (newOrds[pos] != -1) {
final int inc = ((code >> 8) + code) | 1;
code += inc;
pos = code & newHashMask;
}
newOrds[pos] = e0;
}
}

ords = newOrds;
arrs = newArrs;
offsets = newOffsets;
lengths = newLengths;
values = newValues;

hashSize = newSize;
halfHashSize = (newSize >> 1);
hashMask = newHashMask;
}

public int charsHashCode(char[] chars, int offset, int length) {
final int prime = 31;
int result = 0;
final int end = offset + length;
for (int i = offset; i < end; i++) {
result = prime * result + chars[i];
}
return result;
}

public boolean charsEquals(char[] lhsArr, int lhsOffset, int lhsLength, char[] rhsArr, int rhsOffset, int rhsLength) {
if (lhsLength == rhsLength) {
int otherUpto = rhsOffset;
final int end = lhsOffset + lhsLength;
for (int upto = lhsOffset; upto < end; upto++, otherUpto++) {
if (lhsArr[upto] != rhsArr[otherUpto]) {
return false;
}
}
return true;
} else {
return false;
}
}

@Override
public int get(CharsRef key) {
return get(key.chars, key.offset, key.length, 0);
}

@Override
public int get(CharsRef key, int no_entry_key) {
return get(key.chars, key.offset, key.length, no_entry_key);
}

@Override
public int get(char[] arr, int offset, int length) {
return get(arr, offset, length, 0);
}

@Override
public int get(char[] arr, int offset, int length, int no_entry_key) {
int code = charsHashCode(arr, offset, length);
int pos = (code & hashMask);
int e = ords[pos];
while (e != -1 && !charsEquals(arrs[e], offsets[e], lengths[e], arr, offset, length)) {
final int inc = ((code >> 8) + code) | 1;
code += inc;
pos = (code & hashMask);
e = ords[pos];
}
return e == -1 ? no_entry_key : values[e];
}

@Override
public void forEach(CharsRefIntEntryAccessor accessor) {
for (int i = 0; i < hashSize; ++i) {
int pos = ords[i];
if (pos != -1) {
accessor.access(arrs[pos], offsets[pos], lengths[pos], values[pos]);
}
}
}

@Override
public int size() {
return count;
}

// for test only.
public int hashSize() {
return hashSize;
}
}

package com.dp.arts.lucenex.utils;

import java.util.HashMap;

import java.util.Random;

import org.apache.lucene.util.CharsRef;

public class CharsRefIntHashMapBenchmark

{

private static Random randGen = null;

private static char[] numbersAndLetters = null;

static {

randGen = new Random();

numbersAndLetters = ("0123456789abcdefghijklmnopqrstuvwxyz" +

   "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ").toCharArray();

}

private static final String randomString(int length) {

if (length < 1) {

return null;

}

char [] randBuffer = new char[length];

for (int i=0; i<randBuffer.length; i++) {

randBuffer[i] = numbersAndLetters[randGen.nextInt(71)];

}

return new String(randBuffer);

}



public static void main(String[] args) {

final int MAX = 100000;

String[] strs = new String[10000];

int[] values = new int[MAX];

for (int i = 0; i < 10000; ++i) {

strs[i] = randomString(randGen.nextInt(10) + 1);

}

for (int i = 0; i < MAX; ++i) {

values[i] = randGen.nextInt(10000);

}



char[][] arrs = new char[MAX][];

int offsets[] = new int[MAX];

int counts[] = new int[MAX];

for (int i = 0; i < MAX; ++i) {

String s = strs[values[i]];

arrs[i] = StringMisc.toCharArray(s);

offsets[i] = StringMisc.getOffset(s);

counts[i] = StringMisc.getCount(s);

}

long start = System.currentTimeMillis();

CharsRefIntHashMap map = new CharsRefIntHashMap();

for (int j = 0; j < 100; ++j) {

for (int i = 0; i < MAX; ++i) {

map.incKey(arrs[i], offsets[i], counts[i]);

}}

System.err.println("CharsRefIntHashMap time elapsed: " + (System.currentTimeMillis() - start) + "ms.");

start = System.currentTimeMillis();

HashMap<String, Integer> oldMap = new HashMap<String, Integer>();

for (int j = 0; j < 100; ++j) {

for (int i = 0; i < MAX; ++i) {

String s = strs[values[i]];

Integer v = oldMap.get(s);

if (v == null) {

v = new Integer(1);

oldMap.put(s, v);

} else {

v += 1;

}

}}

System.err.println("Origin string map time elapsed: " + (System.currentTimeMillis() - start) + "ms.");

}

}

因此这样写优点不过内存少用一些，性能应该更差,rehash时须要拷贝很多其它数据，对每一个数据的訪问都须要下标。实际情况下,CharsRef所须要的内存是24字节。假设使用trove的TObjectIntHashMap,插入速度相当,查询速度是jdk hashmap的三倍。

查看全文

相关阅读:
chrome浏览器中安装以及使用Elasticsearch head 插件
 windows10 升级并安装配置 jmeter5.3
linux下部署Elasticsearch6.8.1版本的集群
 【Rollo的Python之路】Python 爬虫系统学习 (八) logging模块的使用
 【Rollo的Python之路】Python 爬虫系统学习 (七) Scrapy初识
 【Rollo的Python之路】Python 爬虫系统学习 (六) Selenium 模拟登录
 【Rollo的Python之路】Python 爬虫系统学习 (五) Selenium
【Rollo的Python之路】Python 爬虫系统学习 (四) XPath学习
 【Rollo的Python之路】Python 爬虫系统学习 (三)
【Rollo的Python之路】Python sys argv[] 函数用法笔记

原文地址：https://www.cnblogs.com/jhcelue/p/7102149.html

CharsRefIntHashMap并不比HashMap&lt;String, Integer&gt;快

CharsRefIntHashMap并不比HashMap<String, Integer>快