Bloom Filter 算法在搜索引擎中通常用于 URL 消重(参见 http://wiki.commerce.net/wiki/URL_History_Bloom_Filters 或 http://tianchunfeng.spaces.live.com/blog/cns!819E33AA1808A272!288.entry)。其具体原理请参考 http://blog.csdn.net/jiaomeng/ ,这里只给出 C# 实现:
1
public class BloomFilter<T>
2
{
3
// Bit vector backing the filter; allocated by the constructor.
private BitArray _bitArray;
// Number of Add calls that set at least one previously clear bit.
private int _count;
// Number of bit positions probed per item; the constructor overwrites this default.
private int _hashcount = 1;
6
7
/// <summary>
/// Creates a Bloom filter backed by <paramref name="size"/> bits, all initially clear.
/// </summary>
/// <param name="size">Number of bits in the filter; must be greater than zero.</param>
/// <param name="hashcount">Number of bit positions probed per item; must be greater than zero.</param>
/// <exception cref="System.ArgumentOutOfRangeException">
/// <paramref name="size"/> or <paramref name="hashcount"/> is zero or negative.
/// </exception>
public BloomFilter(int size, int hashcount)
{
    // A zero-length bit array would make every "% _bitArray.Count" in Add/Contains divide by zero.
    if (size <= 0)
    {
        throw new System.ArgumentOutOfRangeException("size", size, "The filter must contain at least one bit.");
    }

    // With no probes the loop in Contains never runs and every item is reported as present.
    if (hashcount <= 0)
    {
        throw new System.ArgumentOutOfRangeException("hashcount", hashcount, "At least one hash probe is required.");
    }

    _bitArray = new BitArray(size, false);
    _hashcount = hashcount;
}
12
13
/// <summary>
/// Records an item in the filter by setting each of its probe bits.
/// </summary>
/// <param name="item">Item to record; must not be null.</param>
/// <exception cref="System.ArgumentNullException"><paramref name="item"/> is null.</exception>
public void Add(T item)
{
    if (item == null)
    {
        throw new System.ArgumentNullException("item");
    }

    int h1 = item.GetHashCode();
    // Format with the invariant culture: the default ToString() uses the current thread's
    // culture, whose negative sign can differ between threads. A different string here would
    // produce a different h2 than Contains computes, i.e. a false negative.
    int h2 = Hash(h1.ToString(System.Globalization.CultureInfo.InvariantCulture));

    bool result = false;
    unchecked
    {
        // Reinterpret as unsigned so negative hash codes map to a valid bit index.
        h1 = (int)(((uint)h1) % _bitArray.Count);
        h2 = (int)(((uint)h2) % _bitArray.Count);
    }

    for (int i = 0; i < _hashcount; i++)
    {
        if (!_bitArray[h1])
        {
            _bitArray[h1] = result = true;
        }

        unchecked
        {
            // Derive the next probe position from the two base hashes.
            h1 = (int)((uint)(h1 + h2) % _bitArray.Count);
            h2 = (int)((uint)(h2 + i) % _bitArray.Count);
        }
    }

    // Only count the item when at least one of its bits was newly set.
    if (result)
    {
        _count++;
    }
}
42
43
/// <summary>
/// Tests whether every probe bit for an item is set.
/// </summary>
/// <param name="item">Item to look up; must not be null.</param>
/// <returns>
/// <c>false</c> if any probe bit is clear; <c>true</c> if all probe bits are set.
/// </returns>
/// <exception cref="System.ArgumentNullException"><paramref name="item"/> is null.</exception>
public bool Contains(T item)
{
    if (item == null)
    {
        throw new System.ArgumentNullException("item");
    }

    int h1 = item.GetHashCode();
    // Format with the invariant culture: the default ToString() uses the current thread's
    // culture, whose negative sign can differ between threads. A different string here would
    // produce a different h2 than Add used, i.e. a false negative.
    int h2 = Hash(h1.ToString(System.Globalization.CultureInfo.InvariantCulture));

    unchecked
    {
        // Reinterpret as unsigned so negative hash codes map to a valid bit index.
        h1 = (int)(((uint)h1) % _bitArray.Count);
        h2 = (int)(((uint)h2) % _bitArray.Count);
    }

    for (int i = 0; i < _hashcount; i++)
    {
        if (!_bitArray[h1])
        {
            return false;
        }

        unchecked
        {
            // Must follow exactly the same probe sequence as Add.
            h1 = (int)((uint)(h1 + h2) % _bitArray.Count);
            h2 = (int)((uint)(h2 + i) % _bitArray.Count);
        }
    }

    return true;
}
68
69
70
71
/// <summary>
/// Computes a secondary hash for an item by hashing the decimal text of its hash code.
/// </summary>
/// <param name="item">Item to hash; must not be null.</param>
/// <returns>The string hash of the item's hash code.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="item"/> is null.</exception>
protected int Hash(T item)
{
    if (item == null)
    {
        throw new System.ArgumentNullException("item");
    }

    int hashcode = item.GetHashCode();

    // Invariant culture keeps the text, and therefore the hash, independent of the
    // current thread's culture (the negative sign is culture-specific).
    return Hash(hashcode.ToString(System.Globalization.CultureInfo.InvariantCulture));
}
79
80
/// <summary>
/// String hash function (AP Hash Function).
/// </summary>
/// <param name="str">The string to hash.</param>
/// <returns>The low 32 bits of the accumulated 64-bit hash value.</returns>
protected int Hash(string str)
{
    long acc = 0;
    int pos = 0;

    foreach (char c in str)
    {
        // Even and odd character positions use different mixing formulas.
        long mixed = (pos & 1) == 0
            ? (acc << 7) ^ c ^ (acc >> 3)
            : ~((acc << 11) ^ c ^ (acc >> 5));

        acc ^= mixed;
        pos++;
    }

    // Truncate to 32 bits without overflow checking.
    return unchecked((int)acc);
}
105
106
107
/// <summary>
/// Gets the number of elements recorded in the BloomFilter, as tracked by the internal counter.
/// </summary>
public int Count
{
    get { return _count; }
}
117
118
/// <summary>
/// Gets the size of the filter's bit array in bytes, rounded up to a whole byte.
/// </summary>
public int SizeBytes
{
    get
    {
        // BitArray.Length is a count of bits, not bytes. Widen to long before adding 7
        // so the rounding cannot overflow for very large arrays.
        return (int)(((long)_bitArray.Length + 7) / 8);
    }
}
public class BloomFilter<T>2
{3
// Bit vector backing the filter; allocated by the constructor.
private BitArray _bitArray;
// Number of Add calls that set at least one previously clear bit.
private int _count;
// Number of bit positions probed per item; the constructor overwrites this default.
private int _hashcount = 1;

7
/// <summary>
/// Creates a Bloom filter backed by <paramref name="size"/> bits, all initially clear.
/// </summary>
/// <param name="size">Number of bits in the filter; must be greater than zero.</param>
/// <param name="hashcount">Number of bit positions probed per item; must be greater than zero.</param>
/// <exception cref="System.ArgumentOutOfRangeException">
/// <paramref name="size"/> or <paramref name="hashcount"/> is zero or negative.
/// </exception>
public BloomFilter(int size, int hashcount)
{
    // A zero-length bit array would make every "% _bitArray.Count" in Add/Contains divide by zero.
    if (size <= 0)
    {
        throw new System.ArgumentOutOfRangeException("size", size, "The filter must contain at least one bit.");
    }

    // With no probes the loop in Contains never runs and every item is reported as present.
    if (hashcount <= 0)
    {
        throw new System.ArgumentOutOfRangeException("hashcount", hashcount, "At least one hash probe is required.");
    }

    _bitArray = new BitArray(size, false);
    _hashcount = hashcount;
}

13
/// <summary>
/// Records an item in the filter by setting each of its probe bits.
/// </summary>
/// <param name="item">Item to record; must not be null.</param>
/// <exception cref="System.ArgumentNullException"><paramref name="item"/> is null.</exception>
public void Add(T item)
{
    if (item == null)
    {
        throw new System.ArgumentNullException("item");
    }

    int h1 = item.GetHashCode();
    // Format with the invariant culture: the default ToString() uses the current thread's
    // culture, whose negative sign can differ between threads. A different string here would
    // produce a different h2 than Contains computes, i.e. a false negative.
    int h2 = Hash(h1.ToString(System.Globalization.CultureInfo.InvariantCulture));

    bool result = false;
    unchecked
    {
        // Reinterpret as unsigned so negative hash codes map to a valid bit index.
        h1 = (int)(((uint)h1) % _bitArray.Count);
        h2 = (int)(((uint)h2) % _bitArray.Count);
    }

    for (int i = 0; i < _hashcount; i++)
    {
        if (!_bitArray[h1])
        {
            _bitArray[h1] = result = true;
        }

        unchecked
        {
            // Derive the next probe position from the two base hashes.
            h1 = (int)((uint)(h1 + h2) % _bitArray.Count);
            h2 = (int)((uint)(h2 + i) % _bitArray.Count);
        }
    }

    // Only count the item when at least one of its bits was newly set.
    if (result)
    {
        _count++;
    }
}

43
/// <summary>
/// Tests whether every probe bit for an item is set.
/// </summary>
/// <param name="item">Item to look up; must not be null.</param>
/// <returns>
/// <c>false</c> if any probe bit is clear; <c>true</c> if all probe bits are set.
/// </returns>
/// <exception cref="System.ArgumentNullException"><paramref name="item"/> is null.</exception>
public bool Contains(T item)
{
    if (item == null)
    {
        throw new System.ArgumentNullException("item");
    }

    int h1 = item.GetHashCode();
    // Format with the invariant culture: the default ToString() uses the current thread's
    // culture, whose negative sign can differ between threads. A different string here would
    // produce a different h2 than Add used, i.e. a false negative.
    int h2 = Hash(h1.ToString(System.Globalization.CultureInfo.InvariantCulture));

    unchecked
    {
        // Reinterpret as unsigned so negative hash codes map to a valid bit index.
        h1 = (int)(((uint)h1) % _bitArray.Count);
        h2 = (int)(((uint)h2) % _bitArray.Count);
    }

    for (int i = 0; i < _hashcount; i++)
    {
        if (!_bitArray[h1])
        {
            return false;
        }

        unchecked
        {
            // Must follow exactly the same probe sequence as Add.
            h1 = (int)((uint)(h1 + h2) % _bitArray.Count);
            h2 = (int)((uint)(h2 + i) % _bitArray.Count);
        }
    }

    return true;
}

69

70

71
/// <summary>
/// Computes a secondary hash for an item by hashing the decimal text of its hash code.
/// </summary>
/// <param name="item">Item to hash; must not be null.</param>
/// <returns>The string hash of the item's hash code.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="item"/> is null.</exception>
protected int Hash(T item)
{
    if (item == null)
    {
        throw new System.ArgumentNullException("item");
    }

    int hashcode = item.GetHashCode();

    // Invariant culture keeps the text, and therefore the hash, independent of the
    // current thread's culture (the negative sign is culture-specific).
    return Hash(hashcode.ToString(System.Globalization.CultureInfo.InvariantCulture));
}

80
/// <summary>
/// String hash function (AP Hash Function).
/// </summary>
/// <param name="str">The string to hash.</param>
/// <returns>The low 32 bits of the accumulated 64-bit hash value.</returns>
protected int Hash(string str)
{
    long acc = 0;
    int pos = 0;

    foreach (char c in str)
    {
        // Even and odd character positions use different mixing formulas.
        long mixed = (pos & 1) == 0
            ? (acc << 7) ^ c ^ (acc >> 3)
            : ~((acc << 11) ^ c ^ (acc >> 5));

        acc ^= mixed;
        pos++;
    }

    // Truncate to 32 bits without overflow checking.
    return unchecked((int)acc);
}

106

107
/// <summary>
/// Gets the number of elements recorded in the BloomFilter, as tracked by the internal counter.
/// </summary>
public int Count
{
    get { return _count; }
}

118
public int SizeBytes119
{120
get121
{122
return _bitArray.Length;123
}124
}
如果大家发现程序有问题,请及时反馈给我(nsharp at 8u8.com),谢谢!
