BloomFilter 算法在搜索引擎方面一般用于 URL 消重(参见 http://wiki.commerce.net/wiki/URL_History_Bloom_Filters 或 http://tianchunfeng.spaces.live.com/blog/cns!819E33AA1808A272!288.entry ),其具体原理请参考 http://blog.csdn.net/jiaomeng/ ,这里只给出 C# 实现:
/// <summary>
/// A Bloom filter: a fixed-size bit set that answers "possibly present" or
/// "definitely absent" for items that were previously added. False positives
/// are possible; false negatives are not.
/// </summary>
/// <typeparam name="T">
/// Type of the stored items. Only <c>GetHashCode()</c> of an item is used, so
/// items with equal hash codes are indistinguishable to the filter.
/// </typeparam>
public class BloomFilter<T>
{
    // Backing bit set; its length is fixed at construction time.
    private readonly BitArray _bitArray;

    // Number of bit positions probed per item.
    private readonly int _hashcount;

    // Number of Add calls that set at least one previously clear bit.
    private int _count = 0;

    /// <summary>
    /// Creates an empty filter.
    /// </summary>
    /// <param name="size">Number of bits in the filter; must be at least 1.</param>
    /// <param name="hashcount">Number of bit positions probed per item; must be at least 1.</param>
    /// <exception cref="System.ArgumentOutOfRangeException">
    /// <paramref name="size"/> or <paramref name="hashcount"/> is less than 1.
    /// </exception>
    public BloomFilter(int size, int hashcount)
    {
        // A zero-length bit set would make every "% Count" below divide by zero.
        if (size < 1)
        {
            throw new System.ArgumentOutOfRangeException("size", "size must be at least 1.");
        }

        // With no probes, Add would set nothing and Contains would always return true.
        if (hashcount < 1)
        {
            throw new System.ArgumentOutOfRangeException("hashcount", "hashcount must be at least 1.");
        }

        _bitArray = new BitArray(size, false);
        _hashcount = hashcount;
    }

    /// <summary>
    /// Adds an item to the filter by setting each bit of its probe sequence.
    /// </summary>
    /// <param name="item">The item to add; must not be null.</param>
    /// <exception cref="System.ArgumentNullException"><paramref name="item"/> is null.</exception>
    public void Add(T item)
    {
        int position;
        int step;
        GetInitialProbe(item, out position, out step);

        bool setNewBit = false;
        for (int i = 0; i < _hashcount; i++)
        {
            if (!_bitArray[position])
            {
                _bitArray[position] = true;
                setNewBit = true;
            }

            AdvanceProbe(ref position, ref step, i);
        }

        // Only count the item when it changed the filter; re-adding an item
        // (or adding one whose bits were all already set) leaves Count unchanged.
        if (setNewBit)
        {
            _count++;
        }
    }

    /// <summary>
    /// Tests whether an item may have been added to the filter.
    /// </summary>
    /// <param name="item">The item to look up; must not be null.</param>
    /// <returns>
    /// <c>false</c> if any bit of the item's probe sequence is clear (the item
    /// was never added); <c>true</c> if all of them are set (the item was
    /// added, or this is a false positive).
    /// </returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="item"/> is null.</exception>
    public bool Contains(T item)
    {
        int position;
        int step;
        GetInitialProbe(item, out position, out step);

        for (int i = 0; i < _hashcount; i++)
        {
            if (!_bitArray[position])
            {
                return false;
            }

            AdvanceProbe(ref position, ref step, i);
        }

        return true;
    }

    /// <summary>
    /// Computes the secondary hash of an item: the string hash of the decimal
    /// text of its <c>GetHashCode()</c> value.
    /// </summary>
    /// <param name="item">The item to hash; must not be null.</param>
    /// <returns>The secondary hash code.</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="item"/> is null.</exception>
    protected int Hash(T item)
    {
        if (item == null)
        {
            throw new System.ArgumentNullException("item");
        }

        return Hash(FormatHashCode(item.GetHashCode()));
    }

    /// <summary>
    /// String hash function (AP Hash Function).
    /// </summary>
    /// <param name="str">The string to hash; must not be null.</param>
    /// <returns>The low 32 bits of the accumulated hash value.</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="str"/> is null.</exception>
    protected int Hash(string str)
    {
        if (str == null)
        {
            throw new System.ArgumentNullException("str");
        }

        long hash = 0;

        for (int i = 0; i < str.Length; i++)
        {
            // Characters at even and odd indexes are mixed in with different formulas.
            if ((i & 1) == 0)
            {
                hash ^= ((hash << 7) ^ str[i] ^ (hash >> 3));
            }
            else
            {
                hash ^= (~((hash << 11) ^ str[i] ^ (hash >> 5)));
            }
        }

        // Truncate to 32 bits; overflow is intentional.
        unchecked
        {
            return (int)hash;
        }
    }

    /// <summary>
    /// Gets the number of <see cref="Add"/> calls that set at least one new bit.
    /// This approximates the number of distinct items added; an item whose bits
    /// were all already set is not counted.
    /// </summary>
    public int Count
    {
        get
        {
            return _count;
        }
    }

    /// <summary>
    /// Gets the size of the filter's bit set in bytes (the bit count rounded up
    /// to a whole number of bytes).
    /// </summary>
    public int SizeBytes
    {
        get
        {
            // Length is a bit count and is at least 1 (enforced by the constructor);
            // this form rounds up without overflowing for lengths near int.MaxValue.
            return (_bitArray.Length - 1) / 8 + 1;
        }
    }

    // Formats a hash code as culture-invariant decimal text, so the derived
    // secondary hash does not change if the current culture changes between
    // Add and Contains (cultures may use different negative signs).
    private static string FormatHashCode(int hashCode)
    {
        return hashCode.ToString(System.Globalization.CultureInfo.InvariantCulture);
    }

    // Computes the first probe position and the initial step for an item,
    // both reduced to the range [0, bit count).
    private void GetInitialProbe(T item, out int position, out int step)
    {
        if (item == null)
        {
            throw new System.ArgumentNullException("item");
        }

        int primary = item.GetHashCode();
        int secondary = Hash(FormatHashCode(primary));
        int bitCount = _bitArray.Count;

        // Reinterpret as unsigned so negative hash codes map to valid indexes.
        unchecked
        {
            position = (int)(((uint)primary) % bitCount);
            step = (int)(((uint)secondary) % bitCount);
        }
    }

    // Moves to the next probe: the position advances by the current step, and
    // the step itself grows by the round number so successive strides differ.
    private void AdvanceProbe(ref int position, ref int step, int round)
    {
        int bitCount = _bitArray.Count;

        // The int sums may wrap; the uint reinterpretation recovers the true
        // sum because both operands are below the bit count.
        unchecked
        {
            position = (int)((uint)(position + step) % bitCount);
            step = (int)((uint)(step + round) % bitCount);
        }
    }
}

如果大家发现程序有问题,请及时反馈给我(nsharp at 8u8.com),谢谢。