直接上code:
/// <summary>
/// Counts the distinct tokens that two strings (or two token collections) have in common.
/// </summary>
/// <remarks>
/// The value returned is the raw size of the token intersection; it is not divided by the
/// size of the smaller token set. Tokens are compared case-sensitively.
/// Instances hold no per-call state, so a single instance can be reused freely.
/// </remarks>
public class OverlapCoefficient
{
    /// <summary>Separators used to split an input string into tokens.</summary>
    private readonly string[] delimiters;

    /// <summary>
    /// Creates a counter that splits input on spaces, hyphens and underscores.
    /// </summary>
    public OverlapCoefficient()
    {
        this.delimiters = new string[] { " ", "-", "_" };
    }

    /// <summary>
    /// Tokenizes both strings and returns how many distinct tokens they share.
    /// </summary>
    /// <param name="first">First string to compare; may be null.</param>
    /// <param name="second">Second string to compare; may be null.</param>
    /// <returns>
    /// The number of distinct tokens present in both strings, or 0 when either argument is null.
    /// </returns>
    public int GetSimilarity(string first, string second)
    {
        if (first == null || second == null)
        {
            return 0;
        }

        return this.GetOverlapCount(this.Tokenize(first), this.Tokenize(second));
    }

    /// <summary>
    /// Returns the number of distinct tokens that appear in both collections.
    /// </summary>
    /// <param name="firstTokens">Tokens of the first input; duplicates are counted once.</param>
    /// <param name="secondTokens">Tokens of the second input; duplicates are counted once.</param>
    /// <returns>The size of the intersection of the two token sets.</returns>
    /// <exception cref="System.ArgumentNullException">
    /// <paramref name="firstTokens"/> or <paramref name="secondTokens"/> is null.
    /// </exception>
    public int GetOverlapCount(Collection<string> firstTokens, Collection<string> secondTokens)
    {
        if (firstTokens == null)
        {
            throw new System.ArgumentNullException(nameof(firstTokens));
        }

        if (secondTokens == null)
        {
            throw new System.ArgumentNullException(nameof(secondTokens));
        }

        // A local set keeps the method re-entrant, and hashing avoids the quadratic
        // Contains-in-a-loop scans. |A ∩ B| equals |A| + |B| - |A ∪ B|, so the
        // intersection size is computed directly. Neither input collection is modified.
        var shared = new System.Collections.Generic.HashSet<string>(firstTokens);
        shared.IntersectWith(secondTokens);
        return shared.Count;
    }

    /// <summary>
    /// Splits <paramref name="word"/> on the configured delimiters, discarding empty entries.
    /// </summary>
    /// <param name="word">Non-null text to split.</param>
    /// <returns>The tokens in their original order, duplicates included.</returns>
    private Collection<string> Tokenize(string word)
    {
        // Split already yields a string[], which Collection<T> can wrap directly.
        return new Collection<string>(word.Split(this.delimiters, System.StringSplitOptions.RemoveEmptyEntries));
    }
}