package com.gpdi.action;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Pattern;

/**
 * Counts how often each word occurs in a piece of text and reports the words
 * ordered from most to least frequent.
 *
 * <p>{@code WordCount} is declared outside this file; this class only builds
 * it from a {@code (word, count)} pair and reads it back through
 * {@code getWord()} and {@code getCount()}.
 */
public class WordsStatistics {

    /** Matches the possessive suffix, which is dropped before counting. */
    private static final Pattern POSSESSIVE = Pattern.compile("'s");

    /** Matches the punctuation characters that are stripped from the text. */
    private static final Pattern PUNCTUATION = Pattern.compile("[,\\-.':!]");

    /** Matches a run of whitespace; used as the word separator. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    /**
     * Orders entries by descending count; entries with the same count are
     * ordered by word, case-insensitively first and case-sensitively second.
     *
     * <p>The case-sensitive fallback keeps the ordering total: two different
     * keys that only differ in a way {@code compareToIgnoreCase} ignores must
     * still compare consistently in both directions.
     */
    private static final Comparator<WordCount> BY_COUNT_THEN_WORD = new Comparator<WordCount>() {
        @Override
        public int compare(WordCount o1, WordCount o2) {
            if (o1.getCount() > o2.getCount()) {
                return -1;
            }
            if (o1.getCount() < o2.getCount()) {
                return 1;
            }
            int byWord = o1.getWord().compareToIgnoreCase(o2.getWord());
            if (byWord != 0) {
                return byWord;
            }
            return o1.getWord().compareTo(o2.getWord());
        }
    };

    /**
     * Mutable counter stored as the map value so an occurrence can be added
     * in place without re-inserting the key.
     */
    class Obj {
        int count;

        Obj(int count) {
            this.count = count;
        }
    }

    /**
     * Computes the frequency of every word in {@code word}.
     *
     * <p>The text is lower-cased (locale-independently), every {@code 's} is
     * removed, and the characters {@code , - . ' : !} are deleted. Because the
     * hyphen is deleted rather than replaced, hyphenated words are joined
     * (for example {@code well-known} is counted as {@code wellknown}).
     * The remaining text is split on runs of whitespace, so spaces, tabs and
     * line breaks all separate words.
     *
     * @param word the text to analyse; may be {@code null}
     * @return one entry per distinct word, sorted by descending count and then
     *         alphabetically; an empty list when the text contains no words;
     *         {@code null} when {@code word} is {@code null}
     */
    public List<WordCount> statistics(String word) {
        if (word == null) {
            // Preserved for existing callers that test the result against null.
            return null;
        }

        // Locale.ROOT keeps the result independent of the JVM's default locale
        // (e.g. a Turkish locale would otherwise map "I" to a dotless "ı").
        String normalized = word.toLowerCase(Locale.ROOT);
        normalized = POSSESSIVE.matcher(normalized).replaceAll("");
        normalized = PUNCTUATION.matcher(normalized).replaceAll("");

        Map<String, Obj> counts = new HashMap<String, Obj>();
        for (String token : WHITESPACE.split(normalized)) {
            String simpleWord = token.trim();
            if (simpleWord.isEmpty()) {
                // Leading whitespace produces an empty first token.
                continue;
            }
            Obj counter = counts.get(simpleWord);
            if (counter != null) {
                counter.count++;
            } else {
                counts.put(simpleWord, new Obj(1));
            }
        }

        List<WordCount> rs = new ArrayList<WordCount>(counts.size());
        for (Map.Entry<String, Obj> entry : counts.entrySet()) {
            rs.add(new WordCount(entry.getKey(), entry.getValue().count));
        }
        Collections.sort(rs, BY_COUNT_THEN_WORD);
        return rs;
    }

    /**
     * Small demonstration: prints each word of a sample sentence together
     * with its count, one {@code word*count} pair per line.
     *
     * @param args ignored
     */
    public static void main(String args[]) {
        String word = "Pinterest is might be aa ab aa ab marketer's dream - ths site is largely used to curate products ";
        WordsStatistics s = new WordsStatistics();
        List<WordCount> rs = s.statistics(word);
        for (WordCount word1 : rs) {
            System.out.println(word1.getWord() + "*" + word1.getCount());
        }
    }
}