之前的jar包有问题,现已修改.
需要的jar包,已修改
所需的jar包是Apache POI(代码中使用了org.apache.poi下的HSSFWorkbook、POIFSFileSystem等类),请自行到Maven中央仓库下载.
excel数据(程序读取第一个工作表,从第二行开始读取,第1~5列依次对应item_name、activity_id、type、user_id、selled_count):
直接上代码.
程序再度优化了一遍.之后如果想再度精准,可能需要建模,最近没空继续做了.
实体类:
package org.analysisitem20181016.pojo; public class Item { private int index; private int match_text_length; private String item_name; private String activity_id; private String type; private String user_id; private String selled_count; private int similarity; private String matchText; public String getItem_name() { return item_name; } public void setItem_name(String item_name) { this.item_name = item_name; } public String getActivity_id() { return activity_id; } public void setActivity_id(String activity_id) { this.activity_id = activity_id; } public String getType() { return type; } public void setType(String type) { this.type = type; } public String getUser_id() { return user_id; } public void setUser_id(String user_id) { this.user_id = user_id; } public String getSelled_count() { return selled_count; } public void setSelled_count(String selled_count) { this.selled_count = selled_count; } public int getSimilarity() { return similarity; } public void setSimilarity(int similarity) { this.similarity = similarity; } public String getMatchText() { return matchText; } public void setMatchText(String matchText) { this.matchText = matchText; } public int getIndex() { return index; } public void setIndex(int index) { this.index = index; } public int getMatch_text_length() { return match_text_length; } public void setMatch_text_length(int match_text_length) { this.match_text_length = match_text_length; } }
线程处理类(改良后使用了calculate2方法来匹配):
package org.analysisitem20181016.main; import org.analysisitem20181016.pojo.Item; public class ThreadMain implements Runnable{ private int index; private Item item; public ThreadMain(int index, Item item){ this.index = index; this.item = item; } @Override public void run() { System.out.println("任务" + index + "开始执行!"); for(int i = 0; i < CompareMain.itemList.size(); i++){ if(i == index){ continue; } String text = item.getItem_name(); String text2 = CompareMain.itemList.get(i).getItem_name(); String initText = null; String initText2 = null; if(text.length() <= text2.length()){ initText = text; initText2 = text2; }else{ initText = text2; initText2 = text; } // String calculatedText = calculate(initText, initText, initText2, 0, 2); String calculatedText = calculate2(initText, initText, initText2, 0, 2); /*if(initText.equals("蒜瓣肉")){ System.out.println(item.getSimilarity()); if(item.getSimilarity() > 9){ System.out.println("initText:" + initText); System.out.println("text:" + text); System.out.println("text2:" + text2); } }*/ if(calculatedText != null && calculatedText.equals("")){ calculatedText = "无匹配数据"; } if(calculatedText != null && !calculatedText.equals("无匹配数据")){ // System.out.println("匹配字符串:" + calculatedText); item.setMatchText(calculatedText); item.setSimilarity(item.getSimilarity() + 1); } } /*if(item.getItem_name().equals("蒜瓣肉") && item.getSimilarity() > 9){ System.out.println("相似数量:" + item.getSimilarity()); }*/ CompareMain.calculatedItemList.add(item); } public static String calculate2(String initText, String text, String initText2, int beginIndex, int len){ String subText = null; if(initText2.contains(text)){ if(initText.equals("芹菜文") && initText2.equals("芹菜文")){ System.out.println(4); System.out.println("4最后结果:" + text); System.out.println("4结束!"); } return text; }else{ while(initText.length() < len){ len--; } if(len >= CompareMain.minTextLen){ if(initText.equals("芹菜文")){ System.out.println(1); } if(beginIndex + len < initText.length()){ subText = 
initText.substring(beginIndex, beginIndex + len); beginIndex++; return calculate2(initText, subText, initText2, beginIndex, len); }else if(beginIndex + len >= initText.length()){ subText = initText.substring(beginIndex); beginIndex = 0; len--; return calculate2(initText, subText, initText2, beginIndex, len); } } } return null; } public static String calculate(String initText, String text, String text2, int beginIndex, int len){ if(text2.contains(text)){ return text; }else{ String subText = null; if(len < initText.length()){ if(beginIndex + len < initText.length()){ subText = initText.substring(beginIndex, beginIndex + len); }else{ subText = initText.substring(beginIndex); } // System.out.println("subText:" + subText); if(subText.length() == len){ // System.out.println("subText.length():" + subText.length()); beginIndex++; return calculate(initText, subText, text2, beginIndex, len); } } } return null; } }
修复了一个bug.
分析主类(改变了一点代码,逻辑没变):
package org.analysisitem20181016.main;

import java.io.File;
import java.io.FileOutputStream;
import java.util.ArrayList;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

import org.analysisitem20181016.pojo.Item;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;

/**
 * Entry point of the item-name analysis: reads items from an .xls file,
 * lets {@link ThreadMain} tasks compare every item with all others, and
 * writes the items together with their similarity count and matched text to
 * a second .xls file.
 */
public class CompareMain {

    /** Items read from the input sheet; filled by {@link #readExcel()}. */
    public static ArrayList<Item> itemList = new ArrayList<Item>();

    /**
     * Matches every run of characters outside U+4E00..U+9FA5. The backslashes
     * are required: without them the class is a list of literal characters and
     * ASCII ranges, which removes the very characters that should be kept.
     */
    private static String replaceReg = "[^\\u4e00-\\u9fa5]+";

    public static int maxTextLen = 4;

    /** Shortest window length {@link ThreadMain#calculate2} will try. */
    public static int minTextLen = 2;

    /** Items appended by the {@link ThreadMain} tasks once they are done. */
    public static ArrayList<Item> calculatedItemList = new ArrayList<Item>();

    /**
     * Runs the pipeline: read, compare, show, write. Any failure is printed
     * to standard error.
     */
    public static void main(String[] args) {
        try {
            CompareMain compareMain = new CompareMain();
            compareMain.readExcel();
            compareMain.subsectionCalculate();
            compareMain.show();
            compareMain.writeExcel();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Writes a header row followed by one row per entry of
     * {@link #calculatedItemList} to the output workbook.
     *
     * <p>The workbook and the output stream are closed even when writing
     * fails.
     *
     * @throws Exception if the file cannot be created or written
     */
    public void writeExcel() throws Exception {
        File file = new File("G:/Database/Item20181016_YangBing/notitle2.xls");
        Workbook wb = new HSSFWorkbook();
        try {
            Sheet sheet = wb.createSheet();

            String[] titles = {
                "item_name", "activity_id", "type", "user_id", "selled_count", "相似数量", "匹配字符串"
            };
            Row header = sheet.createRow(0);
            for (int c = 0; c < titles.length; c++) {
                header.createCell(c).setCellValue(titles[c]);
            }

            for (int i = 0; i < calculatedItemList.size(); i++) {
                Item item = calculatedItemList.get(i);
                if (item == null) {
                    continue;
                }
                // Row 0 is the header, so item i goes to sheet row i + 1.
                Row row = sheet.createRow(i + 1);
                row.createCell(0).setCellValue(item.getItem_name());
                row.createCell(1).setCellValue(item.getActivity_id());
                row.createCell(2).setCellValue(item.getType());
                row.createCell(3).setCellValue(item.getUser_id());
                row.createCell(4).setCellValue(item.getSelled_count());
                row.createCell(5).setCellValue(item.getSimilarity());
                row.createCell(6).setCellValue(item.getMatchText());
            }

            FileOutputStream fos = new FileOutputStream(file);
            try {
                wb.write(fos);
                fos.flush();
            } finally {
                fos.close();
            }
        } finally {
            wb.close();
        }
        System.out.println("写入Excel文件完成!");
    }

    /**
     * Hook for printing the calculated items. Console output is currently
     * disabled, so this method does nothing.
     */
    public void show() {
        // Intentionally empty: per-item console output is switched off.
    }

    /**
     * Runs one {@link ThreadMain} task per entry of {@link #itemList} and
     * blocks until all of them have finished.
     *
     * <p>The pool is bounded by the number of available processors instead of
     * using one thread per item: a pool sized by the item count cannot be
     * created for an empty list and would start an unbounded number of
     * threads for a large one.
     *
     * @throws Exception if the waiting thread is interrupted
     */
    public void subsectionCalculate() throws Exception {
        int size = itemList.size();
        if (size == 0) {
            return; // nothing to compare
        }

        int poolSize = Math.max(1, Math.min(size, Runtime.getRuntime().availableProcessors()));
        ThreadPoolExecutor executor = new ThreadPoolExecutor(
                poolSize, poolSize, 0L, TimeUnit.MILLISECONDS, new LinkedBlockingQueue<Runnable>());
        try {
            for (int i = 0; i < size; i++) {
                executor.execute(new ThreadMain(i, itemList.get(i)));
            }
            // No more tasks will be submitted; wait until the queued ones are done.
            executor.shutdown();
            while (!executor.awaitTermination(1, TimeUnit.SECONDS)) {
                // still running, keep waiting
            }
        } finally {
            // No-op after normal termination; cancels the workers if waiting was aborted.
            executor.shutdownNow();
        }
    }

    /**
     * Reads the first sheet of the input workbook into {@link #itemList}.
     *
     * <p>Row 0 is skipped; every following row becomes one {@link Item} built
     * from columns 0 to 4 (name, activity id, type, user id, sold count). The
     * name is reduced to the characters kept by {@link #replaceReg}. Rows that
     * do not exist in the file are skipped; a missing cell leaves the
     * corresponding field {@code null}, except for the name, which becomes an
     * empty string so later comparisons do not hit a {@code null}.
     *
     * <p>Cells are read with the type-specific POI getters, so a cell of an
     * unexpected type still raises the exception POI throws for it.
     *
     * @throws Exception if the file cannot be opened or a cell has the wrong type
     */
    public void readExcel() throws Exception {
        File file = new File("G:/Database/Item20181016_YangBing/notitle.xls");
        POIFSFileSystem fs = new POIFSFileSystem(file);
        try {
            Workbook wb = new HSSFWorkbook(fs);
            try {
                Sheet sheet = wb.getSheetAt(0);
                // getLastRowNum() is the index of the last row, so gaps in the
                // sheet neither cut the loop short nor hide trailing rows.
                for (int i = 1; i <= sheet.getLastRowNum(); i++) {
                    Row row = sheet.getRow(i);
                    if (row == null) {
                        continue;
                    }
                    Item item = new Item();
                    String name = textOf(row.getCell(0));
                    item.setItem_name(name == null ? "" : name.replaceAll(replaceReg, ""));
                    item.setActivity_id(wholeNumberOf(row.getCell(1)));
                    item.setType(textOf(row.getCell(2)));
                    item.setUser_id(wholeNumberOf(row.getCell(3)));
                    item.setSelled_count(wholeNumberOf(row.getCell(4)));
                    itemList.add(item);
                }
            } finally {
                wb.close();
            }
        } finally {
            fs.close();
        }
    }

    /** Returns the string value of {@code cell}, or {@code null} if the cell is missing. */
    private static String textOf(Cell cell) {
        return cell == null ? null : cell.getStringCellValue();
    }

    /**
     * Returns the numeric value of {@code cell} truncated to a whole number
     * and rendered as text, or {@code null} if the cell is missing.
     */
    private static String wholeNumberOf(Cell cell) {
        return cell == null ? null : (long) cell.getNumericCellValue() + "";
    }
}
现在可以匹配多个字符了,会有一点bug,暂时没空解决.
好了,有兴趣的自己看代码吧!
解析结果:
非常有问题,但是暂时没空也没心思解决.