zoukankan html css js c++ java

Lucene实战之基于StandardAnalyzer读写索引

前言

使用 Lucene 创建索引时如果指定了分析器（Analyzer），那么读、写索引都必须使用同一个分析器。目前我发现主要是在处理中文时比较麻烦：例如在使用 Solr 时，如果后来配置了 IK 分词，就需要把 index 清空并重新创建，才能继续搜索。

本篇使用的是 lucene-6.4.0，其中几个关键类的用法与 4.x 版本有所不同。

创建索引

 1  public void index(){
 2 
 3         Directory dir=null;
 4         Analyzer analyzer=null;
 5         IndexWriterConfig config=null;
 6         IndexWriter indexWriter=null;
 7         try{
 8             /**
 9              * SimpleFSDirectory 不能很好支持多线程操作
10              * **/
11             dir =new SimpleFSDirectory(Paths.get(INDEX_URL));
12 
13             analyzer=new StandardAnalyzer();
14             config =new IndexWriterConfig(analyzer);
15             /**
16              * IndexWriter(Directory d,IndexWriterConfig config)
17              * **/
18             indexWriter =new IndexWriter(dir,config);
19 
20             indexWriter.deleteAll();
21             List<UploadBook> books =bookDao.listAllBooks();
22             Document document=null;
23 
24             SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
25 
26             for(UploadBook book:books){
27                 document=new Document();
28                 document.add(new Field("id",book.getId().toString(), TextField.TYPE_STORED));
29                 document.add(new Field("ip",book.getIp(), TextField.TYPE_STORED));
30                 document.add(new Field("title",book.getOriginFileName(), TextField.TYPE_STORED));
31 
32                 document.add(new Field("content", PdfReader.read(INDEX_PDF+book.getNewFileName()),TextField.TYPE_STORED));
33                 document.add(new Field("createtime",formatter.format(book.getCreateTime()), TextField.TYPE_STORED));
34 
35                 indexWriter.addDocument(document);
36             }
37 
38             indexWriter.commit();
39 
40             System.out.println("======索引创建完成，公创建"+books.size()+"条索引========");
41         }catch (IOException ex){
42             ex.printStackTrace();
43         }
44         catch(Exception ex){
45             ex.printStackTrace();
46         }finally {
47             if(indexWriter !=null){
48                 try{
49                     indexWriter.close();
50                 }catch (IOException ex){
51                     System.out.println("======indexWriter close exception========");
52                 }
53             }
54         }
55 
56     }

读取索引

 1  public static List<Book> search2(String kw){
 2         Directory dir=null;
 3         Analyzer analyzer=null;
 4         List<Book> list = new ArrayList<Book>();
 5         try{
 6             dir= FSDirectory.open(Paths.get("e:\soso\index"));
 7             analyzer=new StandardAnalyzer();
 8 
 9             DirectoryReader reader =DirectoryReader.open(dir);
10             IndexSearcher searcher=new IndexSearcher(reader);
11 
12             QueryParser parser=new QueryParser("content",analyzer);
13             Query query =parser.parse(kw);
14 
15             ScoreDoc[] docs=searcher.search(query,100).scoreDocs;
16 
17             for (int i = 0; i < docs.length; i++) {
18                 Document firstHit = searcher.doc(docs[i].doc);
19 
20                 Book book=new Book();
21                 book.setId(Integer.parseInt(firstHit.getField("id").stringValue()));
22                 book.setIp(firstHit.getField("ip").stringValue());
23 
24                 String title=firstHit.getField("title").stringValue();
25                 title=title.substring(0,title.lastIndexOf("."));
26                 book.setTitle(title);
27 
28                 String content=firstHit.getField("content").stringValue();
29                 if(content.length()>=500){
30                     content=content.substring(0,500)+"......";
31                 }
32                 book.setContent(content);
33 
34                 SimpleDateFormat format=new SimpleDateFormat("yyyy-MM-mm");
35                 Date date =format.parse(firstHit.getField("createtime").stringValue());
36                 book.setCreateTime(format.format(date));
37 
38                 list.add(book);
39 
40             }
41 
42         }catch(Exception ex){
43 
44         }finally {
45             try{
46                 dir.close();
47 
48             }catch(IOException ex){
49                 ex.printStackTrace();
50             }
51         }
52 
53         return list;
54     }

查看全文

相关阅读:
【一天一道兼容性】之——IE6下fixed失效
 【前端重构技能天赋】（三）——最终篇
 Putty中文乱码问题
 Cygwin Application initialization failed: no display name and no $DISPLAY environment
c++中的string用法(二)
在win7下面使用cygwin，并且安装使用git，以及git简明教程
 vi 一些命令（备忘，自己用的）
对C++中string类型的总结
 ofstream和ifstream详细用法
 写第一个shell脚本，遇到的问题总结整理。

原文地址：https://www.cnblogs.com/sword-successful/p/6961143.html