zoukankan      html  css  js  c++  java
  • Lucene——索引的创建、删除、修改

    package cn.tz.lucene;
    
    import java.io.File;
    import java.util.ArrayList;
    import java.util.List;
    
    import org.apache.commons.io.FileUtils;
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field.Store;
    import org.apache.lucene.document.LongField;
    import org.apache.lucene.document.StringField;
    import org.apache.lucene.document.TextField;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.util.Version;
    import org.junit.Test;
    import org.wltea.analyzer.lucene.IKAnalyzer;
    
    public class IndexManagerTest {
    
    	@Test
    	public void testIndexCreate() throws Exception{
    		 //采集文件系统中的文档数据到Lucene中
    		 //创建文档列表
    		 List<Document> docList=new ArrayList<Document>();
    		 //指定文件目录
    		File dir=new File("C:\Users\admin\searchsource");
    		 //循环文件夹
    		for(File file:dir.listFiles()){
    			String fileName = file.getName();
    			String fileContent=FileUtils.readFileToString(file);
    			Long fileSize=FileUtils.sizeOf(file);
    			//创建文档对象
    			Document document=new Document();
    			TextField namefield=new TextField("fileName",fileName,Store.YES);
    			TextField contentField=new TextField("fileContent",fileContent,Store.YES);
    			LongField sizeField=new LongField("fileSize",fileSize,Store.YES);
    			//LongField 
    			document.add(namefield);
    			document.add(contentField);
    			document.add(sizeField);
    			docList.add(document);
    		}
    		
    		//创建分词器Analyzer
    		// Analyzer analyzer=new StandardAnalyzer();
    		
    		//采用第三方的中文分词器 IKAnalyzer
    		Analyzer analyzer=new IKAnalyzer();
    		 //指定索引和文档的存储目录
    		 FSDirectory desFile=FSDirectory.open(new File("d:\lucene"));
    		 //创建写对象的初始化对象
    		 IndexWriterConfig config=new IndexWriterConfig(Version.LUCENE_4_10_3,analyzer);
    		 //创建索引和文档的写对象
    		 IndexWriter writer=new IndexWriter(desFile,config);
    		 //将文档 加到索引和文档的写对象中
    		for(Document doc:docList){
    			writer.addDocument(doc);
    		}
    		//提交
    		writer.commit();
    		//关闭流
    		writer.close();
    	}
    	
    	@Test
    	public void testIndexDel() throws Exception{
    		Analyzer analyzer=new IKAnalyzer();
    		FSDirectory dir=FSDirectory.open(new File("d:\lucene"));
    		IndexWriterConfig config=new IndexWriterConfig(Version.LUCENE_4_10_3, analyzer);
    		IndexWriter writer=new IndexWriter(dir, config);
    		
    		//删除所有索引
    		//writer.deleteAll();
    		
    		//删除指定索引(根据域删除)
    		//Term("域名","搜索的关键字")
    		writer.deleteDocuments(new Term("fileName","apache"));
    		//提交
    		writer.commit();
    		//关闭
    		writer.close();
    		
    	}
    	
    	/**
    	 * 更新操作<br>:
    	 *  <li>按照Term进行指定域搜索关键字,如果查到记录就删除,然后将更新后的内容重新生成Document对象</li>
    	 *  <li>如果没有查到记录,则直接将更新后的内容添加一个Document对象</li>
    	 */
    	@Test
    	public void testIndexUpdate() throws Exception{
    		Analyzer analyzer=new IKAnalyzer();
    		//存储目录
    		FSDirectory dir=FSDirectory.open(new File("d:\lucene"));
    		IndexWriterConfig config=new IndexWriterConfig(Version.LUCENE_4_10_3, analyzer);
    		IndexWriter writer=new IndexWriter(dir,config);
    		
    		//按照fileName域进行搜索关键字"web"
    		Term term=new Term("fileName","哦哦");
    		
    		Document doc=new Document();
    		doc.add(new TextField("fileName","not exit",Store.YES));
    		doc.add(new LongField("fileSize",100L,Store.YES));
    		doc.add(new StringField("fileContent", "egfao容", Store.YES));
    		//更新
    		writer.updateDocument(term, doc);	
    		//提交
    		writer.commit();
    		//关闭
    		writer.close();
    	}
    }
    

      

  • 相关阅读:
    instanceof操作符判断对象类型
    继承
    题解 P3943 星空
    NOIP 模拟 10 考试总结
    题解 P3942 将军令
    题解 P3941 入阵曲
    题解 P3191 [HNOI2007]紧急疏散EVACUATE
    NOIP 模拟 9 考试总结
    NOIP 模拟 9 分组
    NOIP 模拟 9 数颜色
  • 原文地址:https://www.cnblogs.com/yuefeng123/p/8311115.html
Copyright © 2011-2022 走看看