zoukankan html css js c++ java

CJCMS系列说说项目中如何使用搜索引擎

　　开源全文搜索引擎，比较火的当属Lucene.net。

　　Lucene.net是Lucene的.net移植版本，是一个开源的全文检索引擎开发包，即它不是一个完整的全文检索引擎，而是一个全文检索引擎的架构，提供了完整的查询引擎和索引引擎。开发人员可以基于Lucene.net实现全文检索的功能。

　　Lucene.net并不是一个爬行搜索引擎，也不会自动地索引内容。我们得先将要索引的文档中的文本抽取出来，然后再将其加到Lucene.net索引中。标准的步骤是先初始化一个Analyzer、打开一个IndexWriter、然后再将文档一个接一个地加进去。一旦完成这些步骤，索引就可以在关闭前得到优化，同时所做的改变也会生效。这个过程可能比开发者习惯的方式更加手工化一些，但却在数据的索引上给予你更多的灵活性。

　　比较麻烦的是，Lucene.net对中文分词的支持，我好像没有找到合适的，所以我从网上找到了盘古分词。盘古分词据说在中文分词方面做得很好，所以我也就将它引入到项目中来了。

　　首先是索引元，索引元怎么实现呢？为了实现通用的索引元数据，我使用了如下的抽象。

 1 using System;
 2 using System.Collections.Generic;
 3 using System.Linq;
 4 using System.Text;
 5 using Lucene.Net.Documents;
 6 
 7 namespace CJCMS.Framework.Lucene
 8 {
 9     /// <summary>
10     /// A piece of content that an <see cref="IIndexManager"/> can write to the index.
11     /// </summary>
12     public interface IIndexItem
13     {
14         /// <summary>Identifier of the item; used when the item's document has to be found again, e.g. for deletion.</summary>
15         string Id { get; set; }
16 
17         /// <summary>Lucene fields that are copied into the item's document when it is added to the index.</summary>
18         List<Field> IndexItemsFieldList { get; set; }
19 
20         /// <summary>
21         /// Prepares the item for indexing.
22         /// NOTE(review): presumably fills <see cref="IndexItemsFieldList"/>; no implementation is visible here - confirm.
23         /// </summary>
24         void InitIndexItem();
25     }
26 }

 1 using System;
 2 using System.Collections.Generic;
 3 using System.Linq;
 4 using System.Text;
 5 using Lucene.Net.Documents;
 6 
 7 namespace CJCMS.Framework.Lucene
 8 {
 9     /// <summary>
10     /// Operations for maintaining and querying a Lucene.Net index of <see cref="IIndexItem"/> objects.
11     /// </summary>
12     public interface IIndexManager
13     {
14         /// <summary>Path of the index that the manager reads and writes.</summary>
15         string IndexPath { get; set; }
16 
17         /// <summary>Adds a document for <paramref name="item"/> to the index.</summary>
18         void Add(IIndexItem item);
19 
20         /// <summary>Replaces the indexed document of <paramref name="item"/> with its current fields.</summary>
21         void Update(IIndexItem item);
22 
23         /// <summary>Removes the document(s) belonging to <paramref name="item"/>.</summary>
24         void Delete(IIndexItem item);
25 
26         /// <summary>Removes the document(s) whose item id is <paramref name="Id"/>.</summary>
27         void Delete(string Id);
28 
29         /// <summary>Searches the index and returns one page of matching documents.</summary>
30         /// <param name="keywords">Query text.</param>
31         /// <param name="pageNum">Page number, starting at 1.</param>
32         /// <param name="count">Intended number of documents per page.</param>
33         /// <returns>The matching documents for the requested page.</returns>
34         List<Document> Fetch(string keywords, int pageNum, int count);
35     }
36 }

 1 using System;
 2 using System.Collections.Generic;
 3 using System.Linq;
 4 using System.Text;
 5 using Lucene.Net.Search;
 6 using Lucene.Net.Index;
 7 using Lucene.Net.Analysis;
 8 using Lucene.Net.Analysis.PanGu;
 9 using Lucene.Net.Documents;
10 using Lucene.Net.QueryParsers;
11 
12 namespace CJCMS.Framework.Lucene
13 {
14     /// <summary>
15     /// <see cref="IIndexManager"/> that keeps a Lucene.Net index at <see cref="IndexPath"/> and analyses text with PanGu.
16     /// Each write opens its own IndexWriter and closes it afterwards so the change is flushed and the write lock is released.
17     /// </summary>
18     public class DefaultIndexManager : IIndexManager
19     {
20         // Field that delete queries are parsed against; also the default field for Fetch queries.
21         private const string IdFieldName = "ItemId";
22 
23         private string indexpath = "";
24 
25         /// <summary>Path of the index; handed to IndexWriter and IndexSearcher.</summary>
26         public string IndexPath
27         {
28             get { return this.indexpath; }
29             // Store the incoming value; assigning in the other direction would silently discard the new path.
30             set { this.indexpath = value; }
31         }
32 
33         /// <summary>Creates a manager for the index stored at <paramref name="indexpath"/>.</summary>
34         public DefaultIndexManager(string indexpath)
35         {
36             IndexPath = indexpath;
37         }
38 
39         /// <summary>Adds one document made of the fields in <c>item.IndexItemsFieldList</c>.</summary>
40         /// <exception cref="ArgumentNullException"><paramref name="item"/> is null.</exception>
41         /// <exception cref="ArgumentException">The item's field list is null.</exception>
42         public void Add(IIndexItem item)
43         {
44             if (item == null)
45             {
46                 throw new ArgumentNullException("item");
47             }
48             if (item.IndexItemsFieldList == null)
49             {
50                 throw new ArgumentException("IndexItemsFieldList is null.", "item");
51             }
52 
53             Document doc = new Document();
54             foreach (Field f in item.IndexItemsFieldList)
55             {
56                 doc.Add(f);
57             }
58 
59             IndexWriter writer = OpenWriter();
60             try
61             {
62                 writer.AddDocument(doc);
63             }
64             finally
65             {
66                 // Close even on failure: it commits pending changes and frees the index write lock.
67                 writer.Close();
68             }
69         }
70 
71         /// <summary>Replaces the item's document: deletes by id, then adds the current fields.</summary>
72         public void Update(IIndexItem item)
73         {
74             Delete(item);
75             Add(item);
76         }
77 
78         /// <summary>Deletes the document(s) matching <c>item.Id</c>.</summary>
79         /// <exception cref="ArgumentNullException"><paramref name="item"/> is null.</exception>
80         public void Delete(IIndexItem item)
81         {
82             if (item == null)
83             {
84                 throw new ArgumentNullException("item");
85             }
86             Delete(item.Id);
87         }
88 
89         /// <summary>Deletes the document(s) whose "ItemId" field matches <paramref name="Id"/>.</summary>
90         /// <exception cref="ArgumentException"><paramref name="Id"/> is null or empty.</exception>
91         public void Delete(string Id)
92         {
93             if (string.IsNullOrEmpty(Id))
94             {
95                 throw new ArgumentException("An item id is required.", "Id");
96             }
97 
98             // Escape the id so characters that are query syntax (':', '-', '*', ...) are matched literally.
99             QueryParser queryParser = new QueryParser(IdFieldName, new PanGuAnalyzer());
100             Query query = queryParser.Parse(QueryParser.Escape(Id));
101 
102             IndexWriter writer = OpenWriter();
103             try
104             {
105                 writer.DeleteDocuments(query);
106             }
107             finally
108             {
109                 writer.Close();
110             }
111         }
112 
113         /// <summary>
114         /// Paged search.
115         /// </summary>
116         /// <param name="keywords">Query text in Lucene query syntax; terms without a field prefix are searched in "ItemId".</param>
117         /// <param name="pageNum">Page number, starting at 1.</param>
118         /// <param name="count">Maximum number of documents per page.</param>
119         /// <returns>The documents of the requested page; empty when keywords is blank or the page lies past the last hit.</returns>
120         /// <exception cref="ArgumentOutOfRangeException">pageNum is less than 1 or count is negative.</exception>
121         public List<Document> Fetch(string keywords, int pageNum, int count)
122         {
123             if (pageNum < 1)
124             {
125                 throw new ArgumentOutOfRangeException("pageNum", "Page numbers start at 1.");
126             }
127             if (count < 0)
128             {
129                 throw new ArgumentOutOfRangeException("count", "Page size must not be negative.");
130             }
131 
132             List<Document> page = new List<Document>();
133             if (keywords == null || keywords.Trim().Length == 0)
134             {
135                 return page;
136             }
137 
138             QueryParser queryParser = new QueryParser(IdFieldName, new PanGuAnalyzer());
139             Query query = queryParser.Parse(keywords);
140 
141             IndexSearcher searcher = new IndexSearcher(IndexPath);
142             try
143             {
144                 Hits hits = searcher.Search(query);
145                 // long arithmetic so a large pageNum * count cannot overflow into a negative index.
146                 long first = (long)(pageNum - 1) * count;
147                 long last = Math.Min(first + count, hits.Length());
148                 for (long i = first; i < last; i++)
149                 {
150                     page.Add(hits.Doc((int)i));
151                 }
152             }
153             finally
154             {
155                 // Hits loads documents lazily, so the searcher is closed only after the page has been copied.
156                 searcher.Close();
157             }
158             return page;
159         }
160 
161         // Opens a writer that appends to the existing index, creating a new index only when none exists at IndexPath.
162         private IndexWriter OpenWriter()
163         {
164             bool create = !IndexReader.IndexExists(IndexPath);
165             return new IndexWriter(IndexPath, new PanGuAnalyzer(), create);
166         }
167     }
168 }

查看全文

相关阅读:
Oracle RMAN快速入门指南
 防止SQL注入
 脚本获取参数　
 回车下一个
 DataTable
C# Eval
C#过滤所有HTML代码的函数
 反射
 gridview格式化问题
 绑定

原文地址：https://www.cnblogs.com/ntcj/p/2673234.html