zoukankan      html  css  js  c++  java
  • Lucene.Net + 盘古分词

    转载自:http://blog.csdn.net/pukuimin1226/article/details/17558247/

    1、通过 NuGet 安装 Lucene 的盘古分析器(PanGu),安装时会自动添加 Lucene 以及分词、高亮等相关引用

    2、将自动添加的 Dict 字典文件复制到输出目录

    3、添加盘古分词及Lucene帮助类

    using Lucene.Net.Analysis;
    using Lucene.Net.Documents;
    using Lucene.Net.Index;
    using Lucene.Net.QueryParsers;
    using Lucene.Net.Search;
    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Reflection;
    using System.Web;
    
    namespace WebApplication1.Commons
    {
        /// <summary>  
        /// 盘古分词在lucene.net中的使用帮助类  
        /// 调用PanGuLuceneHelper.instance  
        /// </summary>  
        public class PanGuLuceneHelper
        {
            /// <summary>
            /// Private constructor: the class cannot be instantiated from outside.
            /// </summary>
            private PanGuLuceneHelper()
            {
            }
    
            #region 单一实例  
            // Created by the static field initializer, which the runtime executes exactly once,
            // so concurrent first accesses can no longer race and build two helper instances.
            private static readonly PanGuLuceneHelper _instance = new PanGuLuceneHelper();

            /// <summary>
            /// The single shared instance of the helper.
            /// </summary>
            public static PanGuLuceneHelper instance
            {
                get { return _instance; }
            }
            #endregion
    
            #region 分词测试  
            /// <summary>
            /// Tokenization test: runs <paramref name="keyword"/> through the analyzer and
            /// returns the terms it produces.
            /// </summary>
            /// <param name="keyword">Text to tokenize.</param>
            /// <returns>
            /// Every term followed by a '|' separator (for example "a|b|");
            /// an empty string when <paramref name="keyword"/> is null or empty.
            /// </returns>
            public string Token(string keyword)
            {
                if (string.IsNullOrEmpty(keyword)) return "";

                // The analyzer property returns a new object on each access, so keep one
                // reference: the analyzer that produced the stream is the one closed below.
                Analyzer currentAnalyzer = analyzer;
                System.Text.StringBuilder terms = new System.Text.StringBuilder();
                try
                {
                    // NOTE(review): the first TokenStream argument is the field name; the keyword
                    // text is passed here as before - presumably unused by the analyzer, confirm.
                    using (System.IO.StringReader reader = new System.IO.StringReader(keyword))
                    using (Lucene.Net.Analysis.TokenStream ts = currentAnalyzer.TokenStream(keyword, reader))
                    {
                        while (ts.IncrementToken())
                        {
                            Lucene.Net.Analysis.Tokenattributes.ITermAttribute term =
                                ts.GetAttribute<Lucene.Net.Analysis.Tokenattributes.ITermAttribute>();
                            terms.Append(term.Term).Append("|");
                        }
                    }
                }
                finally
                {
                    currentAnalyzer.Close();
                }
                return terms.ToString();
            }
            #endregion
    
            #region 创建索引  
            /// <summary>
            /// Adds every item of <paramref name="datalist"/> to the index, creating the index
            /// when none exists yet and appending to it otherwise, then optimizes the index.
            /// </summary>
            /// <param name="datalist">Rows to index; null entries are skipped.</param>
            /// <returns>
            /// <c>true</c> when the batch was written; <c>false</c> when
            /// <paramref name="datalist"/> is null.
            /// </returns>
            public bool CreateIndex(List<MySearchUnit> datalist)
            {
                if (datalist == null) return false;

                Lucene.Net.Store.Directory indexDirectory = directory_luce;

                // Decide explicitly between "create" and "append" instead of provoking an
                // exception with create=false and retrying with create=true.
                bool createNew = !IndexReader.IndexExists(indexDirectory);

                // The using block releases the writer (and its index lock) even when indexing fails.
                using (IndexWriter writer = new IndexWriter(indexDirectory, analyzer, createNew, IndexWriter.MaxFieldLength.LIMITED))
                {
                    foreach (MySearchUnit data in datalist)
                    {
                        CreateIndex(writer, data);
                    }
                    writer.Optimize();
                }
                return true;
            }
    
            /// <summary>
            /// Converts one row into a Lucene document (one stored field per public property)
            /// and adds it through <paramref name="writer"/>.
            /// </summary>
            /// <param name="writer">Open index writer that receives the document.</param>
            /// <param name="data">Row to index.</param>
            /// <returns><c>false</c> when <paramref name="data"/> is null, otherwise <c>true</c>.</returns>
            public bool CreateIndex(IndexWriter writer, MySearchUnit data)
            {
                if (data == null) return false;

                Document doc = new Document();
                foreach (PropertyInfo property in data.GetType().GetProperties())
                {
                    string name = property.Name;
                    object rawValue = property.GetValue(data, null);
                    string value = rawValue == null ? "" : rawValue.ToString();

                    // "id" and "flag" are stored as single untokenized terms so they can be
                    // matched exactly (e.g. for deletion); every other property is analyzed.
                    Field.Index indexMode = (name == "id" || name == "flag")
                        ? Field.Index.NOT_ANALYZED
                        : Field.Index.ANALYZED;
                    doc.Add(new Field(name, value, Field.Store.YES, indexMode));
                }

                // Exceptions propagate unchanged; the former catch block only re-threw with
                // "throw fnfe;", which discarded the original stack trace.
                writer.AddDocument(doc);
                return true;
            }
            #endregion
    
            #region 在title和content字段中查询数据  
            /// <summary>
            /// Searches the "title" and "content" fields for <paramref name="keyword"/> and
            /// returns up to 1000 hits with the matched text wrapped in red font tags.
            /// </summary>
            /// <param name="keyword">Query text, parsed with the Lucene query parser.</param>
            /// <returns>
            /// The matching rows, or <c>null</c> when <paramref name="keyword"/> is null or empty
            /// or nothing matches.
            /// </returns>
            public List<MySearchUnit> Search(string keyword)
            {
                if (string.IsNullOrEmpty(keyword)) return null;

                const int maxHits = 1000;
                string[] fields = { "title", "content" };
                QueryParser parser = new MultiFieldQueryParser(version, fields, analyzer);
                Query query = parser.Parse(keyword);

                // Dispose the searcher so the underlying index files are released after the query.
                using (IndexSearcher searcher = new IndexSearcher(directory_luce, true)) // true = read-only
                {
                    TopDocs docs = searcher.Search(query, (Filter)null, maxHits);
                    if (docs == null || docs.TotalHits == 0)
                    {
                        return null;
                    }

                    // One highlighter serves all hits; the quotes inside the tag must be escaped.
                    PanGu.HighLight.SimpleHTMLFormatter formatter = new PanGu.HighLight.SimpleHTMLFormatter("<font color=\"red\">", "</font>");
                    PanGu.HighLight.Highlighter highlighter = new PanGu.HighLight.Highlighter(formatter, new PanGu.Segment());
                    highlighter.FragmentSize = 50;

                    List<MySearchUnit> list = new List<MySearchUnit>();
                    foreach (ScoreDoc sd in docs.ScoreDocs)
                    {
                        try
                        {
                            Document doc = searcher.Doc(sd.Doc);
                            string title = doc.Get("title");
                            string content = doc.Get("content");

                            // Keep the stored text whenever the highlighter finds nothing in it,
                            // so a hit that matched only one field does not lose the other one.
                            if (!string.IsNullOrEmpty(content))
                            {
                                string contentHighlight = highlighter.GetBestFragment(keyword, content);
                                if (!string.IsNullOrEmpty(contentHighlight)) content = contentHighlight;
                            }
                            if (!string.IsNullOrEmpty(title))
                            {
                                string titleHighlight = highlighter.GetBestFragment(keyword, title);
                                if (!string.IsNullOrEmpty(titleHighlight)) title = titleHighlight;
                            }

                            list.Add(new MySearchUnit(doc.Get("id"), title, content, doc.Get("flag"), doc.Get("imageurl"), doc.Get("updatetime")));
                        }
                        catch (Exception ex)
                        {
                            // Best effort: a hit that cannot be converted is skipped, but the
                            // failure is traced rather than written to a console nobody reads.
                            System.Diagnostics.Trace.TraceError(ex.ToString());
                        }
                    }
                    return list;
                }
            }
            #endregion
    
            #region 在不同的分类下再根据title和content字段中查询数据(分页)  
            /// <summary>
            /// Paged search: optionally restricts hits to one category ("flag" field) and
            /// matches <paramref name="keyword"/> against the "title" and "content" fields.
            /// </summary>
            /// <param name="_flag">Category to filter on; null or empty applies no category filter.</param>
            /// <param name="keyword">Query text; null or empty applies no keyword clause.</param>
            /// <param name="PageIndex">1-based page number; values below 1 are treated as 1.</param>
            /// <param name="PageSize">Number of rows per page.</param>
            /// <param name="TotalCount">Receives the total number of hits across all pages (0 when none).</param>
            /// <returns>The rows of the requested page, or <c>null</c> when nothing matches.</returns>
            public List<MySearchUnit> Search(string _flag, string keyword, int PageIndex, int PageSize, out int TotalCount)
            {
                TotalCount = 0;
                if (PageIndex < 1) PageIndex = 1;

                bool hasKeyword = !string.IsNullOrEmpty(keyword);
                BooleanQuery bq = new BooleanQuery();
                if (!string.IsNullOrEmpty(_flag))
                {
                    // NOTE(review): "flag" is indexed untokenized but parsed here through the
                    // analyzer; an exact TermQuery may be the intended match - confirm.
                    QueryParser qpflag = new QueryParser(version, "flag", analyzer);
                    bq.Add(qpflag.Parse(_flag), Occur.MUST); // AND
                }
                if (hasKeyword)
                {
                    string[] fields = { "title", "content" };
                    QueryParser parser = new MultiFieldQueryParser(version, fields, analyzer);
                    bq.Add(parser.Parse(keyword), Occur.MUST); // AND
                }

                // Collect enough hits to reach the end of the requested page.
                TopScoreDocCollector collector = TopScoreDocCollector.Create(PageIndex * PageSize, false);

                // Dispose the searcher so the underlying index files are released after the query.
                using (IndexSearcher searcher = new IndexSearcher(directory_luce, true)) // true = read-only
                {
                    searcher.Search(bq, collector);
                    if (collector.TotalHits == 0)
                    {
                        return null;
                    }
                    TotalCount = collector.TotalHits;

                    int start = PageSize * (PageIndex - 1);
                    ScoreDoc[] hits = collector.TopDocs(start, PageSize).ScoreDocs;

                    // One highlighter serves all hits; the quotes inside the tag must be escaped.
                    PanGu.HighLight.SimpleHTMLFormatter formatter = new PanGu.HighLight.SimpleHTMLFormatter("<font color=\"red\">", "</font>");
                    PanGu.HighLight.Highlighter highlighter = new PanGu.HighLight.Highlighter(formatter, new PanGu.Segment());
                    highlighter.FragmentSize = 50;

                    List<MySearchUnit> list = new List<MySearchUnit>();
                    foreach (ScoreDoc sd in hits)
                    {
                        try
                        {
                            Document doc = searcher.Doc(sd.Doc);
                            string title = doc.Get("title");
                            string content = doc.Get("content");

                            // Highlight only when there is a keyword, and keep the stored text
                            // whenever the highlighter finds nothing in it.
                            if (hasKeyword && !string.IsNullOrEmpty(content))
                            {
                                string contentHighlight = highlighter.GetBestFragment(keyword, content);
                                if (!string.IsNullOrEmpty(contentHighlight)) content = contentHighlight;
                            }
                            if (hasKeyword && !string.IsNullOrEmpty(title))
                            {
                                string titleHighlight = highlighter.GetBestFragment(keyword, title);
                                if (!string.IsNullOrEmpty(titleHighlight)) title = titleHighlight;
                            }

                            list.Add(new MySearchUnit(doc.Get("id"), title, content, doc.Get("flag"), doc.Get("imageurl"), doc.Get("updatetime")));
                        }
                        catch (Exception ex)
                        {
                            // Best effort: a hit that cannot be converted is skipped, but the
                            // failure is traced rather than written to a console nobody reads.
                            System.Diagnostics.Trace.TraceError(ex.ToString());
                        }
                    }
                    return list;
                }
            }
            #endregion
    
            #region 删除索引数据(根据id)  
            /// <summary>
            /// Deletes the documents whose "id" field equals <paramref name="id"/>.
            /// </summary>
            /// <param name="id">Exact id of the row to remove.</param>
            /// <returns>
            /// The writer's HasDeletions() value after the commit; <c>false</c> when
            /// <paramref name="id"/> is null.
            /// </returns>
            public bool Delete(string id)
            {
                if (id == null) return false;

                Term term = new Term("id", id);

                // The using block releases the writer (and its index lock) even when the delete fails.
                using (IndexWriter writer = new IndexWriter(directory_luce, analyzer, false, IndexWriter.MaxFieldLength.LIMITED))
                {
                    writer.DeleteDocuments(term);
                    writer.Commit();

                    // NOTE(review): HasDeletions() describes the index as a whole, which is
                    // presumably not the same as "this id was found" - confirm with callers.
                    return writer.HasDeletions();
                }
            }
            #endregion
    
            #region 删除全部索引数据  
            /// <summary>
            /// Removes every document from the index.
            /// </summary>
            /// <returns><c>true</c> when the index was cleared and committed; <c>false</c> when any step failed.</returns>
            public bool DeleteAll()
            {
                try
                {
                    // The using block releases the writer (and its index lock) even on failure.
                    using (IndexWriter writer = new IndexWriter(directory_luce, analyzer, false, IndexWriter.MaxFieldLength.LIMITED))
                    {
                        writer.DeleteAll();
                        writer.Commit();
                    }

                    // Success means "completed without an exception". HasDeletions() is not used:
                    // it does not report the outcome of DeleteAll(), so a successful wipe could
                    // be returned as a failure.
                    return true;
                }
                catch (Exception ex)
                {
                    System.Diagnostics.Trace.TraceError(ex.ToString());
                    return false;
                }
            }
            #endregion
    
            #region directory_luce  
            // Backing field; stays null until the property is first read.
            private Lucene.Net.Store.Directory _directory_luce = null;

            /// <summary>
            /// Lucene.Net directory object for the index folder, opened on first use and then reused.
            /// </summary>
            public Lucene.Net.Store.Directory directory_luce
            {
                get
                {
                    if (_directory_luce != null)
                    {
                        return _directory_luce;
                    }

                    _directory_luce = Lucene.Net.Store.FSDirectory.Open(directory);
                    return _directory_luce;
                }
            }
            #endregion
    
            #region directory  
            // Backing field; stays null until the property is first read.
            private System.IO.DirectoryInfo _directory = null;

            /// <summary>
            /// Folder on disk that holds the index: "SearchIndex" under the application's
            /// base directory. The folder is created on first access when it is missing.
            /// </summary>
            public System.IO.DirectoryInfo directory
            {
                get
                {
                    if (_directory == null)
                    {
                        // Path.Combine inserts the separator when BaseDirectory lacks a trailing one.
                        string dirPath = System.IO.Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "SearchIndex");

                        // CreateDirectory returns the DirectoryInfo whether or not the folder
                        // already exists, so no separate existence check is needed.
                        _directory = System.IO.Directory.CreateDirectory(dirPath);
                    }
                    return _directory;
                }
            }
            #endregion
    
            #region analyzer  
            // Holds the analyzer created by the most recent read of the property.
            private Analyzer _analyzer = null;

            /// <summary>
            /// Analyzer used for indexing and querying. A new PanGu analyzer is created on
            /// every access; no instance is reused between reads.
            /// </summary>
            public Analyzer analyzer
            {
                get
                {
                    // PanGu word-segmentation analyzer. (A StandardAnalyzer for
                    // Lucene.Net.Util.Version.LUCENE_30 was the alternative noted by the author.)
                    Analyzer created = new Lucene.Net.Analysis.PanGuAnalyzer();
                    _analyzer = created;
                    return created;
                }
            }
            #endregion
    
            #region version  
            // Lucene compatibility level shared by every parser created in this class.
            private static Lucene.Net.Util.Version _version = Lucene.Net.Util.Version.LUCENE_30;

            /// <summary>
            /// Lucene version enumeration value passed to the query parsers.
            /// </summary>
            public Lucene.Net.Util.Version version
            {
                get { return _version; }
            }
            #endregion
        }
    
        #region 索引的一个行单元,相当于数据库中的一行数据  
        /// <summary>
        /// One row of the index, comparable to a single row of a database table.
        /// </summary>
        public class MySearchUnit
        {
            /// <summary>
            /// Unique id of the row.
            /// </summary>
            public string id { get; set; }

            /// <summary>
            /// Title.
            /// </summary>
            public string title { get; set; }

            /// <summary>
            /// Body text.
            /// </summary>
            public string content { get; set; }

            /// <summary>
            /// Additional information.
            /// </summary>
            public string flag { get; set; }

            /// <summary>
            /// Image path.
            /// </summary>
            public string imageurl { get; set; }

            /// <summary>
            /// Time stamp, kept as text.
            /// </summary>
            public string updatetime { get; set; }

            /// <summary>
            /// Creates a row with all six values supplied.
            /// </summary>
            public MySearchUnit(string _id, string _title, string _content, string _flag, string _imageurl, string _updatetime)
            {
                id = _id;
                title = _title;
                content = _content;
                flag = _flag;
                imageurl = _imageurl;
                updatetime = _updatetime;
            }
        }
        #endregion
    }

    4、使用示例:创建索引及搜索

            /// <summary>
            /// Demo action: optionally builds a small sample index, then runs a paged search
            /// for "几天" and returns the hits as an HTML fragment.
            /// </summary>
            /// <returns>Content result containing the hit count, the hits and the Lucene version.</returns>
            public ActionResult Index()
            {
                // Maintenance calls that can be enabled when needed:
                //   PanGuLuceneHelper.instance.DeleteAll();   // remove everything
                //   PanGuLuceneHelper.instance.Delete("1d");  // remove one row by id

                bool exec = false; // set to true once to write the sample rows into the index
                if (exec)
                {
                    // A single generator: separate Random objects created back to back can
                    // share a seed and would then all produce the same flag value.
                    Random random = new Random();
                    List<MySearchUnit> list = new List<MySearchUnit>();
                    list.Add(new MySearchUnit("1a", "标题小王", "今天是小王的生日,大家都很高兴去他家喝酒,玩了一整天。", random.Next(1, 10).ToString(), "", ""));
                    list.Add(new MySearchUnit("1b", "标题小张", "今天是小张的生日,大家都很高兴去他家喝酒,玩了几天。", random.Next(1, 10).ToString(), "", ""));
                    list.Add(new MySearchUnit("1c", "标题小王", "今天是小王的生日,大家都很高兴去他家喝酒,玩了一整天。", random.Next(1, 10).ToString(), "", ""));
                    list.Add(new MySearchUnit("1d", "标题小张", "今天是小张的生日,大家都很高兴去他家喝酒,玩了几天。", random.Next(1, 10).ToString(), "", ""));
                    PanGuLuceneHelper.instance.CreateIndex(list); // add to the index
                }

                int count = 0;
                int PageIndex = 1;
                int PageSize = 4;
                System.Text.StringBuilder html_content = new System.Text.StringBuilder();
                List<MySearchUnit> searchlist = PanGuLuceneHelper.instance.Search("", "几天", PageIndex, PageSize, out count);
                html_content.Append("查询结果:" + count + "条数据<br/>");
                if (searchlist == null || searchlist.Count == 0)
                {
                    html_content.Append("未查询到数据。<br/>");
                }
                else
                {
                    foreach (MySearchUnit data in searchlist)
                    {
                        html_content.AppendFormat("id:{0},title:{1},content:{2},flag:{3},updatetime:{4}<br/>", data.id, data.title, data.content, data.flag, data.updatetime);
                    }
                }
                html_content.Append(PanGuLuceneHelper.instance.version);
                return Content(html_content.ToString());
            }
  • 相关阅读:
    SQL Server:创建索引视图
    Asp.Net常用函数
    SQL Server联机丛书:删除存储过程
    音乐知识全接触
    深入透析样式表滤镜
    有一天,爸妈会变老
    今天终于买到票啦~~
    今天,回到上海啦~~(附工作生涯回顾)
    十八问:怎么才是喜欢编程
    把旧光驱改CD播放机的方法
  • 原文地址:https://www.cnblogs.com/ideacore/p/7644215.html
Copyright © 2011-2022 走看看