zoukankan      html  css  js  c++  java
  • 全文检索 使用最新lucene3.0.3+最新盘古分词 pangu2.4 .net 实例

    开发环境 vs2015 winform 程序

    1 首先需要下载对应的 DLL(文章末尾统一提供了程序下载地址,所需的 DLL 都包含在其中)

    2 配置 PanGu 的参数(也可以不配置,直接采用默认值即可)

    3 创建索引,将索引存放到本地

    4 根据关键字查询本地索引

    5 取得查询结果并展示

    以上是主要的步骤,下面贴上主要代码,拿来即可用

    Form1.Designer.cs

    namespace lucuneTest
    {
        partial class Form1
        {
            /// <summary>
            /// 必需的设计器变量。
            /// </summary>
            private System.ComponentModel.IContainer components = null;
    
            /// <summary>
            /// Releases the resources used by this form.
            /// </summary>
            /// <param name="disposing">true when managed resources should be released as well; otherwise false.</param>
            protected override void Dispose(bool disposing)
            {
                // Only touch the component container on an explicit dispose, and only if it was ever created.
                bool releaseComponents = disposing && components != null;
                if (releaseComponents)
                {
                    components.Dispose();
                }

                base.Dispose(disposing);
            }
    
            #region Windows 窗体设计器生成的代码
    
            /// <summary>
            /// Required method for Designer support - do not modify
            /// the contents of this method with the code editor.
            /// </summary>
            private void InitializeComponent()
            {
                this.btnSearch = new System.Windows.Forms.Button();
                this.panel1 = new System.Windows.Forms.Panel();
                this.textBox2 = new System.Windows.Forms.TextBox();
                this.panel2 = new System.Windows.Forms.Panel();
                this.txtWords = new System.Windows.Forms.TextBox();
                this.btnCutWords = new System.Windows.Forms.Button();
                this.txtWords2 = new System.Windows.Forms.TextBox();
                this.txtResult = new System.Windows.Forms.TextBox();
                this.label3 = new System.Windows.Forms.Label();
                this.label4 = new System.Windows.Forms.Label();
                this.panel1.SuspendLayout();
                this.panel2.SuspendLayout();
                this.SuspendLayout();
                // 
                // btnSearch
                // 
                this.btnSearch.Location = new System.Drawing.Point(413, 48);
                this.btnSearch.Name = "btnSearch";
                this.btnSearch.Size = new System.Drawing.Size(75, 23);
                this.btnSearch.TabIndex = 3;
                this.btnSearch.Text = "查询";
                this.btnSearch.UseVisualStyleBackColor = true;
                this.btnSearch.Click += new System.EventHandler(this.btnSearch_Click);
                // 
                // panel1
                // 
                this.panel1.Controls.Add(this.label3);
                this.panel1.Controls.Add(this.label4);
                this.panel1.Controls.Add(this.txtWords2);
                this.panel1.Controls.Add(this.btnCutWords);
                this.panel1.Controls.Add(this.txtWords);
                this.panel1.Controls.Add(this.textBox2);
                this.panel1.Controls.Add(this.btnSearch);
                this.panel1.Dock = System.Windows.Forms.DockStyle.Top;
                this.panel1.Location = new System.Drawing.Point(0, 0);
                this.panel1.Name = "panel1";
                this.panel1.Size = new System.Drawing.Size(884, 92);
                this.panel1.TabIndex = 5;
                // 
                // textBox2
                // 
                this.textBox2.Location = new System.Drawing.Point(36, 29);
                this.textBox2.Name = "textBox2";
                this.textBox2.Size = new System.Drawing.Size(328, 21);
                this.textBox2.TabIndex = 5;
                this.textBox2.Text = "天龙八部";
                // 
                // panel2
                // 
                this.panel2.Controls.Add(this.txtResult);
                this.panel2.Dock = System.Windows.Forms.DockStyle.Fill;
                this.panel2.Location = new System.Drawing.Point(0, 92);
                this.panel2.Name = "panel2";
                this.panel2.Size = new System.Drawing.Size(884, 378);
                this.panel2.TabIndex = 6;
                // 
                // txtWords
                // 
                this.txtWords.Location = new System.Drawing.Point(548, 12);
                this.txtWords.Multiline = true;
                this.txtWords.Name = "txtWords";
                this.txtWords.ScrollBars = System.Windows.Forms.ScrollBars.Both;
                this.txtWords.Size = new System.Drawing.Size(324, 38);
                this.txtWords.TabIndex = 7;
                // 
                // btnCutWords
                // 
                this.btnCutWords.Location = new System.Drawing.Point(413, 19);
                this.btnCutWords.Name = "btnCutWords";
                this.btnCutWords.Size = new System.Drawing.Size(75, 23);
                this.btnCutWords.TabIndex = 8;
                this.btnCutWords.Text = "分词--》";
                this.btnCutWords.UseVisualStyleBackColor = true;
                this.btnCutWords.Click += new System.EventHandler(this.btnCutWords_Click);
                // 
                // txtWords2
                // 
                this.txtWords2.Location = new System.Drawing.Point(548, 51);
                this.txtWords2.Multiline = true;
                this.txtWords2.Name = "txtWords2";
                this.txtWords2.ScrollBars = System.Windows.Forms.ScrollBars.Both;
                this.txtWords2.Size = new System.Drawing.Size(324, 38);
                this.txtWords2.TabIndex = 9;
                // 
                // txtResult
                // 
                this.txtResult.Dock = System.Windows.Forms.DockStyle.Fill;
                this.txtResult.Location = new System.Drawing.Point(0, 0);
                this.txtResult.Multiline = true;
                this.txtResult.Name = "txtResult";
                this.txtResult.ScrollBars = System.Windows.Forms.ScrollBars.Both;
                this.txtResult.Size = new System.Drawing.Size(884, 378);
                this.txtResult.TabIndex = 8;
                // 
                // label3
                // 
                this.label3.AutoSize = true;
                this.label3.Location = new System.Drawing.Point(513, 24);
                this.label3.Name = "label3";
                this.label3.Size = new System.Drawing.Size(29, 12);
                this.label3.TabIndex = 11;
                this.label3.Text = "盘古";
                // 
                // label4
                // 
                this.label4.AutoSize = true;
                this.label4.Location = new System.Drawing.Point(513, 51);
                this.label4.Name = "label4";
                this.label4.Size = new System.Drawing.Size(29, 12);
                this.label4.TabIndex = 10;
                this.label4.Text = "标准";
                // 
                // Form1
                // 
                this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 12F);
                this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font;
                this.ClientSize = new System.Drawing.Size(884, 470);
                this.Controls.Add(this.panel2);
                this.Controls.Add(this.panel1);
                this.Name = "Form1";
                this.Text = "Form1";
                this.panel1.ResumeLayout(false);
                this.panel1.PerformLayout();
                this.panel2.ResumeLayout(false);
                this.panel2.PerformLayout();
                this.ResumeLayout(false);
    
            }
    
            #endregion
            private System.Windows.Forms.Button btnSearch;
            private System.Windows.Forms.Panel panel1;
            private System.Windows.Forms.Panel panel2;
            private System.Windows.Forms.TextBox textBox2;
            private System.Windows.Forms.Button btnCutWords;
            private System.Windows.Forms.TextBox txtWords;
            private System.Windows.Forms.TextBox txtWords2;
            private System.Windows.Forms.TextBox txtResult;
            private System.Windows.Forms.Label label3;
            private System.Windows.Forms.Label label4;
        }
    }
    View Code

    Form1.cs

    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Standard;
    using Lucene.Net.Store;
    using LN = Lucene.Net;
    using System;
    using System.Collections.Generic;
    using System.IO;
    using System.Text;
    using System.Windows.Forms;
    using Lucene.Net.Index;
    using Lucene.Net.Documents;
    using Lucene.Net.Analysis.PanGu;
    using Lucene.Net.Search;
    using PanGu;
    using Lucene.Net.QueryParsers;
    using PanGu.HighLight;
    using System.Diagnostics;
    
    namespace lucuneTest
    {
        public partial class Form1 : Form
        {
            /// <summary>
            /// Builds the form's controls, initializes the PanGu segmenter from its XML
            /// configuration file and then calls <see cref="createIndex"/>.
            /// </summary>
            public Form1()
            {
                InitializeComponent();
                // Point the PanGu segmenter at its XML configuration file.
                PanGu.Segment.Init(PanGuXmlPath);
                // Build the index.
                createIndex();
            }
    
            /// <summary>
            /// Recreates the index with generated sample documents (five documents for each of
            /// 100 iterations) and writes the elapsed time, in seconds, to the console.
            /// </summary>
            void createIndex()
            {
                // Third IndexWriter argument: true recreates the index, false appends to the existing one.
                // This demo builds a new index, so it passes true; later additions should append instead.
                // The using block disposes the writer even when adding a document throws.
                using (IndexWriter writer = new IndexWriter(direcotry, PanGuAnalyzer, true, IndexWriter.MaxFieldLength.LIMITED))
                {
                    Stopwatch sw = Stopwatch.StartNew();
                    for (int i = 1; i < 101; i++)
                    {
                        // Compute the date once so all five documents of an iteration share the same value.
                        string date = DateTime.Now.AddDays(i).ToString("yyyy-MM-dd");

                        AddIndex(writer, "我的标题" + i, i + "这是我的标题啦" + i, date);
                        AddIndex(writer, "射雕英雄传作者金庸" + i, i + "我是欧阳锋" + i, date);
                        AddIndex(writer, "天龙八部12" + i, i + "慕容废墟,上官静儿,打撒飞艾丝凡爱上,虚竹" + i, date);
                        AddIndex(writer, "倚天屠龙记12" + i, i + "张无忌机" + i, date);
                        AddIndex(writer, "三国演义" + i, i + "刘备,张飞,关羽还有谁来着 忘记啦" + i, date);
                    }
                    writer.Optimize();

                    // Stop the clock before reading it so the reported figure is final.
                    sw.Stop();
                    string time = ((double)sw.ElapsedMilliseconds / 1000).ToString();
                    Console.WriteLine("创建100条记录需要时长:" + time + "");
                }
            }
    
            /// <summary>
            /// Adds one document with "title", "content" and "addtime" fields to the index.
            /// </summary>
            /// <param name="writer">Open index writer that receives the document.</param>
            /// <param name="title">Title text; stored and analyzed.</param>
            /// <param name="content">Content text; stored and analyzed.</param>
            /// <param name="date">Date string; stored and indexed without analysis.</param>
            private void AddIndex(IndexWriter writer, string title, string content, string date)
            {
                // No try/catch here: the former handlers only executed "throw ex;", which rethrows
                // the same exception but resets its stack trace. Letting it propagate keeps the trace
                // and callers still observe exactly the same exception types.
                Document doc = new Document();
                doc.Add(new Field("title", title, Field.Store.YES, Field.Index.ANALYZED));        // stored and analyzed
                doc.Add(new Field("content", content, Field.Store.YES, Field.Index.ANALYZED));    // stored and analyzed
                doc.Add(new Field("addtime", date, Field.Store.YES, Field.Index.NOT_ANALYZED));   // stored, not analyzed
                writer.AddDocument(doc);
            }
    
            /// <summary>
            /// Click handler of the segmentation button: tokenizes the text of textBox2 once with
            /// the PanGu analyzer and once with Lucene's StandardAnalyzer, and shows each result
            /// in its own text box.
            /// </summary>
            /// <param name="sender">Event source.</param>
            /// <param name="e">Event data.</param>
            private void btnCutWords_Click(object sender, EventArgs e)
            {
                txtWords.Text = string.Empty;

                Analyzer standardAnalyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
                string panGuTokens = cutWords(textBox2.Text, PanGuAnalyzer);          // PanGu segmentation
                txtWords.Text = panGuTokens;

                string standardTokens = cutWords(textBox2.Text, standardAnalyzer);    // built-in standard analyzer
                txtWords2.Text = standardTokens;
            }
    
            /// <summary>
            /// Tokenizes <paramref name="words"/> with the given analyzer and returns the terms
            /// joined into one string, each term followed by "|".
            /// </summary>
            /// <param name="words">Text to tokenize; null is treated as empty text.</param>
            /// <param name="analyzer">Analyzer used for tokenizing. It is closed before this method returns.</param>
            /// <returns>The terms in stream order, each followed by "|"; empty when there are no tokens.</returns>
            private string cutWords(string words, Analyzer analyzer)
            {
                string text = words ?? string.Empty;
                StringBuilder result = new StringBuilder();
                try
                {
                    // NOTE(review): the first TokenStream argument is the field name; the text itself is
                    // passed here, as before. Confirm whether a fixed field name was intended.
                    using (System.IO.StringReader reader = new System.IO.StringReader(text))
                    using (Lucene.Net.Analysis.TokenStream ts = analyzer.TokenStream(text, reader))
                    {
                        while (ts.IncrementToken())
                        {
                            Lucene.Net.Analysis.Tokenattributes.ITermAttribute term =
                                ts.GetAttribute<Lucene.Net.Analysis.Tokenattributes.ITermAttribute>();
                            result.Append(term.Term).Append('|');
                        }
                        // The stream is disposed by the using block. The former ts.CloneAttributes()
                        // call only produced a copy that was discarded and released nothing.
                    }
                }
                finally
                {
                    // Close the analyzer even when tokenizing throws.
                    analyzer.Close();
                }
                return result.ToString();
            }
    
    
            protected IList<Article> list = new List<Article>();
    
    
    
            /// <summary>
            /// Searches the "title" and "content" fields for the given text and hands the query
            /// to <see cref="GetSearchResult"/>, which fills <c>list</c>.
            /// </summary>
            /// <param name="searchKey">Raw text typed by the user. Null, empty or whitespace-only
            /// input performs no search.</param>
            private void SearchIndex(string searchKey)
            {
                // Nothing to search for: leave the result list untouched instead of handing
                // an empty expression to the query parser.
                if (string.IsNullOrWhiteSpace(searchKey))
                {
                    return;
                }

                // Convert the raw text into the boosted "term^boost" form expected by the parser.
                searchKey = GetKeyWordsSplitBySpace(searchKey);
                if (searchKey.Length == 0)
                {
                    // Segmentation produced no terms, so there is nothing to parse.
                    return;
                }

                string[] fields = { "title", "content" };   // fields to search
                QueryParser parser = new MultiFieldQueryParser(LN.Util.Version.LUCENE_30, fields, PanGuAnalyzer);
                Query query = parser.Parse(searchKey);

                BooleanQuery bQuery = new BooleanQuery();
                // new Occur() is the default Occur value — presumably Occur.MUST; TODO confirm.
                bQuery.Add(query, new Occur());

                // Keywords per field, used later for highlighting.
                Dictionary<string, string> dic = new Dictionary<string, string>();
                dic.Add("title", searchKey);
                dic.Add("content", searchKey);

                GetSearchResult(bQuery, dic);
            }
    
            /// <summary>
            /// Runs the query against the index, converts each hit into an <see cref="Article"/>
            /// with highlighted title and content, and appends it to <c>list</c>.
            /// </summary>
            /// <param name="bQuery">Query to execute.</param>
            /// <param name="dicKeywords">Keywords per field name, passed on to <see cref="SetHighlighter"/>.</param>
            private void GetSearchResult(BooleanQuery bQuery, Dictionary<string, string> dicKeywords)
            {
                const int maxNum = 100;   // maximum number of hits to fetch

                // Third SortField argument: true = descending, false = ascending.
                // SortField.STRING orders by the value of "addtime". The former SortField.DOC
                // orders by document number and does not look at the named field.
                Sort sort = new Sort(new SortField("addtime", SortField.STRING, true));

                // Dispose the searcher when done; it was previously left open after every search.
                using (IndexSearcher search = new IndexSearcher(direcotry, true))
                {
                    TopDocs docs = search.Search(bQuery, (Filter)null, maxNum, sort);
                    if (docs == null || docs.ScoreDocs == null)
                    {
                        return;
                    }

                    // Iterate over what was actually returned rather than over TotalHits.
                    foreach (ScoreDoc hit in docs.ScoreDocs)
                    {
                        Document doc = search.Doc(hit.Doc);
                        Article model = new Article()
                        {
                            // A document lacking a field yields an empty string instead of a NullReferenceException.
                            Title = doc.Get("title") ?? string.Empty,
                            Content = doc.Get("content") ?? string.Empty,
                            AddTime = doc.Get("addtime") ?? string.Empty
                        };
                        list.Add(SetHighlighter(dicKeywords, model));
                    }
                }
            }
    
            /// <summary>
            /// Path of the folder that holds the index: "/IndexDic" appended to the application's startup path.
            /// </summary>
            protected string IndexDic
            {
                get
                {
                    string startupFolder = Application.StartupPath;
                    return string.Concat(startupFolder, "/IndexDic");
                }
            }
    
            // Directory handle created on first use and then shared by all readers and writers of this form.
            private LN.Store.Directory cachedIndexDirectory;

            /// <summary>
            /// Lucene directory over the <see cref="IndexDic"/> folder. The folder is created if it
            /// does not exist. The same instance is returned on every access, whereas each read
            /// previously opened a new directory object that nothing ever closed.
            /// </summary>
            public LN.Store.Directory direcotry
            {
                get
                {
                    if (cachedIndexDirectory == null)
                    {
                        // Create the index folder on first use.
                        if (!System.IO.Directory.Exists(IndexDic))
                        {
                            System.IO.Directory.CreateDirectory(IndexDic);
                        }

                        cachedIndexDirectory = FSDirectory.Open(IndexDic);
                    }

                    return cachedIndexDirectory;
                }

            }
            /// <summary>
            /// Path of the PanGu configuration file: "/PanGu/PanGu.xml" appended to the application's startup path.
            /// </summary>
            protected string PanGuXmlPath
            {
                get
                {
                    string startupFolder = Application.StartupPath;
                    return string.Concat(startupFolder, "/PanGu/PanGu.xml");
                }
            }
    
            /// <summary>
            /// PanGu analyzer. Every read of this property creates a new analyzer instance.
            /// </summary>
            protected Analyzer PanGuAnalyzer
            {
                get
                {
                    Analyzer freshAnalyzer = new PanGuAnalyzer();
                    return freshAnalyzer;
                }

            }
    
            /// <summary>
            /// Segments the keywords with PanGu and renders them as a space-separated list of
            /// boosted query terms in the form "word^N.0", where N is 3 raised to the word's rank.
            /// </summary>
            /// <param name="keywords">Raw text typed by the user.</param>
            /// <returns>The boosted term list, or an empty string when there is nothing to segment.</returns>
            private string GetKeyWordsSplitBySpace(string keywords)
            {
                if (string.IsNullOrEmpty(keywords))
                {
                    return string.Empty;
                }

                PanGuTokenizer ktTokenizer = new PanGuTokenizer();
                ICollection<WordInfo> words = ktTokenizer.SegmentToWordInfos(keywords);

                StringBuilder result = new StringBuilder();
                foreach (WordInfo word in words)
                {
                    if (word == null || string.IsNullOrEmpty(word.Word))
                    {
                        continue;
                    }

                    // The word comes from user input and ends up inside a query expression, so
                    // query-syntax characters in it are escaped to be matched literally.
                    string term = QueryParser.Escape(word.Word);
                    result.AppendFormat("{0}^{1}.0 ", term, (int)Math.Pow(3, word.Rank));
                }
                return result.ToString().Trim();
            }
    
            /// <summary>
            /// Replaces the title and content of <paramref name="model"/> with their highlighted
            /// fragments, wrapping matched keywords in a green font tag.
            /// </summary>
            /// <param name="dicKeywords">Keywords per field; the "title" and "content" entries are used.</param>
            /// <param name="model">Article to highlight; it is modified in place.</param>
            /// <returns>The same <paramref name="model"/> instance.</returns>
            private Article SetHighlighter(Dictionary<string, string> dicKeywords, Article model)
            {
                // The attribute quotes must be escaped inside the C# string literal;
                // unescaped, the literal ends early and the line does not compile.
                SimpleHTMLFormatter simpleHTMLFormatter = new PanGu.HighLight.SimpleHTMLFormatter("<font color=\"green\">", "</font>");
                Highlighter highlighter = new PanGu.HighLight.Highlighter(simpleHTMLFormatter, new Segment());
                highlighter.FragmentSize = 50;

                string strTitle;
                string strContent;
                dicKeywords.TryGetValue("title", out strTitle);
                dicKeywords.TryGetValue("content", out strContent);

                if (!string.IsNullOrEmpty(strTitle))
                {
                    // Keep the original text when the highlighter returns nothing.
                    var transStr = highlighter.GetBestFragment(strTitle, model.Title);
                    model.Title = string.IsNullOrEmpty(transStr) ? model.Title : transStr;
                }
                if (!string.IsNullOrEmpty(strContent))
                {
                    var transStr = highlighter.GetBestFragment(strContent, model.Content);
                    model.Content = string.IsNullOrEmpty(transStr) ? model.Content : transStr;
                }
                return model;
            }
    
            /// <summary>
            /// Click handler of the search button: searches for the text of textBox2 and prints
            /// one line per hit into txtResult, or a "no results" message when nothing was found.
            /// </summary>
            /// <param name="sender">Event source.</param>
            /// <param name="e">Event data.</param>
            private void btnSearch_Click(object sender, EventArgs e)
            {
                list.Clear();
                this.txtResult.Text = "";
                SearchIndex(this.textBox2.Text);
                if (list.Count == 0)
                {
                    this.txtResult.Text = "没有查询到结果";
                    return;
                }

                // Build the output once instead of re-assigning the text box for every hit.
                StringBuilder output = new StringBuilder();
                foreach (Article item in list)
                {
                    output.Append("标题:").Append(item.Title)
                          .Append(" 内容:").Append(item.Content)
                          .Append(" 时间:").Append(item.AddTime)
                          // The line terminator was a raw line break inside the string literal, which
                          // does not compile; restored as an escape sequence (presumably "\r\n" originally).
                          .Append("\r\n");
                }
                this.txtResult.Text = output.ToString();
            }
    
    
    
    
    
    
    
    
    
            #region 删除索引数据(根据id)  
            /// <summary>
            /// Deletes the documents whose "id" field equals <paramref name="id"/> and commits the change.
            /// </summary>
            /// <param name="id">Value of the "id" term to delete; null or empty deletes nothing.</param>
            /// <returns>The writer's HasDeletions() result after the commit; false for a null or empty id.</returns>
            public bool Delete(string id)
            {
                if (string.IsNullOrEmpty(id))
                {
                    return false;
                }

                // NOTE(review): AddIndex writes only "title", "content" and "addtime" fields, so no
                // document indexed by this form carries an "id" term to match — confirm intended usage.
                Term term = new Term("id", id);

                // The using block disposes the writer even when the delete or the commit throws.
                using (IndexWriter writer = new IndexWriter(direcotry, PanGuAnalyzer, false, IndexWriter.MaxFieldLength.LIMITED))
                {
                    writer.DeleteDocuments(term);
                    writer.Commit();
                    return writer.HasDeletions();
                }
            }
            #endregion
    
            #region 删除全部索引数据  
            /// <summary>
            /// Deletes every document in the index and commits the change.
            /// </summary>
            /// <returns>true when the delete and commit completed; false when an exception occurred.</returns>
            public bool DeleteAll()
            {
                try
                {
                    // The using block disposes the writer even when the delete or the commit throws.
                    using (IndexWriter writer = new IndexWriter(direcotry, PanGuAnalyzer, false, IndexWriter.MaxFieldLength.LIMITED))
                    {
                        writer.DeleteAll();
                        writer.Commit();
                    }

                    // Success means the operation completed. The result used to come from
                    // writer.HasDeletions(). NOTE(review): after DeleteAll() that presumably reports
                    // false even on success, because no segments with deletions remain — confirm.
                    return true;
                }
                catch (Exception ex)
                {
                    // Still best-effort (callers only get false), but the failure is no longer silent.
                    Debug.WriteLine("DeleteAll failed: " + ex);
                    return false;
                }
            }
            #endregion
    
        }
    
    
    
        /// <summary>
        /// Plain data holder for one search result.
        /// </summary>
        public class Article
        {
            /// <summary>Identifier of the article.</summary>
            public string Id { get; set; }

            /// <summary>Title text.</summary>
            public string Title { get; set; }

            /// <summary>Content text.</summary>
            public string Content { get; set; }

            /// <summary>Date the article was added, as a string.</summary>
            public string AddTime { get; set; }
        }
    }
    View Code

    实例下载地址:lucuneTest.zip

  • 相关阅读:
    Java开源爬虫框架crawler4j
    Java——关于static关键字的那些事总结
    Java——关于static关键字的那些事总结
    Struts2+Hibernate实现图书管理系统
    Struts2+Hibernate实现图书管理系统
    JDBC+Servlet+JSP实现基本的增删改查(简易通讯录)
    JDBC+Servlet+JSP实现基本的增删改查(简易通讯录)
    结合BeautyEye开源UI框架实现的较美观的Java桌面程序
    结合BeautyEye开源UI框架实现的较美观的Java桌面程序
    HTTP和HTTPS详解。
  • 原文地址:https://www.cnblogs.com/fj99/p/5513006.html
Copyright © 2011-2022 走看看