zoukankan      html  css  js  c++  java
  • Lucene.net 全文检索数据库

    原文:https://www.cnblogs.com/LTEF/p/10403114.html

    https://www.cnblogs.com/zuowj/p/11689563.html

    Lucene 是一个开源的全文检索引擎工具包(类库),提供了完整的索引引擎和查询引擎;Lucene.NET 是 Lucene 在 C#/.NET 平台上的移植版本,官网地址:http://lucenenet.apache.org/ 

    需要通过 NuGet 安装两个包:Lucene.Net 和 Lucene.Net.Analysis.PanGu(盘古中文分词)。下面的示例代码使用的是 Lucene.Net 3.0.x 的 API(例如 Lucene.Net.Util.Version.LUCENE_30)。

    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.PanGu;
    using Lucene.Net.Analysis.Tokenattributes;
    using Lucene.Net.Documents;
    using Lucene.Net.Index;
    using Lucene.Net.QueryParsers;
    using Lucene.Net.Search;
    using Lucene.Net.Store;
    using Newtonsoft.Json;
    using System;
    using System.Collections.Generic;
    using System.IO;
    using System.Text;
    using System.Windows.Forms;
     
      /// <summary>
      /// Searches the "ItemIndexDir" index for the keywords typed into <c>textBox1</c>,
      /// prints the stored fields of every hit to the console and shows the last hit,
      /// serialised as JSON, in <c>textBox2</c>.
      /// </summary>
      /// <param name="sender">The control that raised the click event.</param>
      /// <param name="e">Event data (unused).</param>
      private void button1_Click(object sender, EventArgs e)
            {
                // Tokenise first: an empty query string is not valid QueryParser syntax,
                // so when nothing usable was typed there is nothing to search for.
                string keywords = LuceneHelper.GetKeyWordSplid(textBox1.Text);
                if (string.IsNullOrWhiteSpace(keywords))
                {
                    return;
                }

                // Every Lucene object below is disposable. The using blocks release them in
                // reverse order (searcher, reader, directory, analyzer) even when the search
                // throws; the searcher does not own the reader, so the reader needs its own using.
                using (Analyzer analyzer = new PanGuAnalyzer())
                using (Lucene.Net.Store.Directory dir_search =
                    FSDirectory.Open(new System.IO.DirectoryInfo("ItemIndexDir"), new NoLockFactory()))
                using (IndexReader reader = IndexReader.Open(dir_search, true))
                using (IndexSearcher search = new IndexSearcher(reader))
                {
                    QueryParser parser =
                        new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "ItemName", analyzer);
                    Query query = parser.Parse(keywords);

                    // Run the search and fetch at most 20000 hits.
                    TopDocs ts = search.Search(query, null, 20000);
                    ScoreDoc[] docs = ts.ScoreDocs;
                    Console.WriteLine(docs.Length);

                    Document lastDoc = null;
                    foreach (ScoreDoc hit in docs)
                    {
                        Document doc = search.Doc(hit.Doc);
                        Console.WriteLine(doc.Get("id"));
                        Console.WriteLine(doc.Get("ItemName"));
                        Console.WriteLine(doc.Get("Purity"));
                        Console.WriteLine(doc.Get("Size"));
                        Console.WriteLine(doc.Get("Unit"));
                        Console.WriteLine(doc.Get("VenderName"));
                        lastDoc = doc;
                    }

                    // Only the last hit ever remained visible in the text box, so serialise and
                    // assign once after the loop instead of once per hit. With no hits the text
                    // box is left untouched, as before.
                    if (lastDoc != null)
                    {
                        textBox2.Text = JsonConvert.SerializeObject(lastDoc);
                    }
                }
            }
    
            /// <summary>
            /// Builds four sample <see cref="ItemInfo"/> records and writes them to the index
            /// through <see cref="CreateIndex"/>.
            /// </summary>
            /// <param name="sender">The control that raised the click event.</param>
            /// <param name="e">Event data (unused).</param>
            private void button2_Click(object sender, EventArgs e)
            {
                Console.WriteLine(@"开始创建索引");

                // The sample records differ only in id and name; every other field is shared.
                // NOTE(review): id 1000 is used by three of the four records — confirm this is intended.
                int[] sampleIds = { 1000, 2000, 1000, 1000 };
                string[] sampleNames = { "吃葡萄不吐葡萄皮", "年轻人不讲武德", "耗子尾汁", "闪电五连鞭" };

                var bills = new List<ItemInfo>();
                for (int index = 0; index < sampleIds.Length; index++)
                {
                    var entry = new ItemInfo();
                    entry.ItemId = sampleIds[index];
                    entry.ItemName = sampleNames[index];
                    entry.Price = "100";
                    entry.Purity = "100";
                    entry.Size = "1";
                    entry.Unit = "1";
                    entry.VenderName = "2";
                    bills.Add(entry);
                }

                CreateIndex(bills);
            }
    
            /// <summary>
            /// Helper that splits search keywords into terms with the PanGu analyzer.
            /// </summary>
            public static class LuceneHelper
            {
                /// <summary>
                /// Runs <paramref name="keywords"/> through <see cref="PanGuAnalyzer"/> and returns
                /// the resulting terms, each followed by a single space (so a non-empty result
                /// ends with a trailing space).
                /// </summary>
                /// <param name="keywords">Raw keyword text; may be null or empty.</param>
                /// <returns>The space-separated terms, or an empty string when there is no input.</returns>
                public static string GetKeyWordSplid(string keywords)
                {
                    // StringReader rejects null, and empty text yields no terms anyway.
                    if (string.IsNullOrEmpty(keywords))
                    {
                        return string.Empty;
                    }

                    StringBuilder sb = new StringBuilder();

                    // Both the analyzer and the token stream are disposable; release them
                    // even if tokenisation throws.
                    // NOTE(review): the first TokenStream argument is the field-name slot; the keyword
                    // text is passed there exactly as before — confirm the analyzer ignores it.
                    using (Analyzer analyzer = new PanGuAnalyzer())
                    using (TokenStream stream = analyzer.TokenStream(keywords, new StringReader(keywords)))
                    {
                        // The attribute instance is shared across tokens, so fetch it once.
                        // AddAttribute returns the existing instance when one is already registered.
                        ITermAttribute termAttribute = stream.AddAttribute<ITermAttribute>();
                        while (stream.IncrementToken())
                        {
                            sb.Append(termAttribute.Term).Append(' ');
                        }
                    }

                    return sb.ToString();
                }
            }
    
            /// <summary>
            /// Writes the given items into the Lucene index stored in the "ItemIndexDir" folder.
            /// A new index is created when none exists there; otherwise the documents are
            /// appended to the existing one. The index is optimised after all items are added.
            /// </summary>
            /// <param name="list">Items to index; must not be null.</param>
            /// <exception cref="ArgumentNullException"><paramref name="list"/> is null.</exception>
            private static void CreateIndex(List<ItemInfo> list)
            {
                if (list == null)
                {
                    throw new ArgumentNullException(nameof(list));
                }

                // Number of fields added to the document being built when a failure occurs;
                // reported in the error log to help locate the offending field.
                int step = 0;

                // using blocks release the writer, directory and analyzer in reverse order
                // on both the success and the failure path.
                using (Analyzer analyzer = new PanGuAnalyzer())
                using (Lucene.Net.Store.Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo("ItemIndexDir")))
                {
                    try
                    {
                        // Create a fresh index only when no index exists in the directory yet.
                        bool isCreate = !IndexReader.IndexExists(dir);
                        using (IndexWriter writer = new IndexWriter(dir, analyzer, isCreate, IndexWriter.MaxFieldLength.UNLIMITED))
                        {
                            foreach (var item in list)
                            {
                                Document doc = new Document();
                                if (item.ItemId % 1000 == 0)
                                {
                                    Console.WriteLine($@"开始写入{item.ItemId}");
                                }

                                doc.Add(new Field("id", item.ItemId.ToString(), Field.Store.YES, Field.Index.ANALYZED));
                                step = 1;
                                doc.Add(new Field("ItemName", item.ItemName, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
                                step = 2;
                                doc.Add(new Field("Purity", item.Purity, Field.Store.YES, Field.Index.ANALYZED));
                                step = 3;
                                doc.Add(new Field("Size", item.Size, Field.Store.YES, Field.Index.ANALYZED));
                                step = 4;
                                doc.Add(new Field("Unit", item.Unit, Field.Store.YES, Field.Index.ANALYZED));
                                step = 5;
                                doc.Add(new Field("VenderName", item.VenderName, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
                                step = 6;
                                doc.Add(new Field("Price", item.Price, Field.Store.YES, Field.Index.ANALYZED));
                                step = 7;

                                writer.AddDocument(doc, analyzer);
                            }

                            writer.Optimize();
                        }
                    }
                    catch (Exception ex)
                    {
                        // Log for diagnosis, then let the caller see the original exception.
                        Console.WriteLine(ex);
                        Console.WriteLine($@"error step {step}");
                        throw;
                    }
                }
            }
    
            /// <summary>
            /// Plain data holder for one item that CreateIndex writes into the index.
            /// Every property is stored as its own index field.
            /// </summary>
            public class ItemInfo
            {
                /// <summary>Item identifier; written to the index field "id" via ToString().</summary>
                public int ItemId { get; set; }
                /// <summary>Item name; written to the index field "ItemName", the field the search query targets.</summary>
                public string ItemName { get; set; }
                /// <summary>Written to the index field "Purity".</summary>
                public string Purity { get; set; }
                /// <summary>Written to the index field "Size".</summary>
                public string Size { get; set; }
                /// <summary>Written to the index field "Unit".</summary>
                public string Unit { get; set; }
                /// <summary>Written to the index field "VenderName".</summary>
                public string VenderName { get; set; }
                /// <summary>Written to the index field "Price".</summary>
                public string Price { get; set; }
            }
  • 相关阅读:
    谈谈目前书店里面的计算机书籍“含量”情况 发发看法
    注意:CSS中加入中文注释,会使.NET中样式丢失
    今天很是郁闷
    用周末的时间,通过BT终于把VS2005Team版下载下来啦~~~
    今天在网上找到了两个常用建模工具的下载地址 ,速度还不错
    C#下如何实现服务器+客户端的聊天程序
    CSS网页制作技巧:图片的自适应居中和兼容处理(转)
    SASS用法指南(转)
    CSS选择器学习小结
    JavaScript编写计算器《JavaScript王者归来》读书笔记1
  • 原文地址:https://www.cnblogs.com/zhang1f/p/14330506.html
Copyright © 2011-2022 走看看