zoukankan      html  css  js  c++  java
  • Lucene.net 全文检索 盘古分词

    lucene.net + 盘古分词

    引用:

    1.Lucene.Net.dll

    2.PanGu.Lucene.Analyzer.dll 

    3.PanGu.HighLight.dll

    4.PanGu.dll

     1 using Lucene.Net.Search;
     2 using Lucene.Net.Store;
     3 using Lucene.Net.QueryParsers;
     4 using Lucene.Net.Documents;
     5 using Lucene.Net.Index;
     6 using Lucene.Net.Analysis.Standard;
     7 using Lucene.Net.Analysis;
     8 using Lucene.Net.Analysis.PanGu;
     9 using PanGu.HighLight;
    10 using PanGu;

    1.建立索引:

     1 static string path = @"G:indextest";//索引文件储存位置
     2 
     3 static void CreateIndex()
     4         {
     5             //创建索引库目录
     6             var directory = FSDirectory.Open(new DirectoryInfo(path));
     7             Analyzer analyzer = null;
     8             //analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
     9 
    10             if (isPangu)
    11             {
    12                 analyzer = new PanGuAnalyzer();//盘古Analyzer
    13             }
    14             else
    15             {
    16                 analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
    17             }
    18 
    19             //创建一个索引,采用StandardAnalyzer对句子进行分词
    20             IndexWriter indexWriter = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    21             MySqlConnection conn = new MySqlConnection(@"server=localhost;User Id=root;password=123456;Database=ecshop");
    22             conn.Open();
    23             MySqlCommand cmd = new MySqlCommand("select goods_name,goods_brief from ecs_goods", conn);
    24             MySqlDataReader reader = cmd.ExecuteReader();
    25             while (reader.Read())
    26             {
    27                 //域的集合:文档,类似于表的行
    28                 Document doc = new Document();
    29                 //要索引的字段
    30                 doc.Add(new Field("goods_name", reader["goods_name"].ToString(), Field.Store.YES, Field.Index.ANALYZED));
    31                 doc.Add(new Field("goods_brief", reader["goods_brief"].ToString(), Field.Store.YES, Field.Index.ANALYZED));
    32                 indexWriter.AddDocument(doc);
    33             }
    34             reader.Close();
    35             //对索引文件进行优化
    36             indexWriter.Optimize();
    37             indexWriter.Close();
    38         }

    2.搜索:

     1      protected void Page_Load(object sender, EventArgs e)
     2         {
     3             keyword = Request.Form["q"];
     4             if (keyword != null && keyword != "")
     5             {
     6                 var watch = Stopwatch.StartNew();
     7                 Analyzer analyzer = null;
     8                 analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
     9 
    10                 //搜索
    11                 IndexSearcher searcher = new IndexSearcher(FSDirectory.Open(new DirectoryInfo(path)), true);
    12 
    13                 //查询表达式
    14                 QueryParser queryP = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, "goods_name", analyzer);
    15 
    16                 //query.parse:注入查询条件
    17                 Query query = queryP.Parse(keyword);
    18                 var hits = searcher.Search(query, 200);
    19 
    20                 //create highlighter
    21                 //IFormatter formatter = new SimpleHTMLFormatter("<span style="font-weight:bold;color: red;">", "</span>");
    22                 //SimpleFragmenter fragmenter = new SimpleFragmenter(80);
    23                 //var scorer = new QueryScorer(query);
    24                 //Highlighter highlighter = new Highlighter(formatter, scorer);
    25                 //highlighter.TextFragmenter = fragmenter;
    26 
    27                 //PanGu create highlighter
    28                 PanGu.HighLight.SimpleHTMLFormatter simpleHTMLFormatter =
    29                    new PanGu.HighLight.SimpleHTMLFormatter("<span style="font-weight:bold;color: red;">", "</span>");
    30                 PanGu.HighLight.Highlighter highlighter =
    31                     new PanGu.HighLight.Highlighter(simpleHTMLFormatter,
    32                     new Segment());
    33                 highlighter.FragmentSize = 50;
    34 
    35                 for (int i = 0; i < hits.totalHits; i++)
    36                 {
    37                     Document doc = searcher.Doc(hits.scoreDocs[i].doc);
    38                     //TokenStream stream = analyzer.TokenStream("goods_name", new StringReader(doc.Get("goods_name")));
    39                     //String sample = highlighter.GetBestFragments(stream, doc.Get("goods_name"), 2, "...");
    40                     goods g = new goods();
    41                     g.goods_name = highlighter.GetBestFragment(keyword, doc.Get("goods_name"));
    42                     g.goods_brief = highlighter.GetBestFragment(keyword, doc.Get("goods_brief"));
    43                     gs.Add(g);
    44                 }
    45 
    46                 watch.Stop();
    47 
    48                 tasktime = "搜索耗费时间:" + watch.ElapsedMilliseconds + "毫秒";
    49             }
    50         }

    3.多字段搜索:

    // Multi-field search: parse the keyword against several fields in one query.
    // The field names must match those written by CreateIndex (goods_name, goods_brief).
    string[] fields = { "goods_name", "goods_brief" };
    MultiFieldQueryParser mq = new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_29, fields, analyzer);
    Query multiquery = mq.Parse(keyword);
    var hits1 = searcher.Search(multiquery, 200);
  • 相关阅读:
    yum 安装包的用法
    php session文件修改路径
    apache性能测试工具ab使用详解
    shell exit 0 exit 1
    网站架构(页面静态化,图片服务器分离,负载均衡)方案全解析
    【转载】新手如何快速打造高流量网站
    高并发处理方案
    HTML静态化技术
    在项目中学习.NET的json(二)之运费计算器
    在项目中学习.NET的json(一)
  • 原文地址:https://www.cnblogs.com/mahatmasmile/p/3193911.html
Copyright © 2011-2022 走看看