zoukankan      html  css  js  c++  java
  • 盘古搜索--实例解析

    1.引用

    PanGu.dll
    Lucene.Net.dll
    PanGu.Lucene.Analyzer.dll
    PanGu.HighLight.dll--高亮使用
     
    2.Dict文件夹
    文件夹名称必须为Dict,并且要把其中字典文件的“复制到输出目录”属性设置为“如果较新则复制”。
     
    3.创建索引部分。
    思路:把新添加的消息放入redis队列,再由后台线程从队列中读取消息并写入索引。
    线程循环读取队列,每取出一条消息就写入一条索引。
    写入索引的部分通常放在单独的项目中,因为写入索引非常耗内存。
     /// <summary>
     /// Reads JSON-serialized messages from a Redis list on a dedicated thread and writes them
     /// into a Lucene.Net index analyzed with PanGu.
     /// </summary>
     public class MessageIndex
     {
         // Redis list the serialized messages are dequeued from.
         private const string QueueKey = "QiuShiBaiKe.Message";

         // Index location. The original author notes that the casing must match the folder on disk.
         // NOTE(review): the published snippet contained a literal tab here (a mangled "\t");
         // the backslash is restored, but confirm the real folder name before running.
         private const string IndexPath = @"F:\tmpindex2";

         // Pause applied when the queue is empty, just before StartIndex returns.
         private const int EmptyQueueDelayMilliseconds = 3000;

         Thread thread;

         /// <summary>
         /// Loop flag checked by <see cref="RunScan"/>; set it to false to let the scan loop end
         /// after the current pass.
         /// </summary>
         public bool IsRunning { get; set; }

         /// <summary>
         /// Sets <see cref="IsRunning"/> and starts the thread that runs <see cref="RunScan"/>.
         /// </summary>
         public void Start()
         {
             IsRunning = true;
             thread = new Thread(RunScan);
             // IsBackground = false makes this a foreground thread: the process stays alive
             // for as long as the scan loop runs, even after the caller's Main returns.
             thread.IsBackground = false;
             thread.Start();
         }

         /// <summary>
         /// While <see cref="IsRunning"/> is true, obtains a Redis client and runs one indexing pass with it.
         /// </summary>
         public void RunScan()
         {
             while (IsRunning)
             {
                 using (var client = RedisManager.ClientManager.GetClient())
                 {
                     StartIndex(client);
                 }
             }
         }

         /// <summary>
         /// Opens the index, then dequeues messages and indexes them one by one until the queue
         /// is empty. The writer and directory are always closed before returning.
         /// </summary>
         /// <param name="client">Redis client used to dequeue the serialized messages.</param>
         /// <exception cref="Exception">
         /// Any failure is rethrown with the message "写入索引出错"; the original error is kept as the inner exception.
         /// </exception>
         public void StartIndex(IRedisClient client)
         {
             FSDirectory directory = null;
             IndexWriter writer = null;
             try
             {
                 directory = FSDirectory.Open(new DirectoryInfo(IndexPath), new NativeFSLockFactory());
                 bool isExists = IndexReader.IndexExists(directory);

                 // Per the original author: Lucene.Net locks the index while writing and unlocks it on
                 // close, so a leftover lock (e.g. after a crash during indexing) must be cleared first.
                 // This only handles a stale lock; it does not make concurrent writers safe.
                 if (isExists && IndexWriter.IsLocked(directory))
                 {
                     IndexWriter.Unlock(directory);
                 }

                 // Create a new index only when none exists yet.
                 writer = new IndexWriter(directory, new PanGuAnalyzer(), !isExists, Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED);
                 JavaScriptSerializer serializer = new JavaScriptSerializer();

                 while (true)
                 {
                     string json = client.DequeueItemFromList(QueueKey);
                     if (string.IsNullOrEmpty(json))
                     {
                         // Nothing queued: wait, then return so the finally block closes the writer.
                         Thread.Sleep(EmptyQueueDelayMilliseconds);
                         return;
                     }

                     Message message = serializer.Deserialize<Message>(json);
                     if (message == null)
                     {
                         // A payload that deserializes to null carries nothing to index; skip it
                         // instead of letting a NullReferenceException end the pass.
                         continue;
                     }

                     WriteIndex(message, writer);
                 }
             }
             catch (Exception ex)
             {
                 // Same exception type and message as before, but the cause is no longer lost.
                 throw new Exception("写入索引出错", ex);
             }
             finally
             {
                 // Either reference is still null if opening failed part-way; the directory must be
                 // closed even when closing the writer throws.
                 try
                 {
                     if (writer != null)
                     {
                         writer.Close();
                     }
                 }
                 finally
                 {
                     if (directory != null)
                     {
                         directory.Close();
                     }
                 }
             }
         }

         /// <summary>
         /// Writes one message to the index, replacing any document already stored under the same id.
         /// </summary>
         /// <param name="message">Message to index.</param>
         /// <param name="writer">Open index writer to write through.</param>
         private void WriteIndex(Message message, IndexWriter writer)
         {
             string id = message.Id.ToString();
             string text = message.Msg ?? string.Empty;

             // One Document is one record; the field names are free-form and the values are strings.
             Document document = new Document();
             document.Add(new Field("id", id, Field.Store.YES, Field.Index.NOT_ANALYZED));
             document.Add(new Field("message", text, Field.Store.YES, Field.Index.ANALYZED, Lucene.Net.Documents.Field.TermVector.WITH_POSITIONS_OFFSETS));

             // UpdateDocument deletes every document matching the term and then adds the new one.
             writer.UpdateDocument(new Term("id", id), document);
             Console.WriteLine("id="+id+"msg="+message.Msg);
         }
     }

    在索引程序的主入口(例如Main方法)中,创建MessageIndex并调用Start:

    // Create the indexer and start its queue-scanning thread.
    var indexer = new MessageIndex();
    indexer.Start();
    4.搜索部分--使用MVC框架
    1)分词方法:
    //分词
            /// <summary>
            /// Segments <paramref name="keywords"/> with the given tokenizer and builds a query string
            /// of boosted terms, each written as "word^N.0" where N is 3 raised to the word's rank.
            /// </summary>
            /// <param name="keywords">Raw search text to segment.</param>
            /// <param name="ktTokenizer">Tokenizer that splits the text into words.</param>
            /// <returns>The boosted terms separated by single spaces, with surrounding whitespace trimmed.</returns>
            static public string GetKeyWordsSplitBySpace(string keywords, PanGuTokenizer ktTokenizer)
            {
                StringBuilder boosted = new StringBuilder();
                foreach (WordInfo segment in ktTokenizer.SegmentToWordInfos(keywords))
                {
                    // Null entries in the segmentation result are skipped.
                    if (segment != null)
                    {
                        int weight = (int)Math.Pow(3, segment.Rank);
                        boosted.Append(segment.Word).Append('^').Append(weight).Append(".0 ");
                    }
                }

                // Trim drops the separator left after the last term.
                return boosted.ToString().Trim();
            }
    2)搜索并且对结果高亮显示
     //搜索
            /// <summary>
            /// Searches the "message" field of the index and returns one page of hits, each with a
            /// highlighted fragment.
            /// </summary>
            /// <param name="indexDir">Directory that holds the Lucene.Net index.</param>
            /// <param name="q">Raw search text; a null or blank value yields an empty result.</param>
            /// <param name="pageLen">Maximum number of hits per page.</param>
            /// <param name="pageNo">1-based page number; values below 1 are treated as the first page.</param>
            /// <param name="recCount">Receives the total number of hits across all pages.</param>
            /// <returns>The hits of the requested page; hits that fail to load are logged and left out.</returns>
            public static List<SearchResult> SearchResult(String indexDir, String q, int pageLen, int pageNo, out int recCount)
            {
                recCount = 0;
                List<SearchResult> results = new List<SearchResult>();

                // A blank query has nothing to parse; return the empty page instead of failing in the parser.
                if (string.IsNullOrWhiteSpace(q))
                {
                    return results;
                }

                string keywords = q;
                IndexSearcher search = new IndexSearcher(indexDir);
                try
                {
                    q = GetKeyWordsSplitBySpace(q, new PanGuTokenizer());
                    if (string.IsNullOrEmpty(q))
                    {
                        // Segmentation produced no terms.
                        return results;
                    }

                    QueryParser queryParser = new QueryParser("message", new PanGuAnalyzer(true));
                    Query query = queryParser.Parse(q);

                    // Further fields (for example a title) can be searched by parsing additional
                    // queries and adding them here as extra SHOULD clauses.
                    BooleanQuery bq = new BooleanQuery();
                    bq.Add(query, BooleanClause.Occur.SHOULD);

                    Hits hits = search.Search(bq);
                    recCount = hits.Length();

                    // The page is a fixed window over the hit indexes, so a hit that fails to load
                    // never pulls an entry of the following page into this one.
                    int first = Math.Max(0, (pageNo - 1) * pageLen);
                    int last = Math.Min(recCount, first + pageLen);
                    for (int i = first; i < last; i++)
                    {
                        try
                        {
                            var doc = hits.Doc(i);
                            string message = doc.Get("message");

                            SearchResult result = new SearchResult();
                            result.Message = message;
                            result.MessageUrl = "/Message/PreviewMessage/" + doc.Get("id");

                            // The highlighted text is rendered as raw HTML by the view, so the stored
                            // message is HTML-encoded first; only the <font> tags added by the
                            // highlighter remain live markup.
                            string encoded = System.Net.WebUtility.HtmlEncode(message ?? string.Empty);

                            PanGu.HighLight.SimpleHTMLFormatter simpleHTMLFormatter =
                                new PanGu.HighLight.SimpleHTMLFormatter("<font color=\"red\">", "</font>");
                            PanGu.HighLight.Highlighter highlighter =
                                new PanGu.HighLight.Highlighter(simpleHTMLFormatter, new Segment());
                            highlighter.FragmentSize = 50;

                            string fragment = highlighter.GetBestFragment(keywords, encoded);

                            // Fall back to the whole (encoded) message when no fragment is produced.
                            result.MessageHightLigther = string.IsNullOrEmpty(fragment) ? encoded : fragment;

                            results.Add(result);
                        }
                        catch (Exception e)
                        {
                            // Best effort per hit: log and continue with the next one.
                            Console.WriteLine(e.Message);
                        }
                    }

                    return results;
                }
                finally
                {
                    // Close the searcher even when parsing or searching throws.
                    search.Close();
                }
            }

    3)搜索结果的类

    /// <summary>
    /// One search hit as handed to the view.
    /// </summary>
    public class SearchResult
    {
        /// <summary>Message text.</summary>
        public string Message { get; set; }

        /// <summary>Link to the message's detail page.</summary>
        public string MessageUrl { get; set; }

        /// <summary>Message text with highlighting applied.</summary>
        public string MessageHightLigther { get; set; }
    }

    4)在Controller中的主代码

     /// <summary>
     /// Runs a paged search for <paramref name="keyword"/> and passes the hits and paging data
     /// to the view through ViewBag.
     /// </summary>
     /// <param name="keyword">Search text; a blank value renders an empty result page.</param>
     /// <param name="pageIndex">1-based page number; defaults to 1 when omitted.</param>
     /// <returns>The search view, or the "Error" view when the page number is not positive.</returns>
     public ActionResult Search(string keyword, int? pageIndex)
     {
         const int pageSize = 3;

         // NOTE(review): the published snippet contained a literal tab here (a mangled "\t");
         // the backslash is restored, but confirm the path matches the one the indexer writes to.
         const string indexDir = @"F:\tmpindex2";

         int page = pageIndex ?? 1;
         if (page <= 0)
         {
             return View("Error", (object)"pageIndex必须大于0");
         }

         int totalCount = 0;
         List<SearchResult> results;
         if (string.IsNullOrWhiteSpace(keyword))
         {
             // Nothing to search for: show an empty page instead of handing a blank query to the parser.
             results = new List<SearchResult>();
         }
         else
         {
             results = SearchResult(indexDir, keyword, pageSize, page, out totalCount);
         }

         ViewBag.KeyWord = keyword;
         ViewBag.SearchResults = results;
         ViewBag.TotalCount = totalCount; // total number of hits across all pages
         ViewBag.PageIndex = page;
         ViewBag.PageSize = pageSize;
         return View();
     }

    5)Search View中的代码

     @* Search results page: one link per hit, followed by the pager. *@
     @{
         ViewBag.Title = "搜索" + ViewBag.KeyWord;
         Layout = "~/Views/DefaultLayout.cshtml";
     }
     @foreach (var result in ViewBag.SearchResults)
     {
         // Skip empty entries so a missing hit cannot break the page.
         if (result == null)
         {
             continue;
         }
         // MessageHightLigther carries the highlighter's markup, so it is written without encoding.
         <div>
             <a href="@result.MessageUrl" target="_blank">@(new HtmlString(result.MessageHightLigther))</a>
         </div>
     }
     @* The keyword is user input placed in a URL path segment, so it is escaped before being added to the pager URL. *@
     @QiuShiBaiKe.Web.WebHelper.Pager("/Message/Search/{pageIndex}/" + Uri.EscapeDataString((string)ViewBag.KeyWord ?? string.Empty), ViewBag.TotalCount, ViewBag.PageIndex, ViewBag.PageSize)

    6)搜索的路由配置

    // Search route: {controller}/{action}/{pageIndex}/{keyword}, defaulting to Message/Search
    // with both pageIndex and keyword optional.
    routes.MapRoute(
        name: "search",
        url: "{controller}/{action}/{pageIndex}/{keyword}",
        defaults: new { controller = "Message", action = "Search", pageIndex = UrlParameter.Optional, keyword = UrlParameter.Optional }
    );
     
  • 相关阅读:
    java 反射 报错:Attempt to get java.lang.Integer field "..." with illegal data type conversion to int
    经常报错:Communications link failure
    解析Excel
    spring+atomikos+mybatis 多数据源事务(动态切换)
    mysql 存储过程
    Ace Admin 学习笔记
    spring mvc 表单提交 乱码
    spring 事务
    基于注解的Spring多数据源配置和使用(非事务)
    javaEE版本的eclipse中导入工程,发现server里面找不到工程,根本发布不了也不能运行
  • 原文地址:https://www.cnblogs.com/lucyliang/p/4934515.html
Copyright © 2011-2022 走看看