zoukankan      html  css  js  c++  java
  • Lucene.net使用代码(一)

      lucene.net配合盘古分词实现中文站内搜索,首先配置好盘古分词的Dict目录,将其放在项目下,文件夹下的所有文件输出到bin文件。

      lucene.net下使用的代码:

    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Web;
    using System.Threading;
    using Lucene.Net.Store;
    using Lucene.Net.Index;
    using System.IO;
    using log4net;
    using Lucene.Net.Analysis.PanGu;
    using RPSite.BLL;
    using Lucene.Net.Documents;
    
    namespace RPSite.Search
    {
        public class IndexManager
        {
            private static ILog logger = LogManager.GetLogger(typeof(IndexManager));
    
            private static IndexManager instance = new IndexManager();
    
            //所有的地方要对索引库进行修改都通过IndexManager,所以要单例
            //因为同时只能有一个在写索引库,所以由“消费者”来进行写
            //别的地方想写索引库要请求“消费者”来进行写AddArticle
            private IndexManager()
            {
    
            }
    
             /// <summary>
             /// 启动消费者线程
             /// </summary>
            public void Start()
            {
                Thread threadIndex = new Thread(Index);
                threadIndex.IsBackground = true;
                threadIndex.Start();
            }
    
            private void Index()
            {
                while (true)
                {
                    //防止空转造成cpu占用率过高
                    if (jobs.Count <= 0)
                    {
                        logger.Debug("没有任务,再睡会!");
                        Thread.Sleep(5 * 1000);
                        continue;
                    }
    
                    //为什么每次循环都要打开、关闭索引库。因为关闭索引库以后才会把写入的数据提交到索引库中。也可以每次操作都“提交”(参考Lucene.net文档)
    
                    string indexPath = "c:/cmsindex";
                    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NativeFSLockFactory());
                    bool isUpdate = IndexReader.IndexExists(directory);
                    logger.Debug("索引库存在状态" + isUpdate);
                    if (isUpdate)
                    {
                        //如果索引目录被锁定(比如索引过程中程序异常退出),则首先解锁
                        if (IndexWriter.IsLocked(directory))
                        {
                            logger.Debug("开始解锁索引库");
                            IndexWriter.Unlock(directory);
                            logger.Debug("解锁索引库完成");
                        }
                    }
    
                    IndexWriter writer = new IndexWriter(directory, new PanGuAnalyzer(), !isUpdate, Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED);
    
                    ProcessJobs(writer);
    
                    writer.Close();
                    directory.Close();//不要忘了Close,否则索引结果搜不到
                    logger.Debug("全部索引完毕");
                }
            }
    
            private void ProcessJobs(IndexWriter writer)
            {
                foreach (var job in jobs.ToArray())
                {
                    //todo:异常处理
                    jobs.Remove(job);// 消费掉
                    //因为是自己的网站,所以直接读取数据库,不用webclient了
                    //为避免重复索引,所以先删除number=i的记录,再重新添加
                    writer.DeleteDocuments(new Term("number", job.Id.ToString()));
    
                    //如果“添加文章”任务再添加,
                    if (job.JobType == JobType.Add)
                    {
                        RP_ArticleBLL artBll = new RP_ArticleBLL();
                        if (artBll == null)//有可能刚添加就被删除了
                        {
                            continue;
                        }
                        var art = artBll.GetById(job.Id);
                        string title = art.Title;
                        string body = art.Msg;//去掉标签                
    
                        Document document = new Document();
                        //只有对需要全文检索的字段才ANALYZED
                        document.Add(new Field("number", job.Id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                        document.Add(new Field("title", title, Field.Store.YES, Field.Index.NOT_ANALYZED));
                        document.Add(new Field("body", body, Field.Store.YES, Field.Index.ANALYZED, Lucene.Net.Documents.Field.TermVector.WITH_POSITIONS_OFFSETS));
                        writer.AddDocument(document);
                        logger.Debug("索引" + job.Id + "完毕");
                    }
    
                    
                }
            }
    
            public static IndexManager GetInstance()
            {
                //Queue<string> q;
                //q.ad
                return instance;
            }
    
            //private Queue<
            private List<IndexJob> jobs = new List<IndexJob>();
    
            public void AddArticle(int artId)
            {
                IndexJob job = new IndexJob();
                job.Id = artId;
                job.JobType = JobType.Add;
                logger.Debug(artId+"加入任务列表");
                jobs.Add(job);//把任务加入商品库
            }
    
            public void RemoveArticle(int artId)
            {
                IndexJob job = new IndexJob();
                job.JobType = JobType.Remove;
                job.Id = artId;
                logger.Debug(artId + "加入删除任务列表");
                jobs.Add(job);//把任务加入商品库
            }
        }
    
        class IndexJob
        {
            public int Id { get; set; }
            public JobType JobType { get; set; }
        }
    
        enum JobType { Add,Remove}
    }
    

      

  • 相关阅读:
    HDOJ/HDU 1015 Safecracker(枚举、暴力)
    nodejs之入门
    git错误收集总结
    git基本操作
    git使用前配置
    花开花落花非花、缘起缘灭缘随缘
    js之定时器
    js之Date(日期对象)
    es5严格模式简谈
    try...catch
  • 原文地址:https://www.cnblogs.com/skybreak/p/3062725.html
Copyright © 2011-2022 走看看