zoukankan      html  css  js  c++  java
  • Lucene.NET 开发实现

    最近在帮一个朋友的忙,为他们的一个软件设计架构。该应用程序的某个核心逻辑涉及的数据量较大,客户对查询性能的要求又很高。这种需求除了在数据库设计上要考虑水平分表、分区视图之类的方案,在程序中也要考虑效率问题,于是决定使用 Lucene.NET 为核心数据建立索引文件,做“假”的全文搜索。这样就算数据量达到千万级别,查询也能在几秒钟内完成,对性能有很大帮助。之前没有仔细了解过 Lucene 方面的技术,正好借此学习一下。

    Lucene.NET 是 Java 版 Lucene 移植到 .NET 平台上的开源项目,技术资料也很丰富。

    以下是创建索引代码:

      /// <summary>
      /// Loads auto parts page by page, keeps the ones accepted by
      /// <c>IsValidProduct</c>, and writes the collected parts to the index.
      /// </summary>
      public static void Run()
            {
                // Paging state handed to the data layer by reference on every call.
                QryPage paging = new QryPage();
                paging.PerPageSize = 350;
                paging.PageNumber = 0;
                // Initial upper bound for the loop below.
                // NOTE(review): presumably QueryAutoParts updates PageCount through
                // the ref parameter; otherwise 10000 pages are requested — confirm.
                paging.PageCount = 10000;
                paging.NeedInitPageNo = false;

                List<AutoParts> collected = new List<AutoParts>();
                for (; paging.PageNumber < paging.PageCount; paging.PageNumber++)
                {
                    // Fetch the data to be indexed for the current page.
                    IList<AutoParts> pageItems = new CustomerQuery().QueryAutoParts(new AutoPartDTO(), ref paging);
                    foreach (var part in pageItems)
                    {
                        if (IsValidProduct(part))
                        {
                            collected.Add(part);
                        }
                    }
                }

                // Write the search item index to file.
                Write(collected);
            }
    
            /// <summary>
            /// Writes the given parts to the search index by delegating to <c>build</c>.
            /// </summary>
            /// <param name="packages">Parts to be indexed.</param>
            public static void Write(List<AutoParts> packages)
            {
                build(packages);
            }
    
            /// <summary>
            /// Creates the product index at <c>Common.ProductIndexPath</c>, adds one
            /// document per part, optimizes the index and logs the elapsed time.
            /// </summary>
            /// <param name="packages">Parts to be indexed.</param>
            /// <remarks>
            /// Failures after the writer has been opened are reported on the console
            /// and not rethrown. The writer is always closed so the index lock is
            /// released even when indexing fails.
            /// </remarks>
            public static void build( List<AutoParts> packages)
            {
                // The last argument is Lucene's "create" flag: the writer is opened in create mode.
                var writer = new IndexWriter(Common.ProductIndexPath, new EsayTooAnalyzer(), true);
                try
                {
                    DateTime start;
                    try
                    {
                        writer.SetMaxFieldLength(1000);
                        writer.SetUseCompoundFile(true);
                        Logger.Info("Indexing to directory '" + Common.ProductIndexPath + "'...");
                        start = System.DateTime.Now;
                        indexDocs(writer, packages);

                        Logger.Info("Optimizing...");
                        writer.Optimize();
                    }
                    finally
                    {
                        // Close exactly once, on success and on failure alike.
                        writer.Close();
                    }

                    DateTime end = System.DateTime.Now;

                    // Report real milliseconds; raw Ticks are 100-nanosecond units.
                    Logger.Info((long)(end - start).TotalMilliseconds + " total milliseconds");
                }
                catch (Exception e)
                {
                    Console.WriteLine(e.Message);
                }
            }
    
            /// <summary>
            /// Replaces the indexed document of a part: deletes every document whose
            /// "id" term equals <c>dto.Id</c>, then adds a fresh document for the part.
            /// </summary>
            /// <param name="dto">Part whose index entry is refreshed.</param>
            /// <remarks>
            /// Errors are reported on the console and not rethrown.
            /// </remarks>
            public static void UpdateIndex(AutoParts dto)
            {
                try
                {
                    Term tm = new Term("id", dto.Id.ToString());

                    // Deletions made through an IndexReader are only committed when the
                    // reader is closed, and the reader holds the index write lock until
                    // then. It must therefore be closed before the writer is opened.
                    var reader = IndexReader.Open(Common.ProductIndexPath);
                    try
                    {
                        reader.DeleteDocuments(tm);
                    }
                    finally
                    {
                        reader.Close();
                    }

                    var writer = new IndexWriter(Common.ProductIndexPath, new EsayTooAnalyzer(), false);
                    try
                    {
                        AddDocument(dto, writer);
                        writer.Optimize();
                    }
                    finally
                    {
                        // Always release the write lock, even if adding or optimizing fails.
                        writer.Close();
                    }
                }
                catch (Exception e)
                {
                    Console.WriteLine("添加索引出错,配件ID:" + dto.Id + "\n");
                    Console.Write(e.Message);
                }

              }
            /// <summary>
            /// Appends a document for a single part to the existing index and
            /// optimizes the index afterwards.
            /// </summary>
            /// <param name="dto">Part to add to the index.</param>
            /// <remarks>
            /// Any failure is reported on the console and then rethrown to the caller.
            /// </remarks>
            public static void AddDocument(AutoParts dto)
            {
                try
                {
                    // 'false' opens the existing index for appending instead of creating it.
                    var writer = new IndexWriter(Common.ProductIndexPath, new EsayTooAnalyzer(), false);
                    try
                    {
                        AddDocument(dto, writer);
                        writer.Optimize();
                    }
                    finally
                    {
                        // Release the index lock even when adding or optimizing fails.
                        writer.Close();
                    }
                }
                catch (Exception e)
                {
                    Console.WriteLine("添加索引出错,配件ID:"+dto.Id+"\n");
                    Console.WriteLine(e.Message);

                    throw;
                }

            }
    
            /// <summary>
            /// Builds a Lucene document from one part and adds it through the given writer.
            /// </summary>
            /// <param name="package">Part whose properties become document fields.</param>
            /// <param name="getWriter">Open writer that receives the document.</param>
            /// <remarks>
            /// Searchable fields: "id", "CarCategoryId", "Name", "Code" and
            /// "ModifiedTime" are indexed as single untokenized terms; "CarTypeTags"
            /// is tokenized by the writer's analyzer. All other fields are stored only.
            /// </remarks>
            private static void AddDocument(AutoParts package, IndexWriter getWriter)
            {
                Document doc = new Document();

                doc.Add(new Field("id", package.Id.ToString(), Field.Store.YES, Field.Index.UN_TOKENIZED));
                doc.Add(new Field("CarCategoryId", package.CarCategoryId.ToString(), Field.Store.YES, Field.Index.UN_TOKENIZED));
                doc.Add(new Field("Name", package.Name, Field.Store.YES, Field.Index.UN_TOKENIZED));
                doc.Add(new Field("Code", package.Code, Field.Store.YES, Field.Index.UN_TOKENIZED));
                doc.Add(new Field("FSPrice", package.FSPrice, Field.Store.YES, Field.Index.NO));
                doc.Add(new Field("YCPrice", package.YCPrice, Field.Store.YES, Field.Index.NO));
                doc.Add(new Field("YCCost", package.YCCost, Field.Store.YES, Field.Index.NO));
                doc.Add(new Field("YCSupplier", package.YCSupplier, Field.Store.YES, Field.Index.NO));
                doc.Add(new Field("FCPrice", package.FCPrice, Field.Store.YES, Field.Index.NO));
                doc.Add(new Field("FCCost", package.FCCost, Field.Store.YES, Field.Index.NO));
                // Store the supplier itself; this field previously received FCCost by mistake.
                doc.Add(new Field("FCSupplier", package.FCSupplier, Field.Store.YES, Field.Index.NO));
                doc.Add(new Field("CCPrice", package.CCPrice, Field.Store.YES, Field.Index.NO));
                doc.Add(new Field("CCCost", package.CCCost, Field.Store.YES, Field.Index.NO));
                doc.Add(new Field("CCSupplier", package.CCSupplier, Field.Store.YES, Field.Index.NO));
                doc.Add(new Field("Repire", package.Repire, Field.Store.YES, Field.Index.NO));
                doc.Add(new Field("AskPriceInfo", package.AskPriceInfo, Field.Store.YES, Field.Index.NO));
                doc.Add(new Field("AskCustomer", package.AskCustomer, Field.Store.YES, Field.Index.NO));
                doc.Add(new Field("Description", package.Description, Field.Store.YES, Field.Index.NO));
                doc.Add(new Field("Picture1", package.Picture1.ToString(), Field.Store.YES, Field.Index.NO));
                doc.Add(new Field("Picture2", package.Picture2.ToString(), Field.Store.YES, Field.Index.NO));
                doc.Add(new Field("Picture3", package.Picture3.ToString(), Field.Store.YES, Field.Index.NO));
                doc.Add(new Field("IsAvaliable", package.IsAvaliable.ToString(), Field.Store.YES, Field.Index.NO));
                doc.Add(new Field("CarTypeTags", package.CarTypeTags, Field.Store.YES, Field.Index.TOKENIZED));
                // NOTE(review): ToShortDateString() is culture-dependent and its output does
                // not generally sort chronologically as text. Range queries over this field
                // compare terms as strings; a sortable format such as yyyyMMdd would have to
                // be adopted here and in the query code together — confirm.
                doc.Add(new Field("ModifiedTime", package.ModifiedTime.ToShortDateString(), Field.Store.YES, Field.Index.UN_TOKENIZED));
                doc.Add(new Field("ModifiedBy", package.ModifiedBy, Field.Store.YES, Field.Index.NO));

                getWriter.AddDocument(doc);
            }
    
            /// <summary>
            /// Adds one document per part to the writer, printing a running
            /// 1-based counter to the console before each document is added.
            /// </summary>
            /// <param name="writer">Open writer that receives the documents.</param>
            /// <param name="packages">Parts to be indexed, processed in list order.</param>
            /// <remarks>
            /// The first exception stops the loop; its message is written to the
            /// console and the exception is not rethrown.
            /// </remarks>
            private static void indexDocs(IndexWriter writer, List<AutoParts> packages)
            {
                try
                {
                    for (int position = 0; position < packages.Count; position++)
                    {
                        int sequence = position + 1;
                        Console.WriteLine("生成索引顺序" + sequence);
                        AddDocument(packages[position], writer);
                    }
                }
                catch (Exception e)
                {
                    Console.Write(e.Message);
                }
            }
            /// <summary>
            /// Decides whether a part should be indexed. Currently accepts every part.
            /// </summary>
            /// <param name="autoParts">Candidate part (not inspected).</param>
            /// <returns>Always <c>true</c>.</returns>
            private static bool IsValidProduct(AutoParts autoParts)
            {
                const bool accepted = true;
                return accepted;
            }
        }
    

      其中更新索引的方法还在调试,因为发现删除索引的操作不成功。

     下面是查询的核心算法,其中也包含了分页查询,完全可以按照与数据库一致的方式进行查询,核心数据的底层查询按如下方式实现即可:

    /// <summary>
    /// Runs a paged search against the product index.
    /// </summary>
    /// <param name="dto">Search criteria used to build the query and the filters.</param>
    /// <param name="page">
    /// Paging state. A <c>PageNumber</c> of 0 is treated as page 1. On return,
    /// <c>TotalCount</c> holds the total number of hits and <c>PageCount</c> the
    /// number of pages derived from it and <c>PerPageSize</c>.
    /// </param>
    /// <returns>
    /// The requested page of results; an empty list (never null) when nothing
    /// matches or when the search fails.
    /// </returns>
    public static List<AutoPartDTO> Query(QueryCritiriaDTO dto, ref QryPage page)
            {
                if (page.PageNumber == 0)
                    page.PageNumber = 1;

                Sort sort = new Sort(new SortField("id", SortField.DOC, false));

                Query query = CreateQuery(dto);
                MutiFilter filter = CreateFilter(dto);

                query = filter.getFilterQuery(query);

                var productIndexReader = IndexReader.Open(Common.ProductIndexPath);
                IndexSearcher searcher = new IndexSearcher(productIndexReader);
                try
                {
                    // Fetch the top hits up to and including the requested page.
                    TopDocs topDocs = searcher.Search(query, null, page.PageNumber * page.PerPageSize, sort);
                    page.TotalCount = topDocs.totalHits;
                    page.PageCount = (int)Math.Ceiling((decimal)page.TotalCount / (decimal)page.PerPageSize);

                    List<AutoPartDTO> result;
                    if (page.PageCount == 1 || page.PageCount == 0)
                        result = TopDocs2Data(searcher, topDocs.scoreDocs);
                    else
                        result = TopDocs2Data(searcher, topDocs.scoreDocs, page);

                    // Keep the "no hits" result consistent with the error path below.
                    return result ?? new List<AutoPartDTO>();
                }
                catch (Exception e)
                {
                    Console.WriteLine("查询出错");
                    Console.WriteLine(e.Message);
                    return new List<AutoPartDTO>();
                }
                finally
                {
                    try
                    {
                        searcher.Close();
                    }
                    finally
                    {
                        // A searcher built from an existing reader does not close that
                        // reader, so it has to be closed explicitly to release its files.
                        productIndexReader.Close();
                    }
                }
            }
    
            /// <summary>
            /// Translates the search criteria into a Lucene query. Every supplied
            /// criterion becomes a required (MUST) clause of a boolean query.
            /// </summary>
            /// <param name="dto">Search criteria; category values of -1 or 0 and
            /// null/empty strings mean "not specified".</param>
            /// <returns>
            /// The combined boolean query, or a match-all query when no criterion is
            /// specified. A boolean query without clauses matches no documents, which
            /// would otherwise make an unfiltered or filter-only search return nothing.
            /// </returns>
            private static Query CreateQuery(QueryCritiriaDTO dto)
            {
                var booleanQuery = new BooleanQuery();
                int clauseCount = 0;

                if (dto.CatetoryL3 != -1 && dto.CatetoryL3 != 0)
                {
                    TermQuery categoryClause = new TermQuery(new Term("CarCategoryId", dto.CatetoryL3.ToString()));
                    booleanQuery.Add(categoryClause, BooleanClause.Occur.MUST);
                    clauseCount++;
                }

                if (dto.CatetoryL4 != -1 && dto.CatetoryL4 != 0)
                {
                    // Fuzzy match against the tokenized tag list, minimum similarity 0.3.
                    FuzzyQuery tagClause = new FuzzyQuery(new Term("CarTypeTags", dto.CatetoryL4.ToString()), 0.3f);
                    booleanQuery.Add(tagClause, BooleanClause.Occur.MUST);
                    clauseCount++;
                }

                if (!string.IsNullOrEmpty(dto.Name))
                {
                    TermQuery nameClause = new TermQuery(new Term("Name", dto.Name));
                    booleanQuery.Add(nameClause, BooleanClause.Occur.MUST);
                    clauseCount++;
                }

                if (!string.IsNullOrEmpty(dto.Code))
                {
                    TermQuery codeClause = new TermQuery(new Term("Code", dto.Code));
                    booleanQuery.Add(codeClause, BooleanClause.Occur.MUST);
                    clauseCount++;
                }

                if (!string.IsNullOrEmpty(dto.SupplierId))
                {
                    // NOTE(review): the indexing code shown alongside this method never
                    // adds a "SupplierId" field, so this clause may match nothing — confirm.
                    TermQuery supplierClause = new TermQuery(new Term("SupplierId", dto.SupplierId));
                    booleanQuery.Add(supplierClause, BooleanClause.Occur.MUST);
                    clauseCount++;
                }

                if (clauseCount == 0)
                {
                    return new MatchAllDocsQuery();
                }

                return booleanQuery;
            }
    
            /// <summary>
            /// Builds the filter set for a search. A "ModifiedTime" range filter is
            /// added only when both the start and the end date differ from
            /// <c>CP.Utils.DateTimeUtil.MIN_DATETIME</c>.
            /// </summary>
            /// <param name="dto">Search criteria carrying the optional date range.</param>
            /// <returns>A filter set, possibly without any filters.</returns>
            private static MutiFilter CreateFilter(QueryCritiriaDTO dto)
            {
                MutiFilter filters = new MutiFilter();

                var rangeStart = dto.Start;
                var rangeEnd = dto.End;
                bool hasStart = rangeStart != CP.Utils.DateTimeUtil.MIN_DATETIME;
                bool hasEnd = rangeEnd != CP.Utils.DateTimeUtil.MIN_DATETIME;

                if (hasStart && hasEnd)
                {
                    // NOTE(review): the bounds are culture-dependent short date strings and
                    // the range is compared as text — confirm it orders dates as intended.
                    filters.AddRangeFilter("ModifiedTime", rangeStart.ToShortDateString(), rangeEnd.ToShortDateString());
                }

                return filters;
            }
    
            #region 获取最终的数据
            /// <summary>
            /// Converts the slice of hits that belongs to the requested page into DTOs.
            /// </summary>
            /// <param name="searcher">Searcher used to load the stored documents.</param>
            /// <param name="scoreDoc">Hits returned by the search, in result order.</param>
            /// <param name="page">Paging state; <c>PageNumber</c>, <c>PerPageSize</c> and
            /// <c>TotalCount</c> select the slice.</param>
            /// <returns>DTOs carrying the id and name of each hit on the page.</returns>
            private static List<AutoPartDTO> TopDocs2Data(IndexSearcher searcher, ScoreDoc[] scoreDoc, QryPage page)
            {
                int first = (page.PageNumber - 1) * page.PerPageSize;
                int pageEnd = page.PageNumber * page.PerPageSize;
                // The last page may be partial: never go past the total hit count.
                int stop = pageEnd > page.TotalCount ? page.TotalCount : pageEnd;

                List<AutoPartDTO> pageItems = new List<AutoPartDTO>();
                int position = first;
                while (position < stop)
                {
                    Document stored = searcher.Doc(scoreDoc[position].doc);
                    pageItems.Add(new AutoPartDTO
                    {
                        Id = long.Parse(stored.Get("id")),
                        Name = stored.Get("Name")
                    });
                    position++;
                }
                return pageItems;
            }
            /// <summary>
            /// Converts every hit into a DTO carrying the id and name of the stored document.
            /// </summary>
            /// <param name="searcher">Searcher used to load the stored documents.</param>
            /// <param name="docs">Hits returned by the search; may be null or empty.</param>
            /// <returns>
            /// One DTO per hit, in hit order; an empty list (never null) when there are no hits.
            /// </returns>
            private static List<AutoPartDTO> TopDocs2Data(IndexSearcher searcher,  ScoreDoc[] docs)
            {
                List<AutoPartDTO> list = new List<AutoPartDTO>();

                // Return an empty collection instead of null so callers can iterate
                // the result without a null check.
                if (docs == null)
                    return list;

                foreach (ScoreDoc sd in docs)
                {
                    Document doc = searcher.Doc(sd.doc);
                    AutoPartDTO autoPartDto = new AutoPartDTO();

                    autoPartDto.Id = long.Parse(doc.Get("id"));
                    autoPartDto.Name = doc.Get("Name");

                    list.Add(autoPartDto);
                }
                return list;
            }
            #endregion
        }
        /// <summary>
        /// Shared settings for the indexing and search code.
        /// </summary>
        public class Common
        {
            // Application base directory with "auto.index" appended, computed once
            // when the class is initialized.
            private static readonly string IndexStoredDirectory =
                string.Concat(AppDomain.CurrentDomain.BaseDirectory, "auto.index");

            /// <summary>
            /// Location of the product index.
            /// </summary>
            public static string ProductIndexPath
            {
                get
                {
                    return IndexStoredDirectory;
                }
            }
        }
    
        /// <summary>
        /// Collects Lucene filters and applies all of them to a query by wrapping
        /// the query in one <c>FilteredQuery</c> per filter.
        /// </summary>
        public class MutiFilter
        {
            // Filters in the order they were added.
            private readonly List<Filter> filterList = new List<Filter>();

            /// <summary>
            /// Adds a filter that keeps only documents containing the exact term.
            /// </summary>
            /// <param name="Field">Name of the indexed field.</param>
            /// <param name="Value">Term text the field must contain.</param>
            public void AddFilter(String Field, String Value)
            {
                // Multiple filters may be registered; each one is appended to the list.
                filterList.Add(new QueryFilter(new TermQuery(new Term(Field, Value))));
            }

            /// <summary>
            /// Adds a filter that keeps only documents whose field term lies between
            /// <paramref name="start"/> and <paramref name="end"/>, both inclusive.
            /// </summary>
            /// <param name="Field">Name of the indexed field.</param>
            /// <param name="start">Lower bound term text.</param>
            /// <param name="end">Upper bound term text.</param>
            public void AddRangeFilter(string Field, string start, string end)
            {
                Term lower = new Term(Field, start);
                Term upper = new Term(Field, end);
                RangeQuery range = new RangeQuery(lower, upper, true);
                filterList.Add(new QueryFilter(range));
            }

            /// <summary>
            /// Wraps the query once per registered filter, so the results are
            /// narrowed by every filter in turn.
            /// </summary>
            /// <param name="query">Query to restrict.</param>
            /// <returns>The wrapped query, or the original query when no filter was added.</returns>
            public Query getFilterQuery(Query query)
            {
                Query restricted = query;
                foreach (Filter filter in filterList)
                {
                    restricted = new FilteredQuery(restricted, filter);
                }
                return restricted;
            }
        }
       
        /// <summary>
        /// Tokenizer that splits text on commas and spaces only; every other
        /// character is part of a token.
        /// </summary>
        public class EsayTooTokenizer : CharTokenizer
        {
            public EsayTooTokenizer(TextReader reader)
                : base(reader)
            {
            }

            /// <summary>
            /// Tells the base tokenizer whether a character belongs to a token.
            /// </summary>
            /// <param name="c">Character under inspection.</param>
            /// <returns><c>false</c> for ',' and ' ', <c>true</c> for anything else.</returns>
            protected override bool IsTokenChar(char c)
            {
                switch (c)
                {
                    case ',':
                    case ' ':
                        return false;
                    default:
                        return true;
                }
            }
        }
    
        /// <summary>
        /// Minimal custom analyzer: every field is tokenized with
        /// <c>EsayTooTokenizer</c> and no further token filters are applied.
        /// </summary>
        public class EsayTooAnalyzer : Analyzer
        {
            /// <summary>
            /// Creates the token stream for a field.
            /// </summary>
            /// <param name="fieldName">Name of the field being analyzed (not used).</param>
            /// <param name="reader">Source of the field text.</param>
            /// <returns>A new <c>EsayTooTokenizer</c> over <paramref name="reader"/>.</returns>
            public override TokenStream TokenStream(string fieldName, System.IO.TextReader reader)
            {
                TokenStream stream = new EsayTooTokenizer(reader);
                return stream;
            }
        }

      

  • 相关阅读:
    FZU-Problem 2150 Fire Game
    LeetCode120——Triangle
    Coder-Strike 2014
    AP INVOICES IMPORT API(NOT request)
    NYOJ-277-车牌号
    软件測试方法
    C++中字符数组和字符串string
    【机器学习算法-python实现】PCA 主成分分析、降维
    主题讲座:移动互联网时代的创业机会
    ubuntu环境eclipse配置
  • 原文地址:https://www.cnblogs.com/vinnie520/p/2558987.html
Copyright © 2011-2022 走看看