最近在帮一个朋友的忙,为他们的一个软件设计架构。该应用程序的某个核心逻辑涉及的数据量较大,客户对查询性能的要求又很高。这种需求除了在数据库设计上要考虑水平分表、分区视图之类的方案,在程序中也要考虑效率问题,于是决定使用LUCENE.NET为核心数据建立索引文件,用来做全文搜索。这样就算数据量有千万级别,查询也能在几秒钟内完成,对性能还是有很大帮助。之前没有仔细了解过LUCENE方面的技术,正好借此学习一下。
LUCENE.NET是从JAVA版LUCENE移植到.NET平台上的开源技术,技术资料也很丰富。
以下是创建索引代码:
/// <summary>
/// Rebuilds the whole product index: pages through the auto parts returned by
/// <c>CustomerQuery.QueryAutoParts</c>, keeps those accepted by
/// <c>IsValidProduct</c>, and writes them to the index in a single pass.
/// </summary>
public static void Run()
{
    QryPage qryPage = new QryPage();
    qryPage.PerPageSize = 350;
    qryPage.PageNumber = 0;
    // Initial upper bound for the paging loop below.
    // NOTE(review): presumably QueryAutoParts replaces this through the ref parameter - confirm.
    qryPage.PageCount = 10000;
    qryPage.NeedInitPageNo = false;

    List<AutoParts> packages = new List<AutoParts>();
    while (qryPage.PageNumber < qryPage.PageCount)
    {
        // Fetch the next page of data to be indexed.
        IList<AutoParts> autoPartses = new CustomerQuery().QueryAutoParts(new AutoPartDTO(), ref qryPage);
        foreach (var p in autoPartses)
        {
            if (!IsValidProduct(p))
            {
                continue;
            }
            packages.Add(p);
        }
        qryPage.PageNumber++;
    }

    // Write search item index to file.
    Write(packages);
}

/// <summary>
/// Writes the given parts to a freshly created index (delegates to <c>build</c>).
/// </summary>
/// <param name="packages">Parts to index.</param>
public static void Write(List<AutoParts> packages)
{
    build(packages);
}

/// <summary>
/// Creates a new index at <c>Common.ProductIndexPath</c> (the writer is opened with
/// create = true), adds one document per part, optimizes and closes the index.
/// Failures after the writer has been opened are printed to the console, not rethrown.
/// </summary>
/// <param name="packages">Parts to index.</param>
public static void build(List<AutoParts> packages)
{
    var writer = new IndexWriter(Common.ProductIndexPath, new EsayTooAnalyzer(), true);
    try
    {
        DateTime start = System.DateTime.Now;
        try
        {
            writer.SetMaxFieldLength(1000);
            writer.SetUseCompoundFile(true);
            Logger.Info("Indexing to directory '" + Common.ProductIndexPath + "'...");
            indexDocs(writer, packages);
            Logger.Info("Optimizing...");
            writer.Optimize();
        }
        finally
        {
            // Always release the writer (and its lock on the index), even when indexing fails.
            writer.Close();
        }

        // Ticks are 100 ns units, so a raw tick difference is not milliseconds;
        // report the elapsed TimeSpan in real milliseconds instead.
        TimeSpan elapsed = System.DateTime.Now - start;
        Logger.Info((long)elapsed.TotalMilliseconds + " total milliseconds");
    }
    catch (Exception e)
    {
        Console.WriteLine(e.Message);
    }
}

/// <summary>
/// Replaces the indexed document of one part: deletes every document whose "id" term
/// equals <c>dto.Id</c>, then adds the part again and optimizes the index.
/// Failures are printed to the console, not rethrown.
/// </summary>
/// <param name="dto">Part whose index entry is refreshed.</param>
public static void UpdateIndex(AutoParts dto)
{
    try
    {
        Term tm = new Term("id", dto.Id.ToString());

        // Deletions made through an IndexReader are only committed when the reader is
        // closed, and the reader keeps the index write lock until then. It therefore has
        // to be closed before the IndexWriter below is opened.
        var reader = IndexReader.Open(Common.ProductIndexPath);
        try
        {
            reader.DeleteDocuments(tm);
        }
        finally
        {
            reader.Close();
        }

        var writer = new IndexWriter(Common.ProductIndexPath, new EsayTooAnalyzer(), false);
        try
        {
            AddDocument(dto, writer);
            writer.Optimize();
        }
        finally
        {
            writer.Close();
        }
    }
    catch (Exception e)
    {
        Console.WriteLine("添加索引出错,配件ID:" + dto.Id + "\n");
        Console.Write(e.Message);
    }
}

/// <summary>
/// Appends one part to the existing index and optimizes it.
/// Failures are printed to the console and then rethrown.
/// </summary>
/// <param name="dto">Part to add.</param>
public static void AddDocument(AutoParts dto)
{
    try
    {
        var writer = new IndexWriter(Common.ProductIndexPath, new EsayTooAnalyzer(), false);
        try
        {
            AddDocument(dto, writer);
            writer.Optimize();
        }
        finally
        {
            // Release the writer even if adding or optimizing fails.
            writer.Close();
        }
    }
    catch (Exception e)
    {
        Console.WriteLine("添加索引出错,配件ID:" + dto.Id + "\n");
        Console.WriteLine(e.Message);
        throw;
    }
}

/// <summary>
/// Converts one part into a Lucene document and adds it through the given writer.
/// "id", "CarCategoryId", "Name", "Code" and "ModifiedTime" are indexed as single
/// untokenized terms, "CarTypeTags" is tokenized by the writer's analyzer, and all
/// remaining fields are stored without being indexed.
/// </summary>
/// <param name="package">Part to convert.</param>
/// <param name="getWriter">Open writer that receives the document.</param>
private static void AddDocument(AutoParts package, IndexWriter getWriter)
{
    Document doc = new Document();

    // Searchable, untokenized fields.
    doc.Add(new Field("id", package.Id.ToString(), Field.Store.YES, Field.Index.UN_TOKENIZED));
    doc.Add(new Field("CarCategoryId", package.CarCategoryId.ToString(), Field.Store.YES, Field.Index.UN_TOKENIZED));
    doc.Add(new Field("Name", NullToEmpty(package.Name), Field.Store.YES, Field.Index.UN_TOKENIZED));
    doc.Add(new Field("Code", NullToEmpty(package.Code), Field.Store.YES, Field.Index.UN_TOKENIZED));

    // Stored-only payload fields.
    doc.Add(StoredOnly("FSPrice", package.FSPrice));
    doc.Add(StoredOnly("YCPrice", package.YCPrice));
    doc.Add(StoredOnly("YCCost", package.YCCost));
    doc.Add(StoredOnly("YCSupplier", package.YCSupplier));
    doc.Add(StoredOnly("FCPrice", package.FCPrice));
    doc.Add(StoredOnly("FCCost", package.FCCost));
    // Fixed copy-paste bug: this field used to be filled with package.FCCost.
    doc.Add(StoredOnly("FCSupplier", package.FCSupplier));
    doc.Add(StoredOnly("CCPrice", package.CCPrice));
    doc.Add(StoredOnly("CCCost", package.CCCost));
    doc.Add(StoredOnly("CCSupplier", package.CCSupplier));
    doc.Add(StoredOnly("Repire", package.Repire));
    doc.Add(StoredOnly("AskPriceInfo", package.AskPriceInfo));
    doc.Add(StoredOnly("AskCustomer", package.AskCustomer));
    doc.Add(StoredOnly("Description", package.Description));
    doc.Add(StoredOnly("Picture1", package.Picture1.ToString()));
    doc.Add(StoredOnly("Picture2", package.Picture2.ToString()));
    doc.Add(StoredOnly("Picture3", package.Picture3.ToString()));
    doc.Add(StoredOnly("IsAvaliable", package.IsAvaliable.ToString()));

    doc.Add(new Field("CarTypeTags", NullToEmpty(package.CarTypeTags), Field.Store.YES, Field.Index.TOKENIZED));
    // NOTE(review): ToShortDateString() is culture dependent; the search side must format
    // its range bounds the same way for this term to be matched.
    doc.Add(new Field("ModifiedTime", package.ModifiedTime.ToShortDateString(), Field.Store.YES, Field.Index.UN_TOKENIZED));
    doc.Add(StoredOnly("ModifiedBy", package.ModifiedBy));

    getWriter.AddDocument(doc);
}

/// <summary>
/// Builds a field that is stored in the index but not searchable.
/// </summary>
private static Field StoredOnly(string name, string value)
{
    return new Field(name, NullToEmpty(value), Field.Store.YES, Field.Index.NO);
}

/// <summary>
/// Maps null to an empty string. The Lucene Field constructor rejects null values, and
/// without this a single incomplete record would abort the rest of an indexing run.
/// </summary>
private static string NullToEmpty(string value)
{
    return value ?? string.Empty;
}

/// <summary>
/// Adds every part to the index, printing a running counter to the console.
/// The first failure stops the loop; its message is printed and not rethrown.
/// </summary>
/// <param name="writer">Open writer that receives the documents.</param>
/// <param name="packages">Parts to index.</param>
private static void indexDocs(IndexWriter writer, List<AutoParts> packages)
{
    try
    {
        int i = 0;
        foreach (var package in packages)
        {
            i++;
            Console.WriteLine("生成索引顺序" + i);
            AddDocument(package, writer);
        }
    }
    catch (Exception e)
    {
        Console.Write(e.Message);
    }
}

/// <summary>
/// Decides whether a part should be indexed. Currently accepts every part.
/// </summary>
private static bool IsValidProduct(AutoParts autoParts)
{
    return true;
}
} // closes the enclosing class, whose header is not part of this excerpt
其中更新索引的方法还在调试,因为发现删除索引的操作不成功。
下面是查询的核心算法,其中也包含了分页查询,完全可以按照与数据库一致的方式来进行查询,核心数据的底层查询按如下方式实现即可:
/// <summary>
/// Runs a paged search against the product index.
/// </summary>
/// <param name="dto">Search criteria; see <c>CreateQuery</c> and <c>CreateFilter</c>.</param>
/// <param name="page">
/// Paging state. A <c>PageNumber</c> of 0 is treated as page 1. On success
/// <c>TotalCount</c> and <c>PageCount</c> are updated from the search result.
/// </param>
/// <returns>
/// The hits of the requested page (all hits when there is at most one page), with only
/// <c>Id</c> and <c>Name</c> populated. An empty list when nothing matches or when the
/// search itself fails (the error is printed to the console).
/// </returns>
public static List<AutoPartDTO> Query(QueryCritiriaDTO dto, ref QryPage page)
{
    if (page.PageNumber == 0)
    {
        page.PageNumber = 1;
    }

    Sort sort = new Sort(new SortField("id", SortField.DOC, false));
    Query query = CreateQuery(dto);
    MutiFilter filter = CreateFilter(dto);
    query = filter.getFilterQuery(query);

    var productIndexReader = IndexReader.Open(Common.ProductIndexPath);
    try
    {
        IndexSearcher searcher = new IndexSearcher(productIndexReader);
        try
        {
            // Ask for every hit up to the end of the requested page; the page itself is
            // cut out of that array afterwards.
            TopDocs topDocs = searcher.Search(query, null, page.PageNumber * page.PerPageSize, sort);
            page.TotalCount = topDocs.totalHits;
            page.PageCount = (int)Math.Ceiling((decimal)page.TotalCount / (decimal)page.PerPageSize);

            if (page.PageCount == 1 || page.PageCount == 0)
            {
                return TopDocs2Data(searcher, topDocs.scoreDocs);
            }
            return TopDocs2Data(searcher, topDocs.scoreDocs, page);
        }
        catch (Exception e)
        {
            Console.WriteLine("查询出错");
            Console.WriteLine(e.Message);
            return new List<AutoPartDTO>();
        }
        finally
        {
            searcher.Close();
        }
    }
    finally
    {
        // A searcher built on an existing reader does not close that reader, so it has to
        // be closed here; otherwise every query leaks an open index reader.
        productIndexReader.Close();
    }
}

/// <summary>
/// Builds the boolean query for the criteria. Every criterion that is set is added as a
/// MUST clause; criteria left at -1/0 (categories) or null/empty (strings) are skipped.
/// </summary>
private static Query CreateQuery(QueryCritiriaDTO dto)
{
    var booleanQuery = new BooleanQuery();

    if (dto.CatetoryL3 != -1 && dto.CatetoryL3 != 0)
    {
        TermQuery categoryQuery = new TermQuery(new Term("CarCategoryId", dto.CatetoryL3.ToString()));
        booleanQuery.Add(categoryQuery, BooleanClause.Occur.MUST);
    }

    if (dto.CatetoryL4 != -1 && dto.CatetoryL4 != 0)
    {
        // NOTE(review): a fuzzy match (minimum similarity 0.3) on a numeric id will also
        // accept similar-looking ids - confirm this is intended rather than a TermQuery.
        FuzzyQuery tagQuery = new FuzzyQuery(new Term("CarTypeTags", dto.CatetoryL4.ToString()), 0.3f);
        booleanQuery.Add(tagQuery, BooleanClause.Occur.MUST);
    }

    if (!string.IsNullOrEmpty(dto.Name))
    {
        TermQuery nameQuery = new TermQuery(new Term("Name", dto.Name));
        booleanQuery.Add(nameQuery, BooleanClause.Occur.MUST);
    }

    if (!string.IsNullOrEmpty(dto.Code))
    {
        TermQuery codeQuery = new TermQuery(new Term("Code", dto.Code));
        booleanQuery.Add(codeQuery, BooleanClause.Occur.MUST);
    }

    if (!string.IsNullOrEmpty(dto.SupplierId))
    {
        // NOTE(review): the index-building listing earlier in this file never adds a
        // "SupplierId" field, so this clause presumably matches nothing - verify.
        TermQuery supplierQuery = new TermQuery(new Term("SupplierId", dto.SupplierId));
        booleanQuery.Add(supplierQuery, BooleanClause.Occur.MUST);
    }

    return booleanQuery;
}

/// <summary>
/// Builds the filter set for the criteria. A "ModifiedTime" range filter is added only
/// when both <c>Start</c> and <c>End</c> differ from <c>DateTimeUtil.MIN_DATETIME</c>.
/// </summary>
private static MutiFilter CreateFilter(QueryCritiriaDTO dto)
{
    MutiFilter mf = new MutiFilter();
    if (dto.Start != CP.Utils.DateTimeUtil.MIN_DATETIME && dto.End != CP.Utils.DateTimeUtil.MIN_DATETIME)
    {
        // NOTE(review): the bounds are short-date strings, so the range is compared as
        // text; whether that orders dates correctly depends on the culture's date format.
        mf.AddRangeFilter("ModifiedTime", dto.Start.ToShortDateString(), dto.End.ToShortDateString());
    }
    return mf;
}

#region 获取最终的数据
/// <summary>
/// Loads the documents of the requested page and maps them to DTOs.
/// </summary>
/// <param name="searcher">Open searcher used to load the stored documents.</param>
/// <param name="scoreDoc">Hits returned by the search, first page first.</param>
/// <param name="page">Paging state; <c>PageNumber</c>, <c>PerPageSize</c> and <c>TotalCount</c> are read.</param>
/// <returns>The DTOs of the requested page; empty when the page lies beyond the hits.</returns>
private static List<AutoPartDTO> TopDocs2Data(IndexSearcher searcher, ScoreDoc[] scoreDoc, QryPage page)
{
    int start = (page.PageNumber - 1) * page.PerPageSize;
    int end = page.PageNumber * page.PerPageSize;
    if (end > page.TotalCount)
    {
        end = page.TotalCount;
    }
    // Never read past the hits that were actually returned.
    if (end > scoreDoc.Length)
    {
        end = scoreDoc.Length;
    }

    List<AutoPartDTO> list = new List<AutoPartDTO>();
    for (int index = start; index < end; index++)
    {
        list.Add(ToDto(searcher.Doc(scoreDoc[index].doc)));
    }
    return list;
}

/// <summary>
/// Loads every hit and maps it to a DTO.
/// </summary>
/// <param name="searcher">Open searcher used to load the stored documents.</param>
/// <param name="docs">Hits returned by the search; may be null or empty.</param>
/// <returns>One DTO per hit. An empty list (not null) when there are no hits.</returns>
private static List<AutoPartDTO> TopDocs2Data(IndexSearcher searcher, ScoreDoc[] docs)
{
    List<AutoPartDTO> list = new List<AutoPartDTO>();
    if (docs == null || docs.Length == 0)
    {
        // Return an empty list like every other path of Query does, instead of null.
        return list;
    }

    foreach (ScoreDoc sd in docs)
    {
        list.Add(ToDto(searcher.Doc(sd.doc)));
    }
    return list;
}

/// <summary>
/// Maps a stored document to a DTO; only the "id" and "Name" fields are copied.
/// </summary>
private static AutoPartDTO ToDto(Document doc)
{
    AutoPartDTO autoPartDto = new AutoPartDTO();
    autoPartDto.Id = long.Parse(doc.Get("id"));
    autoPartDto.Name = doc.Get("Name");
    return autoPartDto;
}
#endregion
} // closes the enclosing class, whose header is not part of this excerpt

/// <summary>
/// Shared settings for the product index.
/// </summary>
public class Common
{
    private static readonly string IndexStoredDirectory = AppDomain.CurrentDomain.BaseDirectory + "auto.index";

    /// <summary>
    /// Location of the index: "auto.index" appended to the application base directory.
    /// </summary>
    public static string ProductIndexPath
    {
        get { return IndexStoredDirectory; }
    }
}

/// <summary>
/// Collects several filters and applies all of them to a query.
/// </summary>
public class MutiFilter
{
    private readonly List<Filter> filterList;

    public MutiFilter()
    {
        filterList = new List<Filter>();
    }

    /// <summary>
    /// Adds a filter that keeps only documents containing the exact term.
    /// </summary>
    /// <param name="Field">Name of the indexed field.</param>
    /// <param name="Value">Term text that must be present.</param>
    public void AddFilter(String Field, String Value)
    {
        Term term = new Term(Field, Value);
        QueryFilter filter = new QueryFilter(new TermQuery(term));
        filterList.Add(filter);
    }

    /// <summary>
    /// Adds an inclusive range filter on a field; terms are compared as text.
    /// </summary>
    /// <param name="Field">Name of the indexed field.</param>
    /// <param name="start">Lower bound term.</param>
    /// <param name="end">Upper bound term.</param>
    public void AddRangeFilter(string Field, string start, string end)
    {
        Term lower = new Term(Field, start);
        Term upper = new Term(Field, end);
        var range = new RangeQuery(lower, upper, true);
        filterList.Add(new QueryFilter(range));
    }

    /// <summary>
    /// Wraps the query in one FilteredQuery per registered filter, in registration order.
    /// </summary>
    /// <param name="query">Query to restrict.</param>
    /// <returns>The wrapped query, or <paramref name="query"/> itself when no filter was added.</returns>
    public Query getFilterQuery(Query query)
    {
        foreach (Filter filter in filterList)
        {
            query = new FilteredQuery(query, filter);
        }
        return query;
    }
}

/// <summary>
/// Tokenizer that splits text on commas and spaces only.
/// </summary>
public class EsayTooTokenizer : CharTokenizer
{
    public EsayTooTokenizer(TextReader reader)
        : base(reader)
    {
    }

    /// <summary>
    /// Every character except ',' and ' ' belongs to a token.
    /// </summary>
    protected override bool IsTokenChar(char c)
    {
        return c != ',' && c != ' ';
    }
}

/// <summary>
/// Minimal custom analyzer: tokenizes every field with <see cref="EsayTooTokenizer"/>.
/// </summary>
public class EsayTooAnalyzer : Analyzer
{
    public override TokenStream TokenStream(string fieldName, System.IO.TextReader reader)
    {
        return new EsayTooTokenizer(reader);
    }
}