1,创建索引
Directory directory=FSDirectory.open(Paths.get(fileName)); IndexWriterConfig iwc=new IndexWriterConfig(new StandadAnalyzer()); iwc.setOpenMode(OpenMode.CREATE); IndexWriter writer=new IndexWriter(,directory,iwc); Document doc=new Document(); doc.add(new TextField(name,value)); writer.addDocument(doc); directory.close(); directory.close();
2,简单搜索
Path indexPath = Paths.get(fileName); Directory dir = FSDirectory.open(indexPath); IndexReader reader = DirectoryReader.open(dir); IndexSearcher searcher = new IndexSearcher(reader); Term term = new Term(field, keyword); Query termQuery = new TermQuery(term); System.out.println("Query:"+termQuery); // 返回前10条 TopDocs tds = searcher.search(termQuery, 10); System.out.println(tds.totalHits); for (ScoreDoc sd : tds.scoreDocs) { // Explanation explanation = searcher.explain(query, sd.doc); // System.out.println("explain:" + explanation + " "); Document doc = searcher.doc(sd.doc); System.out.println("DocID:" + sd.doc); System.out.println("open_id:" + doc.get("open_id")); System.out.println("time:" + doc.get("time")); } dir.close(); reader.close();
3,收集器
实现Collector和LeafCollector
getLeafCollector中获取reader
collect中统计
CountCollector implements Collector,LeafCollector public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException { this.reader=context.reader(); return this; } public void collect(int doc) throws IOException { SortedDocValues docValues=reader.getSortedDocValues(field); BytesRef bytesRef=docValues.get(doc); String v=bytesRef.utf8ToString(); // Document document=reader.document(doc); // String v=document.get(field); if (map.containsKey(v)){ map.put(v,map.get(v)+1); }else { map.put(v,1); } }
4,grouping
索引字段需设置分词,并存储词项向量(term vectors)
FieldType type = new FieldType(); // 索引时保存文档、词项频率、位置信息、偏移信息 type.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); type.setStored(true);// 原始字符串全部被保存在索引中 type.setStoreTermVectors(true);// 存储词项量 type.setTokenized(true);// 词条化
long t1=System.currentTimeMillis(); GroupingSearch groupingSearch=new GroupingSearch("time"); groupingSearch.setAllGroups(true); groupingSearch.setCachingInMB(4.0,true); groupingSearch.setFillSortFields(true); IndexSearcher searcher=null; try { Directory directory= FSDirectory.open(Paths.get("testCollect")); IndexReader reader= DirectoryReader.open(directory); searcher=new IndexSearcher(reader); Query query=new MatchAllDocsQuery(); TopGroups<BytesRef> result=groupingSearch.search(searcher,query,0,searcher.getIndexReader().maxDoc()); GroupDocs<BytesRef>[] docs=result.groups; int total=0; for (GroupDocs<BytesRef> groupDocs : docs){ System.out.println(new String(groupDocs.groupValue.bytes)+":"+groupDocs.totalHits); total+=groupDocs.totalHits; } System.out.println("time cost:"+(System.currentTimeMillis()-t1)); System.out.println("总数据量:"+total); int totalGroupCount = result.totalGroupCount; System.out.println(totalGroupCount); }catch (Exception e){ e.printStackTrace(); }
5,facet
维度统计,需设置维度字段FacetField
第一步构造索引Searcher
// Step 1: open the main index (indexDir) and wrap its reader in a searcher.
IndexReader indexReader = DirectoryReader.open(indexDir);
IndexSearcher searcher = new IndexSearcher(indexReader);
第二步,构造TaxonomyReader(读取facet分类索引)
// Step 2: open the taxonomy index (taxoDir), which is separate from the main index.
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
第三步,设置收集器
// Step 3: create the collector that will record the documents matched by the search.
FacetsCollector fc = new FacetsCollector();
第四步搜索
// Step 4: search for documents whose "device" field contains the term "手机",
// asking for the top 10 hits and passing the facets collector fc to the search.
Term deviceTerm = new Term("device", "手机");
TermQuery query = new TermQuery(deviceTerm);
FacetsCollector.search(searcher, query, 10, fc);
第五步获取维度统计结果
// Step 5: compute facet counts from the collected matches and print them.
// `config` is defined elsewhere - presumably the FacetsConfig used at index time; confirm.
Facets facets = new FastTaxonomyFacetCounts(taxoReader, config, fc);
// Request the results for all dimensions, passing 10 as the top-N limit.
List<FacetResult> results = facets.getAllDims(10);
for (FacetResult dimResult : results) {
    System.out.println(dimResult);
}
DrillDownQuery可以设置多个查询维度
DrillSideways可以在下钻(drill-down)的同时统计同级维度的计数