zoukankan      html  css  js  c++  java
  • lucene学习笔记

    1,创建索引

      

    Directory directory=FSDirectory.open(Paths.get(fileName));
    IndexWriterConfig iwc=new IndexWriterConfig(new StandadAnalyzer());
    iwc.setOpenMode(OpenMode.CREATE);
    IndexWriter writer=new IndexWriter(,directory,iwc);
    Document doc=new Document();
    doc.add(new TextField(name,value));
    writer.addDocument(doc);
    directory.close();
    directory.close();
    

      2,简单搜索

    Path indexPath = Paths.get(fileName);
    Directory dir = FSDirectory.open(indexPath);
    IndexReader reader = DirectoryReader.open(dir);
    IndexSearcher searcher = new IndexSearcher(reader);
            
    Term term = new Term(field, keyword);
    Query termQuery = new TermQuery(term);
    System.out.println("Query:"+termQuery);
            
    // 返回前10条
    TopDocs tds = searcher.search(termQuery, 10);
    System.out.println(tds.totalHits);
    for (ScoreDoc sd : tds.scoreDocs) {
        // Explanation explanation = searcher.explain(query, sd.doc);
        // System.out.println("explain:" + explanation + "
    ");
        Document doc = searcher.doc(sd.doc);
        System.out.println("DocID:" + sd.doc);
        System.out.println("open_id:" + doc.get("open_id"));
        System.out.println("time:" + doc.get("time"));
    }
    dir.close();
    reader.close();

    3,收集器

     实现Collector和LeafCollector

      getLeafCollector中获取reader

      collect中统计

     CountCollector implements Collector,LeafCollector 
    
    /**
     * Remembers the reader of the given leaf context and hands back this
     * object as the collector for it.
     *
     * @param context the leaf context whose reader is stored in {@code reader}
     * @return this instance
     * @throws IOException declared by the signature; nothing here throws it directly
     */
    public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
        // Stored so that collect(int) can read per-document values from it.
        reader = context.reader();
        return this;
    }
    
     /**
      * Counts one occurrence of the document's {@code field} value in {@code map}.
      *
      * @param doc id of the document to tally, passed to the doc-values lookup on {@code reader}
      * @throws IOException if reading the doc values fails
      */
     public void collect(int doc) throws IOException {
         // Read this document's value from the sorted doc values of `field`.
         // (Alternative noted by the author: reader.document(doc).get(field).)
         final String key = reader.getSortedDocValues(field).get(doc).utf8ToString();

         // First sighting starts at 1; otherwise bump the existing count.
         final int updated = map.containsKey(key) ? map.get(key) + 1 : 1;
         map.put(key, updated);
     }

    4,grouping

      索引字段需设置分词并存储词项向量(term vectors)

    // Field configuration: indexed with full postings, stored, with term vectors, tokenized.
    FieldType type = new FieldType();
            // Index documents, term frequencies, positions and offsets
            type.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
            type.setStored(true);// keep the original string value in the index
            type.setStoreTermVectors(true);// store term vectors
            type.setTokenized(true);// run the value through the analyzer (tokenize)

     

    long t1=System.currentTimeMillis();
            GroupingSearch groupingSearch=new GroupingSearch("time");
            groupingSearch.setAllGroups(true);
            groupingSearch.setCachingInMB(4.0,true);
            groupingSearch.setFillSortFields(true);
            IndexSearcher searcher=null;
            try {
                Directory directory= FSDirectory.open(Paths.get("testCollect"));
                IndexReader reader= DirectoryReader.open(directory);
                searcher=new IndexSearcher(reader);
                Query query=new MatchAllDocsQuery();
                TopGroups<BytesRef> result=groupingSearch.search(searcher,query,0,searcher.getIndexReader().maxDoc());
                GroupDocs<BytesRef>[] docs=result.groups;
                int total=0;
                for (GroupDocs<BytesRef> groupDocs : docs){
                    System.out.println(new String(groupDocs.groupValue.bytes)+":"+groupDocs.totalHits);
                    total+=groupDocs.totalHits;
                }
                System.out.println("time cost:"+(System.currentTimeMillis()-t1));
                System.out.println("总数据量:"+total);
                int totalGroupCount = result.totalGroupCount;
                System.out.println(totalGroupCount);
            }catch (Exception e){
                e.printStackTrace();
            }

    5,facet

      维度统计,需设置维度字段FacetField

      第一步构造索引Searcher

      

    // Open a reader on indexDir and wrap it in a searcher.
    IndexReader indexReader= DirectoryReader.open(indexDir);
    IndexSearcher searcher=new IndexSearcher(indexReader);

      第二步构造facetReader

      

    // Taxonomy reader opened on taxoDir.
    TaxonomyReader taxoReader=new DirectoryTaxonomyReader(taxoDir);

      第三步,设置收集器

    // Collector passed to the search and later to the facet counting step.
    FacetsCollector fc=new FacetsCollector();

      第四步搜索

    // Match documents whose "device" field contains the given term.
    TermQuery query=new TermQuery(new Term("device","手机"));
    // Run the search (top 10 requested) while feeding hits into fc; the returned value is not used here.
    FacetsCollector.search(searcher,query,10,fc);

      第五步获取维度统计结果

    // Build facet counts from the hits gathered in fc, using the taxonomy
    // reader and the facets config.
    Facets facets = new FastTaxonomyFacetCounts(taxoReader, config, fc);

    // Print one result per dimension; 10 is the limit passed to getAllDims.
    for (FacetResult dimension : facets.getAllDims(10)) {
        System.out.println(dimension);
    }

      DrillDownQuery可以设置多个查询维度

      DrillSideways 搜索同类

  • 相关阅读:
    jstat使用
    oracle 定期copy 大表统计信息(分区表)
    PL/SQL注册码
    Linux系统--命令行安装weblogic10.3.6
    oracle 11.2.0.4 dbca创建数据库时 报错ORA-12532
    自动重建索引脚本
    oracle 添加登陆数据库触发器--记录IP 地址
    oracle 触发器
    oracle 定位SQL
    查询rman 备份信息集
  • 原文地址:https://www.cnblogs.com/jinjixia/p/8573616.html
Copyright © 2011-2022 走看看