zoukankan      html  css  js  c++  java
  • bobobrowse为Lucene添加分组统计

    http://www.cnblogs.com/ibook360/archive/2011/11/15/2250018.html

    bobo-browse为Lucene添加分组统计

    Bobo-browse是一个基于Lucene的搜索结果分组统计开源插件,可以对搜索结果进行分组(分面)统计。比如在淘宝上搜索“衬衣”,搜索结果上方会显示“长袖衬衫(10321) 短袖衬衫(32561)”等分组及其命中数量。
    Lucene从3.2.0起也提供了Grouping组件来实现分组统计功能,但该组件将另文讨论,这里不展开。


          据Bobo-browse项目介绍, Linkedin.com使用了该组件。

          Bobo-browse仅关注搜索,与索引的创建无关;索引仍然使用标准的Lucene索引创建方法来创建。
          Bobo-browse项目地址为 http://sna-projects.com/bobo/

    以下是测试代码使用的组件及版本:
    Bobo-browse: 2.5.0-rc1
    Lucene: 3.4.0
    log4j: 1.2.16
    Fastutil: 6.4

     

    测试代码:

    import java.util.*;

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriter.MaxFieldLength;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.ScoreDoc;
    import org.apache.lucene.search.TopScoreDocCollector;
    import org.apache.lucene.index.*;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.RAMDirectory;
    import org.apache.lucene.util.Version;
    import org.apache.lucene.search.*;
    import org.apache.lucene.queryParser.*;

    import com.browseengine.bobo.api.BoboBrowser;
    import com.browseengine.bobo.api.BoboIndexReader;
    import com.browseengine.bobo.api.Browsable;
    import com.browseengine.bobo.api.BrowseFacet;
    import com.browseengine.bobo.api.BrowseHit;
    import com.browseengine.bobo.api.BrowseRequest;
    import com.browseengine.bobo.api.BrowseResult;
    import com.browseengine.bobo.api.FacetAccessible;
    import com.browseengine.bobo.api.FacetSpec;
    import com.browseengine.bobo.api.FacetSpec.FacetSortSpec;
    import com.browseengine.bobo.facets.FacetHandler;
    import com.browseengine.bobo.facets.impl.*;
    import com.browseengine.bobo.facets.impl.SimpleFacetHandler;
    import com.browseengine.bobo.facets.data.PredefinedTermListFactory;

    /**
     * Small demo that adds facet (group) counts to a Lucene search using Bobo-browse.
     *
     * <p>The demo builds an in-memory index of eleven PC descriptions, searches the
     * {@code desc} field for {@code Core2}, and prints the hit counts grouped by
     * {@code vendor}, {@code cpu} and three predefined {@code price} ranges.
     *
     * <p>Index creation uses the plain Lucene API; Bobo-browse is only involved at
     * search time.
     */
    public class TestBoboBrowse {

        /** Directory holding the demo index; created in {@link #test1()}. */
        private Directory indexDir = null;

        /** Lucene compatibility version used for the analyzer, writer and query parser. */
        private final Version luceneVersion = Version.LUCENE_34;

        /**
         * Program entry point: runs the demo once.
         *
         * @param args command-line arguments (ignored)
         */
        public static void main(String[] args) {
            TestBoboBrowse app = new TestBoboBrowse();
            app.test1();
        }

        /**
         * Creates an in-memory directory, indexes the sample data and runs the faceted search.
         *
         * <p>The directory is closed even when indexing or searching fails. Any exception
         * is printed to standard error rather than propagated.
         */
        public void test1() {
            try {
                indexDir = new RAMDirectory();
                try {
                    createIndex();
                    searchTest();
                } finally {
                    indexDir.close();
                }
            } catch (Exception ex) {
                ex.printStackTrace();
            }
        }

        /**
         * Writes the sample documents into {@link #indexDir}.
         *
         * <p>Each row becomes one document with the fields {@code vendor}, {@code cpu},
         * {@code desc} (the only analyzed field) and {@code price}. Prices are zero-padded
         * strings. On failure the pending changes are rolled back, so the writer is
         * always released; the exception is printed to standard error rather than propagated.
         */
        public void createIndex() {
            String[][] data = new String[][] {
                new String[] {"lenovo", "Intel", "PC Core2 E8200 2GB DDR2 250GB 7200RPM 22LCD", "08998"},
                new String[] {"lenovo", "Intel", "PC Core2 E8300 2GB DDR2 320GB 7200RPM 22LCD", "09998"},
                new String[] {"lenovo", "Intel", "PC Core2 Q6600 2GB DDR2 320GB 7200RPM 22LCD", "11998"},
                new String[] {"lenovo", "Intel", "PC Core2 QX9770 4GB DDR2 320GB 7200RPM RAID-1 22LCD", "19998"},
                new String[] {"lenovo", "Intel", "PC pentium E2200 1GB DDR2 160GB 5400RPM 19LCD", "05998"},
                new String[] {"hp", "Intel", "PC pentium E2180 1GB DDR2 160GB 7200RPM 20LCD", "06398"},
                new String[] {"hp", "Intel", "PC Core2 E8200 2GB DDR2 250GB 5400RPM 22LCD", "08998"},
                new String[] {"hp", "Intel", "PC Core2 E6550 2GB DDR2 250GB 7200RPM 20LCD", "07398"},
                new String[] {"hp", "Intel", "PC Core2 QX6850 4GB DDR2 320GB 5400RPM 22LCD", "13998"},
                new String[] {"asus", "AMD", "PC Core2 QX9650 4GB DDR2 450GB 7200RPM 22LCD", "17998"},
                new String[] {"dell", "AMD", "PC Core2 athlon FX76 4GB DDR2 450GB 7200RPM 22LCD", "12998"}
            };

            try {
                Analyzer analyzer = new StandardAnalyzer(luceneVersion);

                // Lucene Version >= 3.2.0 (Version.LUCENE_32)
                IndexWriterConfig indexConfig = new IndexWriterConfig(luceneVersion, analyzer);
                indexConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
                IndexWriter indexWriter = new IndexWriter(indexDir, indexConfig);

                // Lucene Version < 3.2.0 (Version.LUCENE_32)
                // IndexWriter indexWriter = new IndexWriter(indexDir, analyzer, true, MaxFieldLength.LIMITED);

                boolean committed = false;
                try {
                    for (String[] row : data) {
                        Document doc = new Document();
                        doc.add(new Field("vendor", row[0], Field.Store.YES, Field.Index.NOT_ANALYZED));
                        doc.add(new Field("cpu", row[1], Field.Store.YES, Field.Index.NOT_ANALYZED));
                        doc.add(new Field("desc", row[2], Field.Store.YES, Field.Index.ANALYZED));
                        doc.add(new Field("price", row[3], Field.Store.YES, Field.Index.NOT_ANALYZED));
                        indexWriter.addDocument(doc);
                    }

                    indexWriter.optimize();
                    indexWriter.commit();
                    committed = true;
                } finally {
                    if (committed) {
                        indexWriter.close();
                    } else {
                        // Discard the partial index and release the writer instead of leaking it.
                        indexWriter.rollback();
                    }
                }
            } catch (Exception ex) {
                ex.printStackTrace();
            }
        }

        /**
         * Searches {@code desc} for {@code Core2} and prints the total hit count followed by
         * the facet counts for vendor, cpu and price.
         *
         * <p>Both readers are closed even when the search fails. Any exception is printed
         * to standard error rather than propagated.
         */
        private void searchTest() {
            try {
                String fieldName = "desc";
                String keywords = "Core2";
                QueryParser queryParser = new QueryParser(luceneVersion, fieldName, new StandardAnalyzer(luceneVersion));
                Query query = queryParser.parse(keywords);

                IndexReader indexReader = IndexReader.open(indexDir, true);
                try {
                    BoboIndexReader boboIndexReader = BoboIndexReader.getInstance(indexReader, buildFacetHandlers());
                    try {
                        Browsable browser = new BoboBrowser(boboIndexReader);
                        BrowseResult browseResult = browser.browse(buildRequest(query));

                        System.out.println("=====Total records: " + browseResult.getNumHits());

                        // Fetch the facet (group) counts.
                        Map<String, FacetAccessible> facetMap = browseResult.getFacetMap();
                        if (!facetMap.isEmpty()) {
                            printFacets("-------Vendor-----------------------", facetMap.get("vendor"));
                            printFacets("-------CPU----------------", facetMap.get("cpu"));
                            printFacets("-------Price----------------", facetMap.get("price"));
                        }
                    } finally {
                        boboIndexReader.close();
                    }
                } finally {
                    indexReader.close();
                }
            } catch (Exception ex) {
                ex.printStackTrace();
            }
        }

        /** Builds the facet handlers for the vendor, cpu and price fields. */
        private List<FacetHandler<?>> buildFacetHandlers() {
            List<FacetHandler<?>> facetHandlers = new ArrayList<FacetHandler<?>>();
            facetHandlers.add(new SimpleFacetHandler("vendor"));
            facetHandlers.add(new SimpleFacetHandler("cpu"));

            // Alternative with open-ended ranges:
            //facetHandlers.add(new RangeFacetHandler("price", Arrays.asList(new String[]{"[* TO 09998]", "[09999 TO 11998]", "[11999 TO *]"})));
            String[] ranges = new String[] {"[00000 TO 09999]", "[10000 TO 11998]", "[11999 TO 30000]"};
            facetHandlers.add(new RangeFacetHandler("price", new PredefinedTermListFactory(Integer.class, "0"), Arrays.asList(ranges)));
            return facetHandlers;
        }

        /**
         * Builds the browse request: first 10 hits for {@code query}, with up to 10 values
         * per facet ordered by descending hit count.
         *
         * @param query the parsed Lucene query to run
         * @return the configured request
         */
        private BrowseRequest buildRequest(Query query) {
            BrowseRequest browseRequest = new BrowseRequest();
            browseRequest.setCount(10);
            browseRequest.setOffset(0);
            browseRequest.setQuery(query);

            // Optional sorting of the hits:
            //SortField[] sortFields = new SortField[2];
            //sortFields[0] = new SortField("vendor", SortField.STRING, true);
            //sortFields[1] = new SortField("price", SortField.INT, true);
            //browseRequest.setSort(sortFields);

            FacetSpec facetSpec = new FacetSpec();
            facetSpec.setMaxCount(10); // maximum number of facet values returned
            facetSpec.setOrderBy(FacetSortSpec.OrderHitsDesc);

            browseRequest.setFacetSpec("vendor", facetSpec);
            browseRequest.setFacetSpec("cpu", facetSpec);
            browseRequest.setFacetSpec("price", facetSpec);
            return browseRequest;
        }

        /**
         * Prints a header line followed by one {@code value(count)} line per facet value.
         *
         * @param header the separator/title line to print first
         * @param facets the facet counts to print; may be {@code null} when the result
         *               contains no entry for that facet, in which case only the header is printed
         */
        private void printFacets(String header, FacetAccessible facets) {
            System.out.println(header);
            if (facets == null) {
                return;
            }
            for (BrowseFacet f : facets.getFacets()) {
                System.out.println(f.getValue() + "(" + f.getHitCount() + ")");
            }
        }
    }

     

    输出结果:

    =====Total records: 9
    -------Vendor-----------------------
    lenovo(4)
    hp(3)
    asus(1)
    dell(1)
    -------CPU----------------
    Intel(7)
    AMD(2)
    -------Price----------------
    [00000 TO 09999](4)
    [11999 TO 30000](4)
    [10000 TO 11998](1)
  • 相关阅读:
    hdu
    如何在maven中的项目使用tomcat插件
    Intellij IDEA 像eclipse那样给maven添加依赖,且Intellij idea里在pom.xml里添加Maven依赖,本地仓库下拉列表显示包很少的血的经验
    DataTables warning: table id=costitemProject
    Navicat Premium Mac 12 破解
    mac显示隐藏的文件
    tomcat7下载地址
    mac同时安装jdk7和jdk8
    屏蔽datatable错误提示
    mac上配置java jdk环境
  • 原文地址:https://www.cnblogs.com/lexus/p/2291831.html
Copyright © 2011-2022 走看看