zoukankan      html  css  js  c++  java
  • Lucene系列-facet

    1.facet的直观认识

    facet:面、切面、方面。个人理解就是维度,在满足query的前提下,观察结果在各维度上的分布(一个维度下各子类的数目)。

    如jd上搜“手机”,得到4009个商品。其中品牌、网络、价格就是商品的维度(facet),点击某个品牌或者网络,获取更细分的结果。


    点击品牌小米,获得小米手机的结果,显示27个。


    点击移动4G,获得移动4G、小米手机,显示4个。


    2.facet特性

    • facet counting:返回一个facet下某子类的结果数。如上面的品牌维度下小米子类中满足查询"手机"的结果有27个。
    • facet associations:一个文档与某子类的关联度,如一本书30%讲Lucene,70%讲Solr,这个百分比就是书与分类的关联度(匹配度、信心度)。
    • multiple facet requests:支持多facet查询(多维度查询)。如查询品牌为小米、网络为移动4G的手机。

    3.实例

    一个facet简单使用例子,依赖于lucene-facet-4.10.0。讲述了从搜“手机”开始,依次按品牌、网络逐层向下浏览(drill down)的过程,最后演示横向浏览(drill sideways)。

    public class SimpleFacetsExample {
        private final Directory indexDir = new RAMDirectory();
        private final Directory taxoDir = new RAMDirectory();
        private final FacetsConfig config = new FacetsConfig();
    
        /** Empty constructor */
        public SimpleFacetsExample() {
            config.setHierarchical("Publish Date", true);
        }
    
        /** Build the example index. */
        private void index() throws IOException {
            IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(Version.LUCENE_4_10_0,
                    new WhitespaceAnalyzer()));
            // Writes facet ords to a separate directory from the main index
            DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
    
            Document doc = new Document();
            doc.add(new TextField("device", "手机", Field.Store.YES));
            doc.add(new TextField("name", "米1", Field.Store.YES));
            doc.add(new FacetField("brand", "小米"));
            doc.add(new FacetField("network", "移动4G"));
            indexWriter.addDocument(config.build(taxoWriter, doc));
    
            doc = new Document();
            doc.add(new TextField("device", "手机", Field.Store.YES));
            doc.add(new TextField("name", "米4", Field.Store.YES));
            doc.add(new FacetField("brand", "小米"));
            doc.add(new FacetField("network", "联通4G"));
            indexWriter.addDocument(config.build(taxoWriter, doc));
    
            doc = new Document();
            doc.add(new TextField("device", "手机", Field.Store.YES));
            doc.add(new TextField("name", "荣耀6", Field.Store.YES));
            doc.add(new FacetField("brand", "华为"));
            doc.add(new FacetField("network", "移动4G"));
            indexWriter.addDocument(config.build(taxoWriter, doc));
    
            doc = new Document();
            doc.add(new TextField("device", "电视", Field.Store.YES));
            doc.add(new TextField("name", "小米电视2", Field.Store.YES));
            doc.add(new FacetField("brand", "小米"));
            indexWriter.addDocument(config.build(taxoWriter, doc));
    
            taxoWriter.close();
            indexWriter.close();
        }
    
        private void facetsWithSearch() throws IOException {
            DirectoryReader indexReader = DirectoryReader.open(indexDir);
            IndexSearcher searcher = new IndexSearcher(indexReader);
            TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
    
            FacetsCollector fc = new FacetsCollector();
            //1.查询手机
            System.out.println("-----手机-----");
            TermQuery query = new TermQuery(new Term("device", "手机"));
            FacetsCollector.search(searcher, query, 10, fc);
            Facets facets = new FastTaxonomyFacetCounts(taxoReader, config, fc);
            List<FacetResult> results = facets.getAllDims(10);
            //手机总共有3个,品牌维度:小米2个,华为1个;网络维度:移动4G 2个,联通4G 1个
            for (FacetResult tmp : results) {
                System.out.println(tmp);
            }
            //2.drill down,品牌选小米
            System.out.println("-----小米手机-----");
            DrillDownQuery drillDownQuery = new DrillDownQuery(config, query);
            drillDownQuery.add("brand", "小米");
            FacetsCollector fc1 = new FacetsCollector();//要new新collector,否则会累加
            FacetsCollector.search(searcher, drillDownQuery, 10, fc1);
            facets = new FastTaxonomyFacetCounts(taxoReader, config, fc1);
            results = facets.getAllDims(10);
            //获得小米手机的分布,总数2个,网络:移动4G 1个,联通4G 1个
            for (FacetResult tmp : results) {
                System.out.println(tmp);
            }
    
            //3.drill down,小米移动4G手机
            System.out.println("-----移动4G小米手机-----");
            drillDownQuery.add("network", "移动4G");
            FacetsCollector fc2 = new FacetsCollector();
            FacetsCollector.search(searcher, drillDownQuery, 10, fc2);
            facets = new FastTaxonomyFacetCounts(taxoReader, config, fc2);
            results = facets.getAllDims(10);
            for (FacetResult tmp : results) {
                System.out.println(tmp);
            }
    
            //4.drill sideways,横向浏览
            //如果已经进入了小米手机,但是还想看到其他牌子(华为)的手机数目,就用到了sideways
            System.out.println("-----小米手机drill sideways-----");
            DrillSideways ds = new DrillSideways(searcher, config, taxoReader);
            DrillDownQuery drillDownQuery1 = new DrillDownQuery(config, query);
            drillDownQuery1.add("brand", "小米");
            DrillSidewaysResult result = ds.search(drillDownQuery1, 10);
            results = result.facets.getAllDims(10);
            for (FacetResult tmp : results) {
                System.out.println(tmp);
            }
    
            indexReader.close();
            taxoReader.close();
        }
    
        /** Runs the search and drill-down examples and prints the results. */
        public static void main(String[] args) throws Exception {
            SimpleFacetsExample example = new SimpleFacetsExample();
            example.index();
            example.facetsWithSearch();
        }
    }

    输出:

    -----手机-----
    //总数3个,2个子类
    dim=brand path=[] value=3 childCount=2
      小米 (2)
      华为 (1)
    
    dim=network path=[] value=3 childCount=2
      移动4G (2)
      联通4G (1)
    
    -----小米手机-----
    //普通向下浏览,丢失了同一维度,其他子类的统计
    dim=brand path=[] value=2 childCount=1
      小米 (2)
    
    dim=network path=[] value=2 childCount=2
      移动4G (1)
      联通4G (1)
    
    -----移动4G小米手机-----
    dim=brand path=[] value=1 childCount=1
      小米 (1)
    
    dim=network path=[] value=1 childCount=1
      移动4G (1)
    
    -----小米手机drill sideways-----
    //drill sideways, 保留了该drill维度的其他子类统计
    dim=brand path=[] value=3 childCount=2
      小米 (2)
      华为 (1)
    //小米手机中的网络分布
    dim=network path=[] value=2 childCount=2
      移动4G (1)
      联通4G (1)
  • 相关阅读:
    对国内技术社区
    github访问慢
    MVC 框架搭建
    EntityFramework与Ado.net的对比——EF优势何在?
    什么是DTO ,DTO 有什么作用
    一些有趣的资源推荐
    解决sublime text 3使用Install Package时出现There are no packages available for installation问题
    基于CentOS7的服务器搭建(LAMP环境)
    Windows下UEFI环境的搭建
    Ubuntu16.04下搜狗输入法、Sublime Text 3的安装
  • 原文地址:https://www.cnblogs.com/whuqin/p/4981965.html
Copyright © 2011-2022 走看看