zoukankan      html  css  js  c++  java
  • [solr]

    Solr 提供了拼写检查(spell check)组件,也可用来给出搜索建议(suggestions),从而实现查询输入的自动完成(auto-complete)功能。

    参考文献:

    https://cwiki.apache.org/confluence/display/solr/Spell+Checking

    http://www.cnblogs.com/ibook360/archive/2011/11/30/2269077.html

    方法:


    修改core的solrconfig.xml

    加入这段到<config />内

        <!-- Spell-check search component. It declares a single dictionary named
             "wordbreak" that is backed by the Suggester class with a TST lookup
             and reads its terms from the "content" field. -->
        <searchComponent name="spellcheck" class="solr.SpellCheckComponent">
          <lst name="spellchecker">
            <str name="name">wordbreak</str>
            <str name="classname">org.apache.solr.spelling.suggest.Suggester</str>  
            <str name="lookupImpl">org.apache.solr.spelling.suggest.tst.TSTLookup</str>
            <str name="field">content</str>
            <!-- NOTE(review): combineWords / breakWords / maxChanges look like
                 options of a word-break spell checker; confirm that the Suggester
                 class configured above actually reads them. -->
            <str name="combineWords">true</str>
            <str name="breakWords">true</str>
            <int name="maxChanges">10</int>
          </lst>
        </searchComponent>
        <!-- Request handler exposed at /spellcheck. By default it turns spell
             checking on, selects the "wordbreak" dictionary, asks for up to 20
             suggestions, and runs the "spellcheck" component as a last-component. -->
        <requestHandler name="/spellcheck" class="org.apache.solr.handler.component.SearchHandler">
          <lst name="defaults">
            <str name="spellcheck">true</str>
            <str name="spellcheck.dictionary">wordbreak</str>
            <str name="spellcheck.count">20</str>
          </lst>
          <arr name="last-components">
            <str>spellcheck</str>
          </arr>
        </requestHandler>

    schema.xml配置:

    <?xml version="1.0" ?>
    <!-- Schema for the demo core: primitive field types, one Chinese text type
         (text_cn), and the item fields that the Java bean maps onto. -->
    <schema name="my core" version="1.1">
    
        <!-- primitive field types -->
        <fieldtype name="string"  class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
        <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
        <fieldType name="tdate" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0"/>
        <fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/>
        <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" positionIncrementGap="0"/>
        <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" positionIncrementGap="0"/>
        <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
        <fieldtype name="binary" class="solr.BinaryField"/>
        <!-- Text type used by subject, content and category_name.
             NOTE(review): three analyzers are declared here: IKAnalyzer for
             type="index", IKAnalyzer for type="query", and an untyped
             keyword-tokenizer + lower-case analyzer. Confirm which of them Solr
             actually applies when all three are present. -->
        <fieldType name="text_cn" class="solr.TextField">
            <analyzer type="index" class="org.wltea.analyzer.lucene.IKAnalyzer" />
            <analyzer type="query" class="org.wltea.analyzer.lucene.IKAnalyzer" />
            <analyzer>
                <tokenizer class="solr.KeywordTokenizerFactory"/>
                <filter class="solr.LowerCaseFilterFactory"/>
            </analyzer>
        </fieldType>
        
        <!-- general -->
        <field name="id" type="long" indexed="true" stored="true" multiValued="false" required="true"/>
        <field name="subject" type="text_cn" indexed="true" stored="true" />
        <!-- "content" is the field the spellcheck dictionary in solrconfig.xml is built from -->
        <field name="content" type="text_cn" indexed="true" stored="true" />
        <field name="category_id" type="long" indexed="true" stored="true" />
        <field name="category_name" type="text_cn" indexed="true" stored="true" />
        <field name="last_update_time" type="tdate" indexed="true" stored="true" />
        <field name="_version_" type="long" indexed="true" stored="true"/>
        
         <!-- field to use to determine and enforce document uniqueness. -->
         <uniqueKey>id</uniqueKey>
    
         <!-- field for the QueryParser to use when an explicit fieldname is absent -->
         <defaultSearchField>subject</defaultSearchField>
    
         <!-- SolrQueryParser configuration: defaultOperator="AND|OR" -->
         <solrQueryParser defaultOperator="OR"/>
    </schema>

    关键在于这句:

            <analyzer>
                <tokenizer class="solr.KeywordTokenizerFactory"/>
                <filter class="solr.LowerCaseFilterFactory"/>
            </analyzer>

    意思是词组搜索

    设置完xml,重启tomcat,在浏览器中运行:

    http://localhost:8899/solr/mycore/spellcheck?spellcheck.build=true

    运行结果:

    然后在浏览器中运行:

    http://localhost:8899/solr/mycore/spellcheck?q=中央&rows=0

    运行结果:

    Java代码:


    Java bean:

    package com.my.entity;
    
    import java.util.Date;
    
    import org.apache.solr.client.solrj.beans.Field;
    
    /**
     * Bean bound to one Solr document of the demo core.
     *
     * <p>Each field carries a SolrJ {@code @Field} annotation; fields whose Java
     * name differs from the Solr field name pass the Solr name explicitly.
     */
    public class Item {

        @Field private long id;
        @Field private String subject;
        @Field private String content;
        @Field("category_id") private long categoryId;
        @Field("category_name") private String categoryName;
        @Field("last_update_time") private Date lastUpdateTime;

        // ---- id ----

        /** @return the document id */
        public long getId() {
            return this.id;
        }

        /** @param id the document id */
        public void setId(long id) {
            this.id = id;
        }

        // ---- subject ----

        /** @return the subject text */
        public String getSubject() {
            return this.subject;
        }

        /** @param subject the subject text */
        public void setSubject(String subject) {
            this.subject = subject;
        }

        // ---- content ----

        /** @return the content text */
        public String getContent() {
            return this.content;
        }

        /** @param content the content text */
        public void setContent(String content) {
            this.content = content;
        }

        // ---- category_id ----

        /** @return the value bound to the {@code category_id} field */
        public long getCategoryId() {
            return this.categoryId;
        }

        /** @param categoryId the value bound to the {@code category_id} field */
        public void setCategoryId(long categoryId) {
            this.categoryId = categoryId;
        }

        // ---- category_name ----

        /** @return the value bound to the {@code category_name} field */
        public String getCategoryName() {
            return this.categoryName;
        }

        /** @param categoryName the value bound to the {@code category_name} field */
        public void setCategoryName(String categoryName) {
            this.categoryName = categoryName;
        }

        // ---- last_update_time ----

        /** @return the value bound to the {@code last_update_time} field */
        public Date getLastUpdateTime() {
            return this.lastUpdateTime;
        }

        /** @param lastUpdateTime the value bound to the {@code last_update_time} field */
        public void setLastUpdateTime(Date lastUpdateTime) {
            this.lastUpdateTime = lastUpdateTime;
        }
    }

    测试代码:

    package com.my.solr;
    
    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.Date;
    import java.util.List;
    import java.util.Map;
    
    import org.apache.solr.client.solrj.SolrQuery;
    import org.apache.solr.client.solrj.SolrServerException;
    import org.apache.solr.client.solrj.impl.HttpSolrServer;
    import org.apache.solr.client.solrj.impl.XMLResponseParser;
    import org.apache.solr.client.solrj.response.QueryResponse;
    import org.apache.solr.client.solrj.response.SpellCheckResponse;
    import org.apache.solr.client.solrj.response.SpellCheckResponse.Collation;
    import org.apache.solr.client.solrj.response.SpellCheckResponse.Correction;
    import org.apache.solr.client.solrj.response.SpellCheckResponse.Suggestion;
    
    import com.my.entity.Item;
    
    public class TestSolr {
    
        public static void main(String[] args) throws IOException, SolrServerException {
            String url = "http://localhost:8899/solr/mycore";
            HttpSolrServer core = new HttpSolrServer(url);
            core.setMaxRetries(1);
            core.setConnectionTimeout(5000);
            core.setParser(new XMLResponseParser()); // binary parser is used by default
            core.setSoTimeout(1000); // socket read timeout
            core.setDefaultMaxConnectionsPerHost(100);
            core.setMaxTotalConnections(100);
            core.setFollowRedirects(false); // defaults to false
            core.setAllowCompression(true);
    
            // ------------------------------------------------------
            // remove all data
            // ------------------------------------------------------
            core.deleteByQuery("*:*");
            List<Item> items = new ArrayList<Item>();
            items.add(makeItem(1, "cpu", "this is intel cpu", 1, "cpu-intel"));
            items.add(makeItem(2, "cpu AMD", "this is AMD cpu", 2, "cpu-AMD"));
            items.add(makeItem(3, "cpu intel", "this is intel-I7 cpu", 1, "cpu-intel"));
            items.add(makeItem(4, "cpu AMD", "this is AMD 5000x cpu", 2, "cpu-AMD"));
            items.add(makeItem(5, "cpu intel I6", "this is intel-I6 cpu", 1, "cpu-intel-I6"));
            items.add(makeItem(6, "处理器", "中央处理器英特儿", 1, "cpu-intel"));
            items.add(makeItem(7, "处理器AMD", "中央处理器AMD", 2, "cpu-AMD"));
            items.add(makeItem(8, "中央处理器", "中央处理器Intel", 1, "cpu-intel"));
            items.add(makeItem(9, "中央空调格力", "格力中央空调", 3, "air"));
            items.add(makeItem(10, "中央空调海尔", "海尔中央空调", 3, "air"));
            items.add(makeItem(11, "中央空调美的", "美的中央空调", 3, "air"));
            core.addBeans(items);
            // commit
            core.commit();
    
            // ------------------------------------------------------
            // search
            // ------------------------------------------------------
            SolrQuery query = new SolrQuery();
            String token = "中央";
            query.set("qt", "/spellcheck");
            query.set("q", token);
            query.set("spellcheck", "on");
            query.set("spellcheck.build", "true");
            query.set("spellcheck.onlyMorePopular", "true");
    
            query.set("spellcheck.count", "100");
            query.set("spellcheck.alternativeTermCount", "4");
            query.set("spellcheck.onlyMorePopular", "true");
    
            query.set("spellcheck.extendedResults", "true");
            query.set("spellcheck.maxResultsForSuggest", "5");
    
            query.set("spellcheck.collate", "true");
            query.set("spellcheck.collateExtendedResults", "true");
            query.set("spellcheck.maxCollationTries", "5");
            query.set("spellcheck.maxCollations", "3");
    
            QueryResponse response = null;
    
            try {
                response = core.query(query);
                System.out.println("查询耗时:" + response.getQTime());
            } catch (SolrServerException e) {
                System.err.println(e.getMessage());
                e.printStackTrace();
            } catch (Exception e) {
                System.err.println(e.getMessage());
                e.printStackTrace();
            } finally {
                core.shutdown();
            }
    
            SpellCheckResponse spellCheckResponse = response.getSpellCheckResponse();
            if (spellCheckResponse != null) {
                List<Suggestion> suggestionList = spellCheckResponse.getSuggestions();
                for (Suggestion suggestion : suggestionList) {
                    System.out.println("Suggestions NumFound: " + suggestion.getNumFound());
                    System.out.println("Token: " + suggestion.getToken());
                    System.out.print("Suggested: ");
                    List<String> suggestedWordList = suggestion.getAlternatives();
                    for (String word : suggestedWordList) {
                        System.out.println(word + ", ");
                    }
                    System.out.println();
                }
                System.out.println();
                Map<String, Suggestion> suggestedMap = spellCheckResponse.getSuggestionMap();
                for (Map.Entry<String, Suggestion> entry : suggestedMap.entrySet()) {
                    System.out.println("suggestionName: " + entry.getKey());
                    Suggestion suggestion = entry.getValue();
                    System.out.println("NumFound: " + suggestion.getNumFound());
                    System.out.println("Token: " + suggestion.getToken());
                    System.out.print("suggested: ");
    
                    List<String> suggestedList = suggestion.getAlternatives();
                    for (String suggestedWord : suggestedList) {
                        System.out.print(suggestedWord + ", ");
                    }
                    System.out.println("
    
    ");
                }
    
                Suggestion suggestion = spellCheckResponse.getSuggestion(token);
                System.out.println("NumFound: " + suggestion.getNumFound());
                System.out.println("Token: " + suggestion.getToken());
                System.out.print("suggested: ");
                List<String> suggestedList = suggestion.getAlternatives();
                for (String suggestedWord : suggestedList) {
                    System.out.print(suggestedWord + ", ");
                }
                System.out.println("
    
    ");
    
                System.out.println("The First suggested word for solr is : " + spellCheckResponse.getFirstSuggestion(token));
                System.out.println("
    
    ");
    
                List<Collation> collatedList = spellCheckResponse.getCollatedResults();
                if (collatedList != null) {
                    for (Collation collation : collatedList) {
                        System.out.println("collated query String: " + collation.getCollationQueryString());
                        System.out.println("collation Num: " + collation.getNumberOfHits());
                        List<Correction> correctionList = collation.getMisspellingsAndCorrections();
                        for (Correction correction : correctionList) {
                            System.out.println("original: " + correction.getOriginal());
                            System.out.println("correction: " + correction.getCorrection());
                        }
                        System.out.println();
                    }
                }
                System.out.println();
                System.out.println("The Collated word: " + spellCheckResponse.getCollatedResult());
                System.out.println();
            }
    
            System.out.println("查询耗时:" + response.getQTime());
        }
    
        private static Item makeItem(long id, String subject, String content, long categoryId, String categoryName) {
            Item item = new Item();
            item.setId(id);
            item.setSubject(subject);
            item.setContent(content);
            item.setLastUpdateTime(new Date());
            item.setCategoryId(categoryId);
            item.setCategoryName(categoryName);
            return item;
        }
    }

    测试结果:

    这种方式可用于对现有数据内容进行查询拼写检查。

  • 相关阅读:
    JVM调优总结(转载)
    项目应该如何分层(转载)
    SpringCloud Feign 配置(基于Consul)
    yml配置文件嵌套
    SpringCloud Zuul基于Consul配置及详解
    springBoot聚合项目打包
    SpringCloud Config 配置(基于Consul)
    hibernate的三种状态
    IOS 图片全屏预览
    IOS 下拉菜单
  • 原文地址:https://www.cnblogs.com/HD/p/3993424.html
Copyright © 2011-2022 走看看