zoukankan      html  css  js  c++  java
  • solr5.3的spellcheck功能

    1. 在 schema.xml 中增加用于拼写检查的字段及其对应的字段类型。

    <!-- Field the "default" spellchecker is pointed at; analyzed with the text_cn field type. -->
    <field name="title"
           type="text_cn"
           indexed="true"
           stored="true"
           required="false"
           multiValued="false"/>
    
    <!-- Chinese text type: HanLP tokenization followed by the custom pinyin filters. -->
    <fieldType name="text_cn" class="solr.TextField">
        <!-- Index-time analysis chain. -->
        <analyzer type="index">
            <!-- Tokenize with the HanLP tokenizer. -->
            <tokenizer class="com.hankcs.lucene.HanLPTokenizerFactory" mode="max-word"/>
            <!-- Additionally convert the tokenizer output to pinyin.
                 NOTE(review): the attribute is spelled "minTermLenght" exactly as in the
                 original configuration; presumably that is the name the custom factory
                 reads. Confirm against the factory source before renaming it. -->
            <filter class="com.shentong.search.analyzers.PinyinTransformTokenFilterFactory"
                    minTermLenght="2"/>
            <filter class="com.shentong.search.analyzers.PinyinNGramTokenFilterFactory"
                    minGram="6"
                    maxGram="20"/>
        </analyzer>
        <!-- Query-time analysis chain: same tokenizer and pinyin filters as the index
             chain, with a synonym filter inserted directly after the tokenizer. -->
        <analyzer type="query">
            <tokenizer class="com.hankcs.lucene.HanLPTokenizerFactory" mode="max-word"/>
            <filter class="com.hankcs.lucene.analysis.synonym.SynonymFilterFactory"
                    synonyms="synonyms.txt"
                    ignoreCase="true"
                    expand="true"
                    format="word2vec"/>
            <filter class="com.shentong.search.analyzers.PinyinTransformTokenFilterFactory"
                    minTermLenght="2"/>
            <filter class="com.shentong.search.analyzers.PinyinNGramTokenFilterFactory"
                    minGram="6"
                    maxGram="20"/>
        </analyzer>
    </fieldType>

     

    2.修改solrconfig.xml内容:

    <!-- Spellcheck search component with a single dictionary named "default". -->
    <searchComponent name="spellcheck" class="solr.SpellCheckComponent">
        <!-- Left disabled in the original configuration:
        <str name="queryAnalyzerFieldType">text_cn</str>
        -->
        <!-- A spellchecker built from a field of the main index. -->
        <lst name="spellchecker">
            <str name="name">default</str>
            <!-- Field whose index the spell check is based on; configured here as "title". -->
            <str name="field">title</str>
            <str name="classname">solr.DirectSolrSpellChecker</str>
            <str name="distanceMeasure">internal</str>
            <float name="accuracy">0.5</float>
            <int name="maxEdits">2</int>
            <int name="minPrefix">1</int>
            <int name="maxInspections">5</int>
            <int name="minQueryLength">2</int>
            <float name="maxQueryFrequency">0.01</float>
        </lst>
    </searchComponent>
      <!-- /spell handler: a SearchHandler that runs the "spellcheck" component last. -->
      <requestHandler name="/spell" class="solr.SearchHandler" startup="lazy">
        <!-- Default request parameters. -->
        <lst name="defaults">
          <str name="df">title</str>
          <str name="spellcheck.dictionary">default</str>
          <str name="spellcheck">true</str>
          <!-- Optional parameters, left disabled in the original configuration:
          <str name="spellcheck.extendedResults">true</str>
          <str name="spellcheck.count">10</str>
          <str name="spellcheck.alternativeTermCount">5</str>
          <str name="spellcheck.maxResultsForSuggest">5</str>
          -->
          <str name="spellcheck.collate">true</str>
          <str name="spellcheck.collateExtendedResults">true</str>
          <!-- Optional collation limits, left disabled in the original configuration:
          <str name="spellcheck.maxCollationTries">10</str>
          <str name="spellcheck.maxCollations">5</str>
          -->
        </lst>
        <arr name="last-components">
          <str>spellcheck</str>
        </arr>
      </requestHandler>

    3.重新创建索引使其生效.

    PS:本文参考了另一位博主的文章 http://blog.csdn.net/kevinxxw/article/details/49708311 ,但使用的分词器不同,并额外增加了针对拼音的智能纠错。

  • 相关阅读:
    windows 11下载地址
    ubuntu 安装无线网卡驱动
    修复因为安装openssl 1.0.0而导致使用ssh和scp产生警告信息
    vim下Java自动补全插件javacomplete
    mysql 配置多实例(mysqld_multi)
    nagios 整合 ganglia 设置邮件、短信报警
    Linux (ubuntu 10.10) 安装两个MySQL
    HDU 3269 P2P File Sharing System
    ubuntu 10.10 安装 sun java
    闲逛计算机系统(一):从HelloWorld说起
  • 原文地址:https://www.cnblogs.com/cuihongyu3503319/p/9447214.html
Copyright © 2011-2022 走看看