zoukankan      html  css  js  c++  java
  • Solr 6.1学习笔记 -- spellcheck 组件

    <searchComponent name="spellcheck" class="solr.SpellCheckComponent">

      <!-- Field type whose analyzer is applied to the spellcheck query. -->
      <str name="queryAnalyzerFieldType">text_general</str>

      <!-- This spellchecker uses DirectSolrSpellChecker, which spell-checks
           against the terms of the main index. -->
      <lst name="spellchecker">
        <str name="name">default</str>
        <str name="field">text</str>
        <str name="classname">solr.DirectSolrSpellChecker</str>

        <!-- Edit-distance measure used by the spellchecker (the changes
             needed to turn one word into another). The default value,
             "internal", uses the Levenshtein metric. -->
        <str name="distanceMeasure">internal</str>

        <!-- Minimum accuracy, between 0 and 1, for a candidate to count as a
             valid suggestion. The larger the value, the fewer the matches.
             Author's reading: when checking "cit", a sufficiently small value
             lets both "fat" and "sit" be accepted as corrections, while a
             sufficiently large value leaves only "sit". -->
        <float name="accuracy">0.5</float>

        <!-- Either 1 or 2: the maximum number of character edits allowed.
             Example: "manag" is corrected to "manager", but "mana" is not,
             because going from "mana" to "manager" takes 3 edits. -->
        <int name="maxEdits">2</int>

        <!-- Minimum length of the prefix that must match. With 1, the first
             letter may not be wrong: "cinner" is only one edit away from
             "dinner", but the edit is in the first letter, so "dinner" is not
             offered as a correction for "cinner". -->
        <int name="minPrefix">1</int>

        <!-- Maximum number of inspections per result. -->
        <int name="maxInspections">5</int>

        <!-- Minimum number of characters a term needs before it is
             spell-checked. With 4, a 3-letter input is never checked. -->
        <int name="minQueryLength">4</int>

        <!-- Maximum threshold of documents a query term can appear in to be
             considered for correction. -->
        <float name="maxQueryFrequency">0.01</float>

        <!-- Minimum frequency with which a suggested term must occur in the
             documents. An integer is an absolute number of occurrences; a
             fractional value is the share of documents containing the term.
             <float name="thresholdTokenFrequency">.01</float>
        -->
      </lst>

      <!-- A spellchecker that can break or combine words.
           See the "/spell" handler below for usage. -->
      <lst name="spellchecker">
        <str name="name">wordbreak</str>
        <str name="classname">solr.WordBreakSolrSpellChecker</str>
        <str name="field">text</str>
        <str name="combineWords">true</str>
        <str name="breakWords">true</str>
        <int name="maxChanges">10</int>
      </lst>

      <!-- A spellchecker that uses an alternate comparator.
           comparatorClass can be one of:
             1. score (default)
             2. freq (frequency first, then score)
             3. a fully qualified class name
      -->
      <!--
      <lst name="spellchecker">
        <str name="name">freq</str>
        <str name="field">lowerfilt</str>
        <str name="classname">solr.DirectSolrSpellChecker</str>
        <str name="comparatorClass">freq</str>
      </lst>
      -->
    </searchComponent>

    <!-- A request handler for demonstrating the spellcheck component.

         NOTE: This is purely an example. The whole purpose of the
         SpellCheckComponent is to hook it into the request handler that
         handles your normal user queries, so that a separate request is not
         needed to get suggestions.

         IN OTHER WORDS, THERE IS A REALLY GOOD CHANCE THE SETUP BELOW IS NOT
         WHAT YOU WANT FOR YOUR PRODUCTION SYSTEM!

         See http://wiki.apache.org/solr/SpellCheckComponent for details on
         the request parameters.
    -->
    <requestHandler name="/spell" class="solr.SearchHandler" startup="lazy">
      <lst name="defaults">
        <!-- Solr uses suggestions from both the 'default' spellchecker and
             the 'wordbreak' spellchecker and combines them. Collations
             (re-written queries) can include a combination of corrections
             from both spellcheckers. -->
        <str name="spellcheck.dictionary">default</str>
        <str name="spellcheck.dictionary">wordbreak</str>
        <str name="spellcheck">on</str>

        <!-- Return the index frequency of the original term as well as the
             index frequency of each suggestion. -->
        <str name="spellcheck.extendedResults">true</str>
        <str name="spellcheck.count">10</str>

        <!-- Number of suggestions returned for each query term that already
             exists in the index or dictionary. -->
        <str name="spellcheck.alternativeTermCount">5</str>

        <!-- Once a query matches more results than this value, it is no
             longer spell-checked. With 5, a query for "solr" that finds
             6 matching records gets no suggestions. -->
        <str name="spellcheck.maxResultsForSuggest">5</str>

        <!-- Have Solr build a new query from the suggestions to replace the
             misspelled one. -->
        <str name="spellcheck.collate">true</str>

        <!-- Return collations in an expanded, more detailed format.
             Ignored when spellcheck.collate is false. -->
        <str name="spellcheck.collateExtendedResults">true</str>

        <!-- Maximum number of collation attempts. -->
        <str name="spellcheck.maxCollationTries">10</str>

        <!-- Maximum number of collations to return. -->
        <str name="spellcheck.maxCollations">5</str>
      </lst>
      <arr name="last-components">
        <str>spellcheck</str>
      </arr>
    </requestHandler>
  • 相关阅读:
    转:验证日期的正则表达式比较全面地验证
    IIS应用地址池监控
    Net预编译 真的好用与否
    关键字检索,找到所有数据
    vue 文件上传自定义实现
    docker 基础(一)
    input表单中嵌入百度地图
    linux系统光盘开机自动挂载-配置本地yum源
    linux学习笔记基础篇(一)
    构建apache web 服务器
  • 原文地址:https://www.cnblogs.com/langfanyun/p/6000430.html
Copyright © 2011-2022 走看看