zoukankan      html  css  js  c++  java
  • 【转】全文检索引擎Sphinx配置文件详细介绍

    原文地址:http://blog.sina.com.cn/s/blog_6c971aa301012yfb.html

    ## Sphinx configuration file sample
    # WARNING! While this sample file mentions all available options,
    # it contains (very) short helper descriptions only. Please refer to
    # doc/sphinx.html for details.
    #
    #警告!虽然这个配置文件列出了所有的可用选项,但是它只包含非常短的帮助描述和简介,请访问并参考doc文件夹下的sphinx.html获得更加详细的帮助

    #############################################################################
    ## data source definition   数据源定义
    #数据源就是数据从哪儿来,只有知道了数据源,我们才能够清楚需要检索的是什么。
    #
    #  LAMP兄弟连提示您他的语法格式为:
    #
    #  source 名字{
    #     选项
    #
    #  }
    #############################################################################

    source src1
    {
     # data source type. mandatory, no default value
     # 数据库类,强制性的,没有默认值
     # known types are mysql, pgsql, mssql, xmlpipe, xmlpipe2, odbc
     # 已知的类型包括mysql,pgsql,mssql,xmlpipe,xmlpipe2,odbc
     type     = mysql

     #####################################################################
     ## SQL settings (for 'mysql' and 'pgsql' types)
     #  以下SQL设置专为mysql,pgsql类型提供支持
     #####################################################################

     # some straightforward parameters for SQL source types
     # 一些简单的数据源类型参数
     sql_host    = localhost
     #sql主机
     sql_user    = test
     #sql用户
     sql_pass    = password
     #sql密码
     sql_db      = test
     #sql数据库
     sql_port    = 3306 # optional, default is 3306
     #sql数据库端口,mysql的默认值为3306,可以进行修改

     # UNIX socket name
     # UNIX socket 名称
     # optional, default is empty (reuse client library defaults)
     # 可选,默认为空(重用默认客户端库默认)
     # usually '/var/lib/mysql/mysql.sock' on Linux
     # linux服务器上通常在以下路径'/var/lib/mysql/mysql.sock',根据具体情况不同而进行设置不同
     # usually '/tmp/mysql.sock' on FreeBSD
     # FreeBSD操作系统通常在/tmp/mysql.sock路径下
     #
     # sql_sock    = /tmp/mysql.sock
     # 在此处选择设置sock路径


     # MySQL specific client connection flags
     # mysql 具体的客户端连接标识
     # optional, default is 0
     # 可选,默认置为0
     #
     # mysql_connect_flags = 32 # enable compression
     #                            启用压缩

     # MySQL specific SSL certificate settings
     # mysql 具体的 SSL(安全套接层)证书设置
     # optional, defaults are empty
     # 可选项,默认值为空
     #
     # mysql_ssl_cert  = /etc/ssl/client-cert.pem
     # mysql ssl证书路径
     #
     # mysql_ssl_key  = /etc/ssl/client-key.pem
     # 证书key
     # mysql_ssl_ca  = /etc/ssl/cacert.pem
     # 证书ca(何为CA:CA是证书的签发机构,它是PKI的核心。CA是负责签发证书、认证证书、管理已颁发证书的机关。它要制定政策和具体步骤来验证、识别用户身份,并对用户证书进行签名,以确保证书持有者的身份和公钥的拥有权。)

     # MS SQL specific Windows authentication mode flag
     # MSSQL 具体的身份验证模式标识
     # MUST be in sync with charset_type index-level setting
     # 必须与索引级别的charset_type设置保持一致(同步)
     # optional, default is 0
     # 可选的,默认值为0
     #
     # mssql_winauth   = 1 # use currently logged on user credentials
     #         使用当前用户登陆凭据


     # MS SQL specific Unicode indexing flag
     # MS SQL 具体的unicode 索引标识
     # optional, default is 0 (request SBCS data)
     # 可选的,默认值为0 (请求SBCS数据)
     # mssql_unicode   = 1 # request Unicode data from server
     #         向服务器请求unicode数据


     # ODBC specific DSN (data source name)
     # ODBC 具体的DSN(注:何为DSN?DSN为数据源名)
     # mandatory for odbc source type, no default value
     # 以下部份设置DSN,根据不同情况进行不同设置
     # odbc_dsn    = DBQ=C:\data;DefaultDir=C:\data;Driver={Microsoft Text Driver (*.txt; *.csv)};
     # sql_query    = SELECT id, data FROM documents.csv


     # pre-query, executed before the main fetch query
     # 预查询,在主取数查询之前执行
     # multi-value, optional, default is empty list of queries
     # 多值,可选,默认值为空的查询列表
     # sql_query_pre   = SET NAMES utf8
     # 预查询设置字符集utf8,切记不要加中横线
     # sql_query_pre   = SET SESSION query_cache_type=OFF
     # 设置查询的SESSION query_cache_type为关闭状态


     # main document fetch query
     # mandatory, integer document ID field MUST be the first selected column
     # 主文档取数查询;必选项,整型的文档ID字段必须是SELECT中的第一列
     sql_query    = \
      SELECT id, group_id, UNIX_TIMESTAMP(date_added) AS date_added, title, content \
      FROM documents

     # range query setup, query that must return min and max ID values
     # 查询范围设置,查询必须设置一个返回的最大值和最小值
     # optional, default is empty
     # 可选项,默认值为空
     #
     # sql_query will need to reference $start and $end boundaries
     # SQL_QUERY 需要引用一个$start 和$end 边界
     # if using ranged query:
     #
     # sql_query    = \
     # SELECT doc.id, doc.id AS group, doc.title, doc.data \
     # FROM documents doc \
     # WHERE id>=$start AND id<=$end
     #
     # sql_query_range  = SELECT MIN(id),MAX(id) FROM documents
     # 范围查询:从documents表中查询ID的最小值和最大值。此处的documents是指样例文件example.sql中创建的数据表


     # range query step
     # 范围查询步长
     # optional, default is 1024
     # 可选项,默认值为1024
     #
     # sql_range_step  = 1000


     # unsigned integer attribute declaration
     # 无符号整型声明
     # multi-value (an arbitrary number of attributes is allowed), optional
     # 多值,准许任意数量的属性
     # optional bit size can be specified, default is 32
     # 可选的位大小是可以被指定的,默认为32
     #
     # sql_attr_uint   = author_id
     # sql_attr_uint   = forum_id:9 # 9 bits for forum_id 存储9位给forum_id这个字段
     sql_attr_uint   = group_id

     # boolean attribute declaration
     # 布尔属性声明
     # multi-value (an arbitrary number of attributes is allowed), optional
     # 多值(准许任意数量的属性),可选
     # equivalent to sql_attr_uint with 1-bit size
     # 相当于位长为1的 sql_attr_uint
     # sql_attr_bool   = is_deleted


     # bigint attribute declaration
     # 大整型属性声明
     # multi-value (an arbitrary number of attributes is allowed), optional
     # 多值(准许任意数量的属性),可选
     # declares a signed (unlike uint!) 64-bit attribute
     # 声明一个有符号的(与uint不同!)64位属性
     # sql_attr_bigint   = my_bigint_id


     # UNIX timestamp attribute declaration
     # unix时间戳声明
     # multi-value (an arbitrary number of attributes is allowed), optional
     # 多值(准许任意数量的属性),可选
     # similar to integer, but can also be used in date functions
     # 类似于整型,但还可以用于日期函数
     # sql_attr_timestamp = posted_ts
     # sql_attr_timestamp = last_edited_ts
     sql_attr_timestamp  = date_added

     # string ordinal attribute declaration
     # 字符串序数(ordinal)属性声明
     # multi-value (an arbitrary number of attributes is allowed), optional
     # 多值(准许任意数量的属性),可选
     # sorts strings (bytewise), and stores their indexes in the sorted list
     # 对字符串(按字节)排序,并存储它们在排序后列表中的序号
     # sorting by this attr is equivalent to sorting by the original strings
     # 按这个属性排序等效于按原始字符串排序
     # sql_attr_str2ordinal = author_name


     # floating point attribute declaration
     # 浮点属性声明
     # multi-value (an arbitrary number of attributes is allowed), optional
     # 多值(准许任意数量的属性),可选
     # values are stored in single precision, 32-bit IEEE 754 format
     #  值存储于单精度中,32位长度,属于IEEE 754格式
     # sql_attr_float = lat_radians
     # sql_attr_float = long_radians


     # multi-valued attribute (MVA) attribute declaration
     # 多值属性声明
     # multi-value (an arbitrary number of attributes is allowed), optional
     # 多值(准许任意数量的属性),可选
     # MVA values are variable length lists of unsigned 32-bit integers
     # 多值属性值 是变长的无符号32位整型
     # syntax is ATTR-TYPE ATTR-NAME 'from' SOURCE-TYPE [;QUERY] [;RANGE-QUERY]
     # 语法格式是 ATTR-TYPE ATTR-NAME from 源类型 [;QUERY][;RANGE-QUERY]
     # ATTR-TYPE is 'uint' or 'timestamp'
     # 属性类型是uint或者时间戳
     # SOURCE-TYPE is 'field', 'query', or 'ranged-query'
     # 源类型是字段,query或者是ranged-query
     # QUERY is SQL query used to fetch all ( docid, attrvalue ) pairs
     # 查询是SQL查询使用获取所有(docid,属性值)的对
     # RANGE-QUERY is SQL query used to fetch min and max ID values, similar to 'sql_query_range'
     # RANGE-QUERY(区间查询)是用作SQL查询时获取最小ID和最大ID值地,类似于'sql_query_range'
     # sql_attr_multi = uint tag from query; SELECT id, tag FROM tags
     # sql_attr_multi = uint tag from ranged-query; \
     # SELECT id, tag FROM tags WHERE id>=$start AND id<=$end; \
     # SELECT MIN(id), MAX(id) FROM tags


     # post-query, executed on sql_query completion
     # 查询后,在SQL查询完成执行
     # optional, default is empty
     # 可选项,默认值为空
     # sql_query_post  =

     
     # post-index-query, executed on successful indexing completion
     #
     # post-index-query,执行成功后索引
     # optional, default is empty
     # 可选项,默认值为空
     # $maxid expands to max document ID actually fetched from DB
     # $maxid 扩展至最大的文档ID 实际上取出来自数据库
     # sql_query_post_index = REPLACE INTO counters ( id, val ) \
     # VALUES ( 'max_indexed_id', $maxid )


     # ranged query throttling, in milliseconds
     # 查询范围限制,以毫秒为单位
     # optional, default is 0 which means no delay
     # 可选,默认为0,这意味着没有延误
     # enforces given delay before each query step
     # 每个查询执行前给予延迟
     sql_ranged_throttle = 0

     # document info query, ONLY for CLI search (ie. testing and debugging)
     # 文档信息查询,只为CLI的搜索
     # optional, default is empty
     # 可选,默认值为空
     # must contain $id macro and must fetch the document by that id
     # 必须包含ID 宏并且必须通过这个ID读取文档
     sql_query_info  = SELECT * FROM documents WHERE id=$id

     # kill-list query, fetches the document IDs for kill-list
     # kill-list 查询,查询这些文档的ID用作kill列表
     # kill-list will suppress matches from preceding indexes in the same query
     # kill-list 会屏蔽同一次查询中排在前面的索引所返回的匹配结果
     # optional, default is empty
     # 可选项,默认值为空
     # sql_query_killlist = SELECT id FROM documents WHERE edited>=@last_reindex


     # columns to unpack on indexer side when indexing
     # 当索引时字段解压在索引解析器
     # multi-value, optional, default is empty list
     # 多值,可选项,默认值为空列表
     # unpack_zlib = zlib_column
     # unpack_mysqlcompress = compressed_column
     # unpack_mysqlcompress = compressed_column_2


     # maximum unpacked length allowed in MySQL COMPRESS() unpacker
     # 最大解压长度准许mysql COMPRESS() 解压
     # optional, default is 16M
     # 可选项,默认值为16M
     # unpack_mysqlcompress_maxsize = 16M


     #####################################################################
     ## xmlpipe settings
     ## xmlpipe 设置
     #####################################################################

     # type    = xmlpipe
     #
     # 类型=xmlpipe

     # shell command to invoke xmlpipe stream producer
     # 用于调用xmlpipe数据流生成程序的shell命令
     # mandatory
     # 强制
     # xmlpipe_command = cat @CONFDIR@/test.xml

     #####################################################################
     ## xmlpipe2 settings
     ## xmlpipe2 设置
     #####################################################################

     # type    = xmlpipe2
     # xmlpipe_command = cat @CONFDIR@/test2.xml


     # xmlpipe2 field declaration
     # xmlpipe2 字段定义
     # multi-value, optional, default is empty
     # 多值,可选,默认值为空
     #
     # xmlpipe_field    = subject
     # xmlpipe_field    = content


     # xmlpipe2 attribute declaration
     # xmlpipe2 属性定义
     # multi-value, optional, default is empty
     # 多值,可选项,默认值为空
     # all xmlpipe_attr_XXX options are fully similar to sql_attr_XXX
     # 所有xmlpipe_attr_XXX选项是完全类似的sql_attr_XXX
     # xmlpipe_attr_timestamp = published
     # xmlpipe_attr_uint   = author_id


     # perform UTF-8 validation, and filter out incorrect codes
     # 执行的UTF - 8验证,并过滤掉不正确的代码
     # avoids XML parser choking on non-UTF-8 documents
     # 避免XML解析器在遇到非UTF-8文档时出错中断
     # optional, default is 0
     # 可选项,默认值为0
     # xmlpipe_fixup_utf8  = 1
    }


    # inherited source example
    # 继承源样例
    # all the parameters are copied from the parent source,
    # 所有的参数都是复制来源于父级源,
    # and may then be overridden in this source definition
    # 并且可能覆盖这个源定义
    source src1throttled : src1
    {
     sql_ranged_throttle   = 100
    }

    #############################################################################
    ## index definition
    ## 索引定义
    #############################################################################

    # local index example
    # 本机索引样例
    #
    # this is an index which is stored locally in the filesystem
    # 这是一个索引,它存储于本地文件系统
    # all indexing-time options (such as morphology and charsets)
    # 所有索引期选项(如词形处理和字符集)
    # are configured per local index
    # 都是按每个本地索引分别配置的
    index test1
    {
     # document source(s) to index
     # 文档源去索引
     # multi-value, mandatory
     # 多值,强制
     # document IDs must be globally unique across all sources
     # 文档的所有ID必须是全局的唯一的跨越所有来源
     source   = src1

     # index files path and file name, without extension
     # 索引文件路径和文件名,无扩展
     # mandatory, path must be writable, extensions will be auto-appended
     # 强制,路径必须是可写的,扩展会自动产生
     path   = @CONFDIR@/data/test1

     # document attribute values (docinfo) storage mode
     # 文档属性值(文档信息)存储模式
     # optional, default is 'extern'
     # 可选,默认值为'extern'
     # known values are 'none', 'extern' and 'inline'
     # 已知的值为'none','extern'和'inline'
     docinfo   = extern

     # memory locking for cached data (.spa and .spi), to prevent swapping
     # 内存锁定为缓存(.spa和.spi),以防止交换
     # optional, default is 0 (do not mlock)
     # 可选项,默认值为0 (不进行内存锁)
     # requires searchd to be run from root
     mlock   = 0

     # a list of morphology preprocessors to apply
     # 要应用的词形(morphology)预处理器列表
     # optional, default is empty
     #可选,默认值为空
     # builtin preprocessors are 'none', 'stem_en', 'stem_ru', 'stem_enru',
     # 内置预处理器是'none','stem_en','stem_ru','stem_enru','soundex'和'metaphone';来自libstemmer的额外预处理器为'libstemmer_XXX',其中XXX是算法代码
     # 'soundex', and 'metaphone'; additional preprocessors available from
     # libstemmer are 'libstemmer_XXX', where XXX is algorithm code
     # (see libstemmer_c/libstemmer/modules.txt) 查看libstemmer_c/libstemmer/modules.txt
     #
     # morphology  = stem_en, stem_ru, soundex
     # morphology = libstemmer_german
     # morphology = libstemmer_sv
     morphology  = none

     # minimum word length at which to enable stemming
     #  启用词干提取(stemming)的最小词长
     #  optional, default is 1 (stem everything)
     #  可选项,默认值为1(对所有词进行词干提取)
     # min_stemming_len = 1


     # stopword files list (space separated)
     # 非索引字的文件列表(用空格隔开)
     # optional, default is empty
     # 可选项,默认值为空
     # contents are plain text, charset_table and stemming are both applied
     # 内容为纯文本,charset_table和词干提取(stemming)都会应用于其中
     # stopwords   = @CONFDIR@/data/stopwords.txt


     # wordforms file, in "mapfrom > mapto" plain text format
     # wordforms 文件“mapfrom> mapto”,纯文本格式
     # optional, default is empty
     # 可选,默认值为空
     #
     # wordforms   = @CONFDIR@/data/wordforms.txt


     # tokenizing exceptions file
     # tokenizing例外文件
     # optional, default is empty
     # 可选项,默认值为空
     # plain text, case sensitive, space insensitive in map-from part
     # 纯文本,大小写敏感,map-from部分对空格不敏感
     # one "Map Several Words => ToASingleOne" entry per line
     #
     # exceptions  = @CONFDIR@/data/exceptions.txt


     # minimum indexed word length
     # 最低索引字长
     # default is 1 (index everything)
     # 默认为1(所有的索引)
     min_word_len  = 1

     # charset encoding type
     # 字符集编码类型
     #   optional, default is 'sbcs'
     #可选项,默认值为sbcs
     # known types are 'sbcs' (Single Byte CharSet) and 'utf-8'
     # 可选的类型为sbcs和utf-8
     charset_type  = utf-8

     # charset definition and case folding rules "table"
     # 字符集的定义和大小写折叠(case folding)规则“表”
     # optional, default value depends on charset_type
     # 可选项,默认值取决于charset_type
     # defaults are configured to include English and Russian characters only
     # 默认配置只包括英文和俄文字符
     # you need to change the table to include additional ones
     # 您需要更改的表包含附加字符集
     # this behavior MAY change in future versions
     # 这种行为可能会更改在将来的版本中
     #
     # 'sbcs' default value is
     # sbcs默认值是
     # charset_table  = 0..9, A..Z->a..z, _, a..z, U+A8->U+B8, U+B8, U+C0..U+DF->U+E0..U+FF, U+E0..U+FF
     # utf8的默认值为
     # 'utf-8' default value is
     charset_table  = 0..9, A..Z->a..z, _, a..z, U+410..U+42F->U+430..U+44F, U+430..U+44F


     # ignored characters list
     # 忽略字符列表
     # optional, default value is empty
     # 可选项,默认值为空
     # ignore_chars  = U+00AD


     # minimum word prefix length to index
     # 最小单词前缀长度索引
     # optional, default is 0 (do not index prefixes)
     # 可选项,默认值为0(不包含索引前缀)
     # min_prefix_len = 0


     # minimum word infix length to index
     # 索引的最小单词中缀长度
     # optional, default is 0 (do not index infixes)
     # 可选项,默认值为0(不索引中缀)
     # min_infix_len  = 0


     # list of fields to limit prefix/infix indexing to
     # 限定只对这些字段做前缀/中缀索引的字段列表
     # optional, default value is empty (index all fields in prefix/infix mode)
     # 可选项,默认值为空(索引的所有字段,前缀/中缀模式)
     # prefix_fields  = filename
     # infix_fields  = url, domain


     # enable star-syntax (wildcards) when searching prefix/infix indexes
     # 启动star-syntax(通配符)当搜索时前缀、中缀索引
     # known values are 0 and 1
     # 可选值为0和1
     # optional, default is 0 (do not use wildcard syntax)
     # 可选项,默认值为0(不要使用通配符语法)
     # enable_star  = 1


     # n-gram length to index, for CJK indexing
     # n-gram 长度去索引,为了CJK编码索引
     # only supports 0 and 1 for now, other lengths to be implemented
     # 现在只支持0和1,其他长度尚待实现
     # optional, default is 0 (disable n-grams)
     # 可选项,默认值为0 关闭n-grams
     # ngram_len    = 1


     # n-gram characters list, for CJK indexing
     # n-gram字符集列表,为CJK字符集索引
     # optional, default is empty
     # 可选项,默认值为空
     # ngram_chars   = U+3000..U+2FA1F


     # phrase boundary characters list
     # 短语边界字符列表
     # optional, default is empty
     #  可选项,默认值为空
     # phrase_boundary  = ., ?, !, U+2026 # horizontal ellipsis


     # phrase boundary word position increment
     # 短语边界位置增量字
     #  optional, default is 0
     #   可选项,默认值为0
     # phrase_boundary_step = 100


     # whether to strip HTML tags from incoming documents
     # 是否过滤HTML标签
     # known values are 0 (do not strip) and 1 (do strip)
     # 已知的值是0(不过虑)和1(过滤)
     # optional, default is 0
     # 可选项,默认值为0
     html_strip    = 0

     # what HTML attributes to index if stripping HTML
     # 过滤HTML时,哪些HTML属性的内容仍需要被索引
     # optional, default is empty (do not index anything)
     # 可选项,默认值为空(不去索引任何项)
     # html_index_attrs  = img=alt,title; a=title;


     # what HTML elements contents to strip
     # 哪些HTML元素需要连同其内容一并被删除
     # optional, default is empty (do not strip element contents)
     #可选项,默认为空(不过虑这些元素内容)
     # html_remove_elements = style, script


     # whether to preopen index data files on startup
     # 在启动时是否预开索引数据文件
     # optional, default is 0 (do not preopen), searchd-only
     # 可选项,默认值为0(不预开)
     # preopen     = 1


     # whether to keep dictionary (.spi) on disk, or cache it in RAM
     # 是否保留字典(.SPI)在磁盘上,或者是缓存在内存上
     # optional, default is 0 (cache in RAM), searchd-only
     # 可选项,默认值为0(缓存于内存)
     # ondisk_dict    = 1


     # whether to enable in-place inversion (2x less disk, 90-95% speed)
     # 是否启用就地转化(2倍更少的磁盘,90-95%的速度)
     # optional, default is 0 (use separate temporary files), indexer-only
     # 可选项,默认值为0(使用单独的临时文件)
     # inplace_enable   = 1


     # in-place fine-tuning options
     # 就地微调选项
     # optional, defaults are listed below
     # 可选项,下面列出了默认项
     # inplace_hit_gap   = 0  # preallocated hitlist gap size  预分配的hitlist间隙大小
     # inplace_docinfo_gap  = 0  # preallocated docinfo gap size 预分配的docinfo间隙大小
     # inplace_reloc_factor = 0.1 # relocation buffer size within arena 重定位缓冲区在内存池(arena)中的大小
     # inplace_write_factor = 0.1 # write buffer size within arena 写缓冲区在内存池(arena)中的大小


     # whether to index original keywords along with stemmed versions
     # 是否在索引词干化后的词的同时,也索引原始关键字
     # enables "=exactform" operator to work  使"=exactform"运算符可用
     #
     # optional, default is 0
     # 可选项,默认值为0
     # index_exact_words  = 1


     # position increment on overshort (less than min_word_len) words
     # 过短词(短于min_word_len的词)处的位置增量
     # optional, allowed values are 0 and 1, default is 1
     # 可选项,准许值有0和1,默认值为1
     # overshort_step   = 1


     # position increment on stopword
     # 停用词(非索引字)处的位置增量
     # optional, allowed values are 0 and 1, default is 1
     # 可选项,准许值有0和1,默认值为1
     # stopword_step   = 1
    }


    # inherited index example
    # 继承索引样例
    # all the parameters are copied from the parent index,
    # 所有的参数都是从父复制索引,
    # and may then be overridden in this index definition
    #  然后,可以在该索引覆盖的定义
    index test1stemmed : test1
    {
     path   = @CONFDIR@/data/test1stemmed
     morphology  = stem_en
    }


    # distributed index example
    # 分布式索引的例子
    # this is a virtual index which can NOT be directly indexed,
    # 这是一个虚拟索引,不能被直接建立索引
    # and only contains references to other local and/or remote indexes
    # 并且只包含其他本地及/或远程索引引用
    index dist1
    {
     # 'distributed' index type MUST be specified
     # '分布'必须指定索引类型
     type    = distributed

     # local index to be searched
     # 本地索引进行搜索
     # there can be many local indexes configured
     #  可以有很多配置本地索引
     local    = test1
     local    = test1stemmed

     # remote agent
     # 远程代理
     # multiple remote agents may be specified
     # 多个远程代理可以指定
     # syntax for TCP connections is 'hostname:port:index1,[index2[,...]]'
     # TCP连接的语法'hostname:port:index1,[index2[,...]]'
     # syntax for local UNIX connections is '/path/to/socket:index1,[index2[,...]]'
     # 语法本地UNIX连接 '/path/to/socket:index1,[index2[,...]]'
     agent    = localhost:9313:remote1
     agent    = localhost:9314:remote2,remote3
     # agent    = /var/run/searchd.sock:remote4
            #代理
     # blackhole remote agent, for debugging/testing
     # 黑洞远程代理,调试/测试
     # network errors and search results will be ignored
     # 网络错误和搜索结果将被忽略
     # agent_blackhole  = testbox:9312:testindex1,testindex2
     


     # remote agent connection timeout, milliseconds
     # 远程代理连接超时,毫秒
     # optional, default is 1000 ms, ie. 1 sec
     # 可选项,默认值为1000ms,1秒
     agent_connect_timeout = 1000

     # remote agent query timeout, milliseconds
     #  远程代理查询超时,毫秒
     # optional, default is 3000 ms, ie. 3 sec
     # 可选项,默认值为3000毫秒 相当于3秒
     agent_query_timeout  = 3000
    }

    #############################################################################
    ## indexer settings
    ## 索引器设置
    #############################################################################

    indexer
    {
     # memory limit, in bytes, kilobytes (16384K) or megabytes (256M)
     # 内存大小限制,16384K或兆字节(256M的)
     # optional, default is 32M, max is 2047M, recommended is 256M to 1024M
     # 可选项,默认值为32M,最大值为2047M,推荐的是256M到1024M
     mem_limit   = 32M

     # maximum IO calls per second (for I/O throttling)
     # 最高每秒的IO调用
     # optional, default is 0 (unlimited)
     # 可选项,默认值为0 不限制
     # max_iops   = 40


     # maximum IO call size, bytes (for I/O throttling)
     # 最大IO调用大小,字节(用于I / O限制)
     # optional, default is 0 (unlimited)
     # 可选项,默认值为0不限制
     # max_iosize  = 1048576


     # maximum xmlpipe2 field length, bytes
     # 最大xmlpipe2字段长度,字节
     # optional, default is 2M
     # 可选项,默认值为2M
     # max_xmlpipe2_field = 4M


     # write buffer size, bytes
     # 写入缓冲区大小,字节
     #  several (currently up to 4) buffers will be allocated
     # 几个(目前最多4个)的缓冲区将被分配
     # write buffers are allocated in addition to mem_limit
     # 写缓冲区分配除了mem_limit
     # optional, default is 1M
     # 可选项,默认值为1M
     # write_buffer  = 1M
    }

    #############################################################################
    ## searchd settings
    ## searchd 设置
    #
    #############################################################################

    searchd
    {
     # hostname, port, or hostname:port, or /unix/socket/path to listen on
     # 主机名,端口,或者为主机名:端口,或者是/unix/socket/path去监听一个多值,准许多点监听
     # multi-value, multiple listen points are allowed
     # optional, default is 0.0.0.0:9312 (listen on all interfaces, port 9312)
     # 可选项,默认值为0.0.0.0:9312(监听所有的接口,端口为9312)
     # listen    = 127.0.0.1
     # listen    = 192.168.0.1:9312
     # listen    = 9312
     # listen    = /var/run/searchd.sock


     # log file, searchd run info is logged here
     # 日志文件,searchd的信息被记录在这里运行
     # optional, default is 'searchd.log'
     log     = @CONFDIR@/log/searchd.log

     # query log file, all search queries are logged here
     # 查询日志文件,所有的搜索记录全部在记录在这儿
     # optional, default is empty (do not log queries)
     # 可选项,默认值为空(不记录查询日志)
     query_log   = @CONFDIR@/log/query.log

     # client read timeout, seconds
     # 客户端读取超时时间,秒为单位
     # optional, default is 5
     # 可选项,默认值为5
     read_timeout  = 5

     # request timeout, seconds
     # 请求超时时间,单位为秒
     # optional, default is 5 minutes
     # 可选项,默认值为5分钟
     client_timeout  = 300

     # maximum amount of children to fork (concurrent searches to run)
     #
     # optional, default is 0 (unlimited)
     # 可选项,默认值为0(无限)
     max_children  = 30

     # PID file, searchd process ID file name
     # PID文件,即searchd进程ID文件的文件名
     # mandatory
     # 必选
     pid_file   = @CONFDIR@/log/searchd.pid

     # max amount of matches the daemon ever keeps in RAM, per-index
     # WARNING, THERE'S ALSO PER-QUERY LIMIT, SEE SetLimits() API CALL
     # default is 1000 (just like Google)
     max_matches   = 1000

     # seamless rotate, prevents rotate stalls if precaching huge datasets
     # optional, default is 1
     # 可选项,默认值为1
     seamless_rotate  = 1

     # whether to forcibly preopen all indexes on startup
     # 是否在启动时强行预开所有的索引
     # optional, default is 0 (do not preopen)
     # 可选项,默认值为0(不进行预开)
     preopen_indexes  = 0

     # whether to unlink .old index copies on successful rotation.
     # 索引轮换(rotate)成功后,是否删除(unlink).old旧索引副本。
     # optional, default is 1 (do unlink)
     # 可选项,默认值为1(删除)
      unlink_old   = 1

     # attribute updates periodic flush timeout, seconds
     # updates will be automatically dumped to disk this frequently
     # optional, default is 0 (disable periodic flush)
     #
     # attr_flush_period = 900


     # instance-wide ondisk_dict defaults (per-index value take precedence)
     # optional, default is 0 (precache all dictionaries in RAM)
     #
     # ondisk_dict_default = 1


     # MVA updates pool size
     # MVA 更新池的大小
     # shared between all instances of searchd, disables attr flushes!
     # 在searchd的所有实例之间共享,会禁用属性刷新(attr flush)!
     # optional, default size is 1M
     # 可选项,默认值为1M
     mva_updates_pool = 1M

     # max allowed network packet size
     # 最大准许的网络包大小
     # limits both query packets from clients, and responses from agents
     #  限制查询包大小来自客户端和响应代理
     # optional, default size is 8M
     # 可选项,默认值为8M
     max_packet_size  = 8M

     # crash log path
     # 崩溃日志路径
     # searchd will (try to) log crashed query to 'crash_log_path.PID' file
     # searchd 将尝试把导致崩溃的查询记录到'crash_log_path.PID'文件
     # optional, default is empty (do not create crash logs)
     # 可选项,默认值为空。(不记录崩溃日志)
     #   crash_log_path  = @CONFDIR@/log/crash


     # max allowed per-query filter count
     # 最大允许每个查询过滤器计数
     # optional, default is 256
     # 可选项,默认值为256
     max_filters   = 256

     # max allowed per-filter values count
     # 最大允许每个过滤值的计数
     # optional, default is 4096
     # 可选项,默认值为4096
     max_filter_values = 4096


     # socket listen queue length
     # socket监听队列的长度
     # optional, default is 5
     # 可选项,默认值为5
     # listen_backlog  = 5


     # per-keyword read buffer size
     # 每个关键字读取缓冲区的大小
     # optional, default is 256K
     # 可选项,默认值为256K
     # read_buffer   = 256K


     # unhinted read size (currently used when reading hits)
     # unhinted读取大小(目前在读取命中数据hits时使用)
     # optional, default is 32K
     # 可选项,默认值为32K
     # read_unhinted  = 32K
    }

    # --eof--
    # 结束

      以下是其简单的一个应用:

    <?php
      // Minimal SphinxClient usage example: runs one query against searchd
      // and prints either the error or a short summary of the result.
      require ( "../sys/lib/sphinxapi.php" );

      // Query parameters. The double quotes inside $q are part of the query
      // text itself (NOTE(review): presumably phrase syntax under
      // SPH_MATCH_EXTENDED - confirm against the Sphinx query syntax docs).
      $q = '"东方新闻"';
      $host = '192.168.1.100';
      $port = 9312;
      $mode = SPH_MATCH_EXTENDED;
      $index = '*';

      $cl = new SphinxClient ();
      $cl->SetServer ( $host, $port );
      $cl->SetConnectTimeout ( 1 );
      $cl->SetArrayResult ( true );
      $cl->SetMatchMode ( $mode );
      $res = $cl->Query ( $q, $index );

      if ( $res===false )
      {
        // Query() returned false: report the client's last error.
        print "Query failed: " . $cl->GetLastError() . ".\n";
      } else
      {
        // Only the warning line is conditional; the summary below is always
        // printed for a successful query.
        if ( $cl->GetLastWarning() )
        {
          print "WARNING: " . $cl->GetLastWarning() . "\n";
        }

        print "Query '$q' retrieved {$res['total']} of {$res['total_found']} matches in {$res['time']} sec.\n";
        print "Query stats:\n";

        // Per-keyword statistics are printed only when the result has them.
        if ( isset($res['words']) && is_array($res['words']) )
        {
          foreach ( $res['words'] as $word => $info )
          {
            print "    '$word' found {$info['hits']} times in {$info['docs']} documents\n";
          }
        }
        print "\n";
      }
    ?>

     

    参考阅读:

    Sphinx配置文件详细介绍

    http://lovealwaysonline.blog.163.com/blog/static/197692011201261485729837/

    数据源配置:mysql数据源

    http://www.coreseek.cn/products-install/mysql/

    BSD/Linux下的安装测试

    http://www.coreseek.cn/products-install/install_on_bsd_linux/

  • 相关阅读:
    Java 8 新特性-菜鸟教程 (8) -Java 8 日期时间 API
    Java 8 新特性-菜鸟教程 (7) -Java 8 Nashorn JavaScript
    Java 8 新特性-菜鸟教程 (6) -Java 8 Optional 类
    心理相关
    matlab和Visio安装
    论文资料搜集整理(研究现状)
    调式相关
    梅花落与折杨柳
    混合高斯模型——学习笔记
    NSCT,非下采样Contourlet变换——学习笔记
  • 原文地址:https://www.cnblogs.com/phpCHAIN/p/3208913.html
Copyright © 2011-2022 走看看