zoukankan      html  css  js  c++  java
  • Sphinx初探之安装

    在 CentOS 或 Red Hat 上安装 Sphinx
    1.首先安装依赖包
    $ yum install postgresql-libs unixODBC
    2.安装软件
    $ rpm -Uhv sphinx-2.2.10-1.rhel6.x86_64.rpm
    3.启动服务
    $ service searchd start
    
    [root@face sphinx-2.2.10]# find / -name sphinx
    /var/run/sphinx
    /var/log/sphinx
    /var/lib/sphinx
    /etc/logrotate.d/sphinx
    /etc/sphinx
    /usr/share/sphinx
    /usr/share/sphinx/api/ruby/spec/sphinx
    /usr/share/sphinx/api/ruby/lib/sphinx
    
    mysql> #创建测试库
    mysql> create database test;
    Query OK, 1 row affected (0.00 sec)
    #插入测试数据
    mysql -uroot -ppassword test < /usr/share/doc/sphinx-2.2.10/example.sql
    
    mysql> desc tags;
    +-------+---------+------+-----+---------+-------+
    | Field | Type    | Null | Key | Default | Extra |
    +-------+---------+------+-----+---------+-------+
    | docid | int(11) | NO   | PRI | NULL    |       |
    | tagid | int(11) | NO   | PRI | NULL    |       |
    +-------+---------+------+-----+---------+-------+
    2 rows in set (0.00 sec)
    #表结构
    mysql> desc documents;
    +------------+--------------+------+-----+---------+----------------+
    | Field      | Type         | Null | Key | Default | Extra          |
    +------------+--------------+------+-----+---------+----------------+
    | id         | int(11)      | NO   | PRI | NULL    | auto_increment |
    | group_id   | int(11)      | NO   |     | NULL    |                |
    | group_id2  | int(11)      | NO   |     | NULL    |                |
    | date_added | datetime     | NO   |     | NULL    |                |
    | title      | varchar(255) | NO   |     | NULL    |                |
    | content    | text         | NO   |     | NULL    |                |
    +------------+--------------+------+-----+---------+----------------+
    6 rows in set (0.00 sec)
    #表数据
    mysql> select * from documents;
    +----+----------+-----------+---------------------+-----------------+---------------------------------------------------------------------------+
    | id | group_id | group_id2 | date_added          | title           | content                                                                   |
    +----+----------+-----------+---------------------+-----------------+---------------------------------------------------------------------------+
    |  1 |        1 |         5 | 2016-01-15 14:24:23 | test one        | this is my test document number one. also checking search within phrases. |
    |  2 |        1 |         6 | 2016-01-15 14:24:23 | test two        | this is my test document number two                                       |
    |  3 |        2 |         7 | 2016-01-15 14:24:23 | another doc     | this is another group                                                     |
    |  4 |        2 |         8 | 2016-01-15 14:24:23 | doc number four | this is to test groups                                                    |
    +----+----------+-----------+---------------------+-----------------+---------------------------------------------------------------------------+
    4 rows in set (0.00 sec)
    
    mysql> select * from tags;
    +-------+-------+
    | docid | tagid |
    +-------+-------+
    |     1 |     1 |
    |     1 |     3 |
    |     1 |     5 |
    |     1 |     7 |
    |     2 |     2 |
    |     2 |     4 |
    |     2 |     6 |
    |     3 |    15 |
    |     4 |     7 |
    |     4 |    40 |
    +-------+-------+
    10 rows in set (0.00 sec)
    
    #sphinx的配置文件
    [root@face sphinx]# grep "^$\|^#" -v sphinx.conf
    source src1
    {
        #数据库类型 数据来源信息
        type                    = mysql
        #数据库的IP
        sql_host                = localhost
        #数据库用户
        sql_user                = root
        #数据库密码
        sql_pass                = xxxxxx
        #数据库库名
        sql_db                  = test
        #默认的配置文件没有这一句 需要注意
        sql_sock                = /tmp/mysql.sock
        sql_port                = 3306 # optional, default is 3306
        #定义取数据的SQL,第一列ID列必须为唯一的正整数值
        sql_query               = \
            SELECT id, group_id, UNIX_TIMESTAMP(date_added) AS date_added, title, content \
            FROM documents
        sql_attr_uint           = group_id
        sql_attr_timestamp      = date_added
    }
    index test1
    {
        #定义索引的源
        source                  = src1
        #设置生成的索引存放路径
        path                    = /var/lib/sphinx/test1
    }
    index testrt
    {
        #实时索引类型
        type                    = rt
        rt_mem_limit            = 128M
        path                    = /var/lib/sphinx/testrt
        rt_field                = title
        rt_field                = content
        rt_attr_uint            = gid
    }
    #定义indexer配置选项
    indexer
    {
        #定义生成索引过程使用内存的限制
        mem_limit               = 128M
    }
    #定义searchd守护进程的相关选项
    searchd
    {
        #tcp        0      0 0.0.0.0:9312                0.0.0.0:*                   LISTEN      9289/searchd
        listen                  = 9312
        listen                  = 9306:mysql41
        #进程服务日志
        log                     = /var/log/sphinx/searchd.log
        #查询日志
        query_log               = /var/log/sphinx/query.log
        #网络客户端请求的读超时时间
        read_timeout            = 5
        #子进程数
        max_children            = 30
        #进程文件的路径
        pid_file                = /var/run/sphinx/searchd.pid

    #启用无缝seamless轮转,防止searchd轮转在需要预取大量数据的索引时停止响应
    #也就是说在任何时刻查询都可用,或者使用旧索引,或者使用新索引

            seamless_rotate         = 1
            preopen_indexes         = 1
            unlink_old              = 1
            workers                 = threads # for RT to work
            binlog_path             = /var/lib/sphinx/
    }
    [root@face sphinx]# service searchd start
    Starting searchd: Sphinx 2.2.10-id64-release (2c212e0)
    Copyright (c) 2001-2015, Andrew Aksyonoff
    Copyright (c) 2008-2015, Sphinx Technologies Inc (http://sphinxsearch.com)
    
    using config file '/etc/sphinx/sphinx.conf'...
    listening on all interfaces, port=9312
    listening on all interfaces, port=9306
    precaching index 'test1'
    WARNING: index 'test1': preload: /var/lib/sphinx/test1.sph is invalid header file (too old index version?); NOT SERVING
    precaching index 'testrt'
    precached 2 indexes in 0.004 sec
                                                               [  OK  ]
    #如果启动有warning 注意这儿的权限
    [root@face sphinx]# pwd
    /var/lib/sphinx
    [root@face sphinx]# chown sphinx.sphinx ./ -R
    [root@face sphinx]# 
    [root@face sphinx]# ll
    total 36
    -rw-------. 1 sphinx sphinx   8 Jan 15 15:28 binlog.001
    -rw-------. 1 sphinx sphinx   0 Jan 15 15:28 binlog.lock
    -rw-------. 1 sphinx sphinx  11 Jan 15 15:28 binlog.meta
    -rw-r--r--. 1 sphinx sphinx 128 Jan 15 15:32 test1.spa
    -rw-r--r--. 1 sphinx sphinx 148 Jan 15 15:32 test1.spd
    -rw-r--r--. 1 sphinx sphinx   1 Jan 15 15:32 test1.spe
    -rw-r--r--. 1 sphinx sphinx 371 Jan 15 15:32 test1.sph
    -rw-r--r--. 1 sphinx sphinx 190 Jan 15 15:32 test1.spi
    -rw-r--r--. 1 sphinx sphinx   0 Jan 15 15:32 test1.spk
    -rw-r--r--. 1 sphinx sphinx   0 Jan 15 15:32 test1.spm
    -rw-r--r--. 1 sphinx sphinx  37 Jan 15 15:32 test1.spp
    -rw-r--r--. 1 sphinx sphinx   1 Jan 15 15:32 test1.sps
    -rw-------. 1 sphinx sphinx   0 Jan 15 15:28 testrt.lock
    
    #启动的时候不能有warning
    [root@face sphinx]# service searchd restart
    Stopping searchd:                                          [  OK  ]
    Starting searchd: Sphinx 2.2.10-id64-release (2c212e0)
    Copyright (c) 2001-2015, Andrew Aksyonoff
    Copyright (c) 2008-2015, Sphinx Technologies Inc (http://sphinxsearch.com)
    
    using config file '/etc/sphinx/sphinx.conf'...
    listening on all interfaces, port=9312
    listening on all interfaces, port=9306
    precaching index 'test1'
    precaching index 'testrt'                                   
    precached 2 indexes in 0.001 sec
                                                               [  OK  ]
    #不加 --rotate 有可能起不来
    [root@face sphinx]# indexer --all --rotate
    Sphinx 2.2.10-id64-release (2c212e0)
    Copyright (c) 2001-2015, Andrew Aksyonoff
    Copyright (c) 2008-2015, Sphinx Technologies Inc (http://sphinxsearch.com)
    
    using config file '/etc/sphinx/sphinx.conf'...
    indexing index 'test1'...
    collected 4 docs, 0.0 MB
    sorted 0.0 Mhits, 100.0% done
    total 4 docs, 193 bytes
    total 0.003 sec, 58949 bytes/sec, 1221.74 docs/sec
    skipping non-plain index 'testrt'...
    total 4 reads, 0.000 sec, 0.1 kb/call avg, 0.0 msec/call avg
    total 12 writes, 0.000 sec, 0.1 kb/call avg, 0.0 msec/call avg
    rotating indices: successfully sent SIGHUP to searchd (pid=9289).
    #连接server 查看数据状态
    [root@face sphinx]# mysql -h0 -P9306
    Welcome to the MySQL monitor.  Commands end with ; or \g.
    Your MySQL connection id is 1
    Server version: 2.2.10-id64-release (2c212e0)
    
    Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
    
    Oracle is a registered trademark of Oracle Corporation and/or its
    affiliates. Other names may be trademarks of their respective
    owners.
    
    Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.
    
    mysql> show tables;
    +--------+-------+
    | Index  | Type  |
    +--------+-------+
    | test1  | local |
    | testrt | rt    |
    +--------+-------+
    2 rows in set (0.00 sec)
    
    mysql> select * from test1;
    +------+----------+------------+
    | id   | group_id | date_added |
    +------+----------+------------+
    |    1 |        1 | 1452839063 |
    |    2 |        1 | 1452839063 |
    |    3 |        2 | 1452839063 |
    |    4 |        2 | 1452839063 |
    +------+----------+------------+
    4 rows in set (0.00 sec)
    mysql> select * from testrt;
    Empty set (0.00 sec)

    mysql> INSERT INTO testrt VALUES ( 1, 'first record', 'test one', 123 );

    Query OK, 1 row affected (0.00 sec)

    mysql> INSERT INTO testrt VALUES ( 2, 'second record', 'test two', 234 );
    Query OK, 1 row affected (0.00 sec)

    mysql> INSERT INTO testrt VALUES ( 3, 'three record', 'three', 334 );
    Query OK, 1 row affected (0.00 sec)

    mysql> SELECT * FROM testrt;
    +------+------+
    | id | gid |
    +------+------+
    | 1 | 123 |
    | 2 | 234 |
    | 3 | 334 |
    +------+------+
    3 rows in set (0.00 sec)

    mysql> SELECT * FROM testrt WHERE MATCH('test');
    +------+------+
    | id | gid |
    +------+------+
    | 1 | 123 |
    | 2 | 234 |
    +------+------+
    2 rows in set (0.00 sec)

    
    #模糊匹配
    mysql> SELECT * FROM test1 WHERE MATCH('my document');
    +------+----------+------------+
    | id   | group_id | date_added |
    +------+----------+------------+
    |    1 |        1 | 1452839063 |
    |    2 |        1 | 1452839063 |
    +------+----------+------------+
    2 rows in set (0.00 sec)
    
    
    mysql> SELECT *, WEIGHT() FROM test1 WHERE MATCH('"document one"/1');SHOW META;
    +------+----------+------------+----------+
    | id   | group_id | date_added | weight() |
    +------+----------+------------+----------+
    |    1 |        1 | 1452839063 |     2663 |
    |    2 |        1 | 1452839063 |     1528 |
    +------+----------+------------+----------+
    2 rows in set (0.18 sec)
    #相关的元数据信息
    +---------------+----------+
    | Variable_name | Value    |
    +---------------+----------+
    | total         | 2        |
    | total_found   | 2        |
    | time          | 0.175    |
    | keyword[0]    | document |
    | docs[0]       | 2        |
    | hits[0]       | 2        |
    | keyword[1]    | one      |
    | docs[1]       | 1        |
    | hits[1]       | 2        |
    +---------------+----------+
    9 rows in set (0.00 sec)
    
    #相关的统计信息
    mysql> SET profiling=1;SELECT * FROM test1 WHERE id IN (1,2,4);SHOW PROFILE;
    Query OK, 0 rows affected (0.00 sec)
    
    +------+----------+------------+
    | id   | group_id | date_added |
    +------+----------+------------+
    |    1 |        1 | 1452839063 |
    |    2 |        1 | 1452839063 |
    |    4 |        2 | 1452839063 |
    +------+----------+------------+
    3 rows in set (0.00 sec)
    
    +--------------+----------+----------+---------+
    | Status       | Duration | Switches | Percent |
    +--------------+----------+----------+---------+
    | unknown      | 0.000206 | 4        | 65.61   |
    | net_read     | 0.000004 | 1        | 1.27    |
    | local_search | 0.000040 | 1        | 12.74   |
    | sql_parse    | 0.000027 | 1        | 8.60    |
    | fullscan     | 0.000002 | 1        | 0.64    |
    | finalize     | 0.000015 | 1        | 4.78    |
    | aggregate    | 0.000008 | 2        | 2.55    |
    | net_write    | 0.000012 | 1        | 3.82    |
    | eval_post    | 0.000000 | 1        | 0.00    |
    | total        | 0.000314 | 13       | 0       |
    +--------------+----------+----------+---------+
    10 rows in set (0.00 sec)
    
    
    mysql> SELECT id, id%3 idd FROM test1 WHERE MATCH('this is | nothing') GROUP BY idd;SHOW PROFILE;
    +------+------+
    | id   | idd  |
    +------+------+
    |    1 |    1 |
    |    2 |    2 |
    |    3 |    0 |
    +------+------+
    3 rows in set (0.18 sec)
    
    +--------------+----------+----------+---------+
    | Status       | Duration | Switches | Percent |
    +--------------+----------+----------+---------+
    | unknown      | 0.000430 | 6        | 0.24    |
    | net_read     | 0.000014 | 1        | 0.01    |
    | local_search | 0.000192 | 1        | 0.11    |
    | sql_parse    | 0.000056 | 1        | 0.03    |
    | dict_setup   | 0.000002 | 1        | 0.00    |
    | parse        | 0.000040 | 1        | 0.02    |
    | transforms   | 0.000003 | 1        | 0.00    |
    | init         | 0.177525 | 3        | 99.51   |
    | read_docs    | 0.000053 | 2        | 0.03    |
    | get_docs     | 0.000008 | 5        | 0.00    |
    | get_hits     | 0.000005 | 2        | 0.00    |
    | filter       | 0.000002 | 1        | 0.00    |
    | rank         | 0.000001 | 3        | 0.00    |
    | sort         | 0.000014 | 2        | 0.01    |
    | finalize     | 0.000004 | 1        | 0.00    |
    | aggregate    | 0.000017 | 2        | 0.01    |
    | net_write    | 0.000027 | 1        | 0.02    |
    | eval_post    | 0.000001 | 1        | 0.00    |
    | total        | 0.178394 | 35       | 0       |
    +--------------+----------+----------+---------+
    19 rows in set (0.00 sec)
    
    
    mysql> CALL KEYWORDS ('one two three', 'test1');
    +------+-----------+------------+
    | qpos | tokenized | normalized |
    +------+-----------+------------+
    | 1    | one       | one        |
    | 2    | two       | two        |
    | 3    | three     | three      |
    +------+-----------+------------+
    3 rows in set (0.00 sec)
    #第三个参数为1时输出统计信息:docs表示包含该关键词的文档数,hits表示该关键词的命中次数
    mysql> CALL KEYWORDS ('one two three', 'test1', 1);
    +------+-----------+------------+------+------+
    | qpos | tokenized | normalized | docs | hits |
    +------+-----------+------------+------+------+
    | 1    | one       | one        | 1    | 2    |
    | 2    | two       | two        | 1    | 2    |
    | 3    | three     | three      | 0    | 0    |
    +------+-----------+------------+------+------+
    3 rows in set (0.00 sec)

     试验到这儿吧

  • 相关阅读:
    Azure HDInsight 现已在中国正式发布
    避免由于Windows Update自动安装安全补丁导致VM意外重启
    如何修复在Microsoft Azure中“虚拟机防火墙打开,关闭RDP的连接端口”问题
    关于Azure Auto Scale的高级属性配置
    在Azure中使用Load Runner测试TCP最大并发连接数
    Windows Azure案例分析: 选择虚拟机或云服务?
    Windows Server基础架构云参考架构:硬件之上的设计
    浅析基于微软SQL Server 2012 Parallel Data Warehouse的大数据解决方案
    在Windows Azure公有云环境部署企业应用
    如何在后台运行_Linux_命令并且将进程脱离终端
  • 原文地址:https://www.cnblogs.com/similarface/p/5133515.html
Copyright © 2011-2022 走看看