zoukankan      html  css  js  c++  java
  • Install Sphinx 2.0.4 on CentOS 6.2

    - Make sure to check the CentOS, PHP-FPM, Nginx, memcached and MySQL posts first.

    - Get fresh RPM from sphinxsearch.com

    - Install Sphinx

    rpm -Uhv http://sphinxsearch.com/files/sphinx-2.0.4-1.rhel6.x86_64.rpm

    - Create sample MySQL database

    -- Sample database for the Sphinx walkthrough; everything is stored as utf8.
    CREATE DATABASE `Sample_DB`
        DEFAULT CHARACTER SET utf8
        DEFAULT COLLATE utf8_general_ci;
    USE `Sample_DB`;

    -- Documents to be indexed. `id` is selected first by the Sphinx source
    -- queries and therefore serves as the document id.
    CREATE TABLE IF NOT EXISTS `sites` (
      `id`          INT(11) UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
      `url`         VARCHAR(255)     NOT NULL,
      `Title`       VARCHAR(1024)    NOT NULL,
      `Description` VARCHAR(4000)    NOT NULL
    ) ENGINE = MyISAM DEFAULT CHARACTER SET = utf8;
     
    -- Sample rows used by the search examples below; `id` is assigned by AUTO_INCREMENT.
    INSERT INTO `sites` (`url`, `Title`, `Description`) VALUES
    ( 'www.petercolephoto.com', 'Cole, Peter', 'A gallery of 1100 plus adeventure sports images organized and presented in thumbnail/popup album form using PHP/mySQL on an Apache/Linux server.'),
    ( 'www.customwebapps.com', 'Custom Web Apps', 'PHP/MYSQL Web-based software development company'),
    ( 'www.dynamicpros.com', 'DynamicPros', 'Professional web programming services. Specializing in MySQL and e-commerce solutions.'),
    ( 'www.perl-resume.com', 'Sagayam, Christopher', 'Perl and CGI consultant with experience in Perl, Linux, NT, MySQL, MS-SQL, CGI, internet programming.'),
    ( 'cfortune.kics.bc.ca', 'Fortune, Chris', 'Freelance computer programmer in PHP, MySQL, HTML, CSS, Javascript, DHTML, Perl, CGI, ecommerce.'),
    ( 'php4hire.com', 'Klimov, Victor', 'Main expertise is designing customized web based applications using PHP and MySQL for Linux, Unix and Windows NT platforms.'),
    ( 'www.erasmos.com', 'Rasmussen, Sean', 'Internet systems developer, using languages and tools like Java, Perl, PHP, and SQL (Oracle, Mysql. Sun Java Certified. Seeks either contract or permanent employment.'),
    ( 'www.dalinowen.com', 'Owen, Dalin S.', 'Network security professional: Linux, UNIX, MS, C/C++, Perl, Apache, Qmail, MySQL, Tripwire, IPF, IPFW, PF, IPCHAINS, IPBUCKETS, Snort.'),
    ( 'www.kickfire.com', 'Kickfire', 'Database appliance based on MySQL.'),
    ( 'www.evoknow.com', 'Evoknow, Inc.', 'Full service company developing scalable Web applications in PHP and MySQL. Sacramento, California.'),
    ( 'www.primeharbor.com', 'PrimeHarbor Technologies, Inc.', 'Consulting firm specializing in OpenSource tools and languages. Expertise in MySQL, Apache, qmail, perl, python, java, C/C++, and kylix. Atlanta, Georgia.'),
    ( 'vidgen.sourceforge.net', 'VI Document Generator', 'VidGen bridges the gap between Databases and Dynamically Printed Documents with use of SVG, PHP, and mySQL. Input SVG and export various script formats like: Xerox VIPP, SOLscript, PDFlib-PDI or PreS. Or merge to PDF, PS, PPML, VPS, PCL or W3C SVGprint.'),
    ( 'www.collegefundsoftware.com', 'CollegeFund Software', 'Develops QuarkXTensions and AppleScripts developed for QuarkXPress. Also offers software development, training and consulting for the publishing industry. As well as web-based development using Perl, PHP and MySQL.'),
    ( 'www.usefulcomputerinfo.com', 'Useful Computer Info', 'Tutorials for windows, active server pages, mysql, fonts and search engine optimization.'),
    ( 'mysqltutorials.bravehost.com', 'Mysqlhelpster', 'Provides help with Mysql, Php, Html and Javascript.'),
    ( 'www.rapidweb.info', 'RapidWeb', 'A web hosting service offers an open-source MySQL-based content management system.'),
    ( 'weblogs.us', 'Weblogs.us', 'Offering free Wordpress blogs. Full FTP and MySQL access. No ads required on hosted pages.'),
    ( 'cafelog.com', 'b2', 'A news/ weblog tool (aka logware). Requires a server that can run PHP4, and a MySQL database. Freeware.'),
    ( 'monauraljerk.org', 'Monaural Jerk', 'Free, open-source PHP/MySQL weblog system. Includes calendar navigation, "edit this page", searching, channels, RSS, XML, tell-a-friend, and spider-friendly URLs.'),
    ( 'scoop.kuro5hin.org', 'Scoop', 'Free collaborative media software for running community web logs . Requires Perl, mod_perl, and MySQL.'),
    ( 'b2evolution.net', 'b2evolution', 'Full featured PHP/ mySQL weblog tool. Supports multiple categories, sub-catgeories, multiple weblogs, skins, statistics, comments, anti-spam filters.'),
    ( 'www.geeklog.net', 'Geeklog', 'Free open-source blog tool. Runs on many different operating systems and uses PHP4 and MySQL.'),
    ( 'thingamablog.sourceforge.net', 'Thingamablog', 'A cross-platform, standalone application for authoring and publishing weblogs. It will work on any platform on which Java can run. Thingamablog does not require a third-party blogging host, a CGI/PHP enabled web server, or a MySQL database. The only requirement to setup and manage a blog is FTP access to a web server.'),
    ( 'boastology.com', 'boastMachine', 'PHP/MySQL powered blogging platform.'),
    ( 'blogcms.com', 'BLOG:CMS', 'Blogware requires PHP and MySQL. Features include weblog, forum, wiki, news aggregator, and photo gallery.'),
    ( 'presstopia.com', 'Presstopia', 'Open source ASP.NET weblog application. Supports MySQL, MS Access, MS SQL Serve, Atom 0.3, RSS 2.0 feeds, multiple authors, comment, trackbacks, update pings.'),
    ( 'wheatblog.sourceforge.net', 'Wheatblog', 'A lightweight, open source, customizable blogging and content management solution powered by PHP and MySQL or SQLite.'),
    ( 'www.gidforums.com', 'GIDForums', 'Webmaster community that discusses issues on hosting, website design, and PHP/MySQL coding.'),
    ( 'www.catamatic.com', 'Click-IT', 'Low cost readymade dynamic websites with database and full content management facilities, plus bespoke PHP and mySQL programming services.'),
    ( 'www.dhishna.com', 'Dhishna Technologies', 'Offers design using ASP, SML, Flash, Photoshop, PHP, Mysql and Java. Based in Kerala, India.');
     
    -- Bookkeeping for the main+delta indexing scheme: row 1 holds the highest
    -- `sites`.`id` covered by the main index; the delta index takes everything
    -- above it.
    CREATE TABLE IF NOT EXISTS `sph_counter` (
      `counter_id` INT(11) NOT NULL,
      -- UNSIGNED to match `sites`.`id`, whose MAX() is stored here; a signed
      -- column would overflow for ids above 2147483647.
      `max_doc_id` INT(11) UNSIGNED NOT NULL,
      PRIMARY KEY (`counter_id`)
    ) ENGINE=MyISAM DEFAULT CHARSET=utf8;

    -- Seed the counter row; the main source's pre-query overwrites it on each
    -- full reindex.
    INSERT INTO `sph_counter` (`counter_id`, `max_doc_id`) VALUES
    (1, 1);

    - Configure Sphinx

    mkdir -p /server/sphinx/data
    chown -R sphinx /server/sphinx
    mv  /etc/sphinx/sphinx.conf /etc/sphinx/sphinx.conf.old
    vi /etc/sphinx/sphinx.conf
    #
    # Sphinx configuration file sample
    #
    # WARNING! While this sample file mentions all available options,
    # it contains (very) short helper descriptions only. Please refer to
    # doc/sphinx.html for details.
    # OR http://www.molecularsciences.org/book/export/html/405
     
     
    #############################################################################
    ## data source definition for sites
    #############################################################################
     
    source sites
    {
    	# data source type. mandatory, no default value
    	# known types are mysql, pgsql, mssql, xmlpipe, xmlpipe2, odbc
    	type					= mysql

    	#####################################################################
    	## SQL settings (for 'mysql' and 'pgsql' types)
    	#####################################################################

    	# some straightforward parameters for SQL source types
    	# NOTE(review): the account needs write access to sph_counter (see the
    	# REPLACE pre-query below); consider a dedicated account instead of root.
    	sql_host				= localhost
    	sql_user				= root
    	sql_pass				= rootpass
    	sql_db					= Sample_DB
    	#sql_port				= 3306	# optional, default is 3306

    	# the database is utf8, so ask MySQL to return rows as utf8
    	sql_query_pre			= SET NAMES utf8

    	# keep indexer traffic out of the MySQL query cache
    	sql_query_pre   		= SET SESSION query_cache_type=OFF

    	# update counter: remember the highest id that goes into the main index.
    	# COALESCE keeps the NOT NULL counter column valid while `sites` is empty.
    	sql_query_pre 			= REPLACE INTO sph_counter SELECT 1, COALESCE(MAX(id), 0) FROM sites

    	# main document fetch query
    	# mandatory, integer document ID field MUST be the first selected column
    	sql_query				= 	SELECT  id,url ,Title,Description FROM sites \
                                    WHERE   id<=( SELECT max_doc_id FROM sph_counter WHERE counter_id=1 )

        sql_ranged_throttle		= 0

    }
     
     
    #############################################################################
    ## sitesDelta source definition
    #############################################################################
     
     
     
    source sitesDelta : sites
    {
    	# Declaring sql_query_pre here discards ALL pre-queries inherited from
    	# `sites`. That is intentional: the counter REPLACE must not run for the
    	# delta. It also drops any encoding setup, so that is repeated explicitly.
    	sql_query_pre		= 	SET NAMES utf8
    	sql_query_pre   	= 	SET SESSION query_cache_type=OFF

    	# only rows added since the last main reindex
    	sql_query			= 	SELECT  id,url ,Title,Description FROM sites \
    							WHERE   id > ( SELECT max_doc_id FROM sph_counter WHERE counter_id=1 )

    }
     
     
     
    #############################################################################
    ## sites index definition
    #############################################################################
     
    # local index example
    #
    # this is an index which is stored locally in the filesystem
    #
    # all indexing-time options (such as morphology and charsets)
    # are configured per local index
    index sites
    {
    	# document source(s) to index
    	# multi-value, mandatory
    	# document IDs must be globally unique across all sources
    	source			= sites

    	# index files path and file name, without extension
    	# mandatory, path must be writable, extensions will be auto-appended
    	path			= /server/sphinx/data/sites

    	# character set encoding type
    	# Sphinx 2.0.x defaults to sbcs (single byte); the data is utf8 and the
    	# N-gram settings below only make sense for UTF-8 text.
    	# Changing this requires a full reindex.
    	charset_type		= utf-8

    	# minimum indexed word length
    	# default is 1 (index everything)
    	min_word_len		= 3

    	# minimum word infix length to index
    	# optional, default is 0 (do not index infixes)
    	#
    	min_infix_len		= 3

    	# enable star-syntax (wildcards) when searching prefix/infix indexes
    	# known values are 0 and 1
    	# optional, default is 0 (do not use wildcard syntax)
    	#
    	enable_star		= 1

    	# enable chinese search
    	# ngram_len alone does nothing: ngram_chars lists the characters that are
    	# indexed as N-grams (this is the CJK range from the Sphinx manual).
    	ngram_len		= 1
    	ngram_chars		= U+3000..U+2FA1F
    }
     
     
     
     
     
     
    #############################################################################
    ## sitesDelta index definition
    #############################################################################
     
     
    index sitesDelta : sites
    {
    	# document source(s) to index
    	# multi-value, mandatory
    	# document IDs must be globally unique across all sources
    	source			= sitesDelta

    	# index files path and file name, without extension
    	# mandatory, path must be writable, extensions will be auto-appended
    	path			= /server/sphinx/data/sitesDelta

    	# The settings below must match the main index so that the two can be
    	# searched together and merged with `indexer --merge`.

    	# character set encoding type
    	# Sphinx 2.0.x defaults to sbcs (single byte); the data is utf8 and the
    	# N-gram settings below only make sense for UTF-8 text.
    	charset_type		= utf-8

    	# minimum indexed word length
    	# default is 1 (index everything)
    	min_word_len		= 3

    	# minimum word infix length to index
    	# optional, default is 0 (do not index infixes)
    	#
    	min_infix_len		= 3

    	# enable star-syntax (wildcards) when searching prefix/infix indexes
    	# known values are 0 and 1
    	# optional, default is 0 (do not use wildcard syntax)
    	#
    	enable_star		= 1

    	# enable chinese search
    	# ngram_len alone does nothing: ngram_chars lists the characters that are
    	# indexed as N-grams (this is the CJK range from the Sphinx manual).
    	ngram_len		= 1
    	ngram_chars		= U+3000..U+2FA1F

    }
     
     
     
     
     
    #######################################################################################
     
     
    #############################################################################
    ## indexer settings
    #############################################################################
     
    indexer
    {
    	# memory limit, in bytes, kilobytes (16384K) or megabytes (256M)
    	# optional, default is 32M, max is 2047M, recommended is 256M to 1024M
    	mem_limit			= 32M     
     
    }
     
     
     
     
    #############################################################################
    ## searchd settings
    #############################################################################
     
    searchd
    {
    	# hostname, port, or hostname:port, or /unix/socket/path to listen on
    	# multi-value, multiple listen points are allowed
    	# optional, default is 0.0.0.0:9312 (listen on all interfaces, port 9312)
    	#
    	# listen				= 127.0.0.1
    	# listen				= 192.168.0.1:9312
    	# listen				= 9312
     
    	# NOTE(review): presumably turns off the legacy SphinxQL compatibility
    	# quirks - confirm against the manual for the installed version
    	compat_sphinxql_magics = 0
     
    	# log file, searchd run info is logged here
    	# optional, default is 'searchd.log'
    	# NOTE(review): log, query_log and pid_file all live directly in /var/log;
    	# confirm that the account searchd runs as can write there
    	log				= /var/log/searchd.log
     
    	# query log file, all search queries are logged here
    	# optional, default is empty (do not log queries)
    	query_log			= /var/log/query.log
     
    	# client read timeout, seconds
    	# optional, default is 5
    	read_timeout		= 5
     
    	# request timeout, seconds
    	# optional, default is 5 minutes
    	client_timeout		= 300
     
    	# maximum amount of children to fork (concurrent searches to run)
    	# optional, default is 0 (unlimited)
    	max_children		= 30
     
    	# PID file, searchd process ID file name
    	# mandatory
    	pid_file			= /var/log/searchd.pid
     
    	# max amount of matches the daemon ever keeps in RAM, per-index
    	# WARNING, THERE'S ALSO PER-QUERY LIMIT, SEE SetLimits() API CALL
    	# default is 1000 (just like Google)
    	max_matches			= 1000
     
    	# seamless rotate, prevents rotate stalls if precaching huge datasets 
    	# optional, default is 1
    	seamless_rotate		= 1
     
    	# whether to forcibly preopen all indexes on startup
    	# optional, default is 0 (do not preopen)
    	preopen_indexes		= 0
     
    	# whether to unlink .old index copies on successful rotation.
    	# optional, default is 1 (do unlink)
    	unlink_old			= 1
     
    	# crash log path
    	# searchd will (try to) log crashed query to 'crash_log_path.PID' file
    	# optional, default is empty (do not create crash logs)
    	#
    	#crash_log_path		= /server/log/sphinx/crash
     
     
    }
     
    # --eof--

    - Create indexes

    /usr/bin/indexer --all
    chown -R sphinx /server/sphinx

    - Start Sphinx

    /etc/init.d/searchd start

    - Run a test search for word ‘mysql’

    /usr/bin/search mysql

    - Search using PHP
    - Get sphinxapi.php : Sphinx comes with a PHP implementation of searchd client API. You can find it at api/sphinxapi.php in the root of the sphinx source tarball (tar.gz).

    <?php
    /*
     * sphinx.php - look up the ?q= keywords in Sphinx, fetch the matching rows
     * from MySQL in relevance order and print them as HTML.
     *
     * NOTE: the mysql_* functions used here were removed in PHP 7; the script
     * targets the PHP 5.x that ships with CentOS 6.
     */
    error_reporting(E_ALL);
    ini_set("display_errors",1);

    // A missing (or non-string, e.g. ?q[]=x) parameter is treated as an empty
    // query instead of raising an "undefined index" notice or a type warning.
    $raw = ( isset($_GET['q']) && is_string($_GET['q']) ) ? $_GET['q'] : '';
    $q = trim( html_entity_decode( $raw, ENT_QUOTES, 'UTF-8') ) ;

    $sphx = sphinx_search($q, 0, 20);
    //print_r( $sphx );

    // sphinx_search() returns false when nothing matched or the query failed
    if( !$sphx || empty($sphx['ids']) )
        die('No Results');

    // comma separated document ids produced by Sphinx (integers), already in
    // relevance order; FIELD() keeps that order in the MySQL result
    $ids = $sphx['ids'];

    $sql =  "SELECT `id`, `url`, `Title`, `Description`
                FROM `sites`
                WHERE  `id` in ( {$ids}  )
                ORDER BY FIELD(`id`, {$ids} )";
    db();

    // the table is utf8 and the output below is escaped as UTF-8, so make the
    // connection deliver utf8 as well
    mysql_set_charset('utf8');

    if(  !($r = mysql_query($sql)))
        die("[MYSQL]".mysql_error() . mysql_errno() );

    $max = $sphx['total'];
    $num_rows = $sphx['docs'];

    echo "<b>Displaying {$num_rows} results of {$max}</b><br /><br />";

    while($row = mysql_fetch_assoc($r) ) {
        // database content is not trusted markup: escape it before it is
        // placed into the attribute and the element bodies
        $url   = htmlspecialchars( $row['url'], ENT_QUOTES, 'UTF-8' );
        $title = htmlspecialchars( $row['Title'], ENT_QUOTES, 'UTF-8' );
        $descr = htmlspecialchars( $row['Description'], ENT_QUOTES, 'UTF-8' );

        echo "<a href=\"http://{$url}\">{$title}</a><br />{$descr}<br /><hr />";
        }
    mysql_free_result($r);
     
    /*
     * SPHINX Search
     */
     
    /*
     * Run a keyword search against the Sphinx indexes `sites` and `sitesDelta`.
     *
     * @param string $q     the keywords; passed through EscapeString() before querying
     * @param int    $i     offset of the first match to return
     * @param int    $limit maximum number of matches to return
     * @param bool   $url   set to true to match against the 'url' field only
     *
     * @return array|false  false when the query failed or matched nothing, otherwise
     *                      array( 'ids'   => comma separated ids in result order,
     *                             'total' => the 'total' value reported by Sphinx,
     *                             'docs'  => number of matches in this result )
     */
    function sphinx_search($q, $i, $limit, $url=false){
            require_once 'sphinxapi.php';

            $client = new SphinxClient();
            $client->SetServer( "localhost" , 9312);
            $client->SetMatchMode( SPH_MATCH_EXTENDED  );
            $client->SetSortMode ( SPH_SORT_RELEVANCE );
            $client->SetFieldWeights(array('url' => 3000, 'Title' => 300, 'Description' => 30 ));
            $client->SetLimits( $i , $limit);

            // escape first, then optionally prefix the field selector so that
            // the '@url' operator itself stays unescaped
            $escaped = $client->EscapeString( $q);
            $query = $url ? "@url {$escaped}" : $escaped;

            $found = $client->Query( $query, 'sites sitesDelta' );

            if ( $found === false ) {
                    error_log( '[SPHINX]Query failed: ' . $client->GetLastError() );
                    return false;
            }

            if ( $client->GetLastWarning() )
                    error_log( '[SPHINX]WARNING: ' .  $client->GetLastWarning() );

            if ( empty($found["matches"]) )
                    return false;

            // the matches array is keyed by document id
            $docIds = array_keys( $found["matches"] );

            return array(
                    'ids'   => implode( ',', $docIds ),
                    'total' => $found['total'],
                    'docs'  => count( $docIds )
            );
    }
     
    /*
     * Connect to MySQL and select Sample_DB. The link is stored in
     * $GLOBALS['db'] and reused by later calls.
     *
     * The script is terminated with die() when connecting or selecting the
     * database fails.
     *
     * @return bool always true (on the first call as well as on later calls)
     */
    function db(){

        // already connected by an earlier call
        if( !empty($GLOBALS['db']) ) return true;

        if( !$GLOBALS['db'] = mysql_connect('localhost', 'root', 'rootpass' ) ) {
            die("[MYSQL]".mysql_error() . mysql_errno() );
        }
        // select the database on the link that was just opened rather than
        // relying on the implicit "last opened connection"
        elseif(!mysql_select_db('Sample_DB', $GLOBALS['db'])) {
            die("[MYSQL]".mysql_error() . mysql_errno() );
        }

        return true;
    }
    ?>

    - Save the script as sphinx.php and open it in a browser, e.g. http://www.example.com/sphinx.php?q=*mysql

    - Create script to rotate delta indexes

    vi /server/sphinx-delta.sh
    #!/bin/sh
    # Rebuild and rotate the delta index, unless an indexer process is already
    # running (pidof succeeds only when it finds one).
    /sbin/pidof indexer > /dev/null || /usr/bin/indexer  sitesDelta  --rotate

    - Add a cron job

    chmod +x /server/sphinx-delta.sh
    crontab -e
    ##########################sphinx
    # rotate the delta index every 30 minutes
    # (the path must match the script made executable above: /server/sphinx-delta.sh)
    */30  *  *  *  * /server/sphinx-delta.sh > /dev/null 2>&1

    - To merge indexes run

    /usr/bin/indexer --merge sites sitesDelta --rotate;
  • 相关阅读:
    ntp
    mknod
    timeout/timelimit
    Eclipse 包变成文件夹
    Eclipse 包变成文件夹
    Java生成随机字符串和随即生成10以内的字符串
    Java生成随机字符串和随即生成10以内的字符串
    插入排序
    插入排序
    Java 中基本类型和包装类之间的转换
  • 原文地址:https://www.cnblogs.com/helloyb/p/3049685.html
Copyright © 2011-2022 走看看