- Make sure to check the CentOS, PHP-FPM, Nginx, memcached and MySQL posts first.
- Get fresh RPM from sphinxsearch.com
- Install Sphinx
# Install (or upgrade to) the Sphinx 2.0.4 RHEL6 x86_64 package directly from the vendor URL.
# rpm flags: -U upgrade/install, -h print hash-mark progress, -v verbose.
rpm -Uhv http://sphinxsearch.com/files/sphinx-2.0.4-1.rhel6.x86_64.rpm |
- Create sample MySQL database
-- Sample database used by the Sphinx main+delta indexing example.
CREATE DATABASE `Sample_DB`
    DEFAULT CHARACTER SET utf8
    COLLATE utf8_general_ci;

USE `Sample_DB`;

-- Documents to be indexed. `id` is the Sphinx document ID; `url`, `Title`
-- and `Description` are the full-text fields fetched by sphinx.conf.
CREATE TABLE IF NOT EXISTS `sites` (
    `id`          INT(11) UNSIGNED NOT NULL AUTO_INCREMENT,
    `url`         VARCHAR(255)     NOT NULL,
    `Title`       VARCHAR(1024)    NOT NULL,
    `Description` VARCHAR(4000)    NOT NULL,
    PRIMARY KEY (`id`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8;

INSERT INTO `sites` (`url`, `Title`, `Description`) VALUES
    ('www.petercolephoto.com', 'Cole, Peter', 'A gallery of 1100 plus adeventure sports images organized and presented in thumbnail/popup album form using PHP/mySQL on an Apache/Linux server.'),
    ('www.customwebapps.com', 'Custom Web Apps', 'PHP/MYSQL Web-based software development company'),
    ('www.dynamicpros.com', 'DynamicPros', 'Professional web programming services. Specializing in MySQL and e-commerce solutions.'),
    ('www.perl-resume.com', 'Sagayam, Christopher', 'Perl and CGI consultant with experience in Perl, Linux, NT, MySQL, MS-SQL, CGI, internet programming.'),
    ('cfortune.kics.bc.ca', 'Fortune, Chris', 'Freelance computer programmer in PHP, MySQL, HTML, CSS, Javascript, DHTML, Perl, CGI, ecommerce.'),
    ('php4hire.com', 'Klimov, Victor', 'Main expertise is designing customized web based applications using PHP and MySQL for Linux, Unix and Windows NT platforms.'),
    ('www.erasmos.com', 'Rasmussen, Sean', 'Internet systems developer, using languages and tools like Java, Perl, PHP, and SQL (Oracle, Mysql. Sun Java Certified. Seeks either contract or permanent employment.'),
    ('www.dalinowen.com', 'Owen, Dalin S.', 'Network security professional: Linux, UNIX, MS, C/C++, Perl, Apache, Qmail, MySQL, Tripwire, IPF, IPFW, PF, IPCHAINS, IPBUCKETS, Snort.'),
    ('www.kickfire.com', 'Kickfire', 'Database appliance based on MySQL.'),
    ('www.evoknow.com', 'Evoknow, Inc.', 'Full service company developing scalable Web applications in PHP and MySQL. Sacramento, California.'),
    ('www.primeharbor.com', 'PrimeHarbor Technologies, Inc.', 'Consulting firm specializing in OpenSource tools and languages. Expertise in MySQL, Apache, qmail, perl, python, java, C/C++, and kylix. Atlanta, Georgia.'),
    ('vidgen.sourceforge.net', 'VI Document Generator', 'VidGen bridges the gap between Databases and Dynamically Printed Documents with use of SVG, PHP, and mySQL. Input SVG and export various script formats like: Xerox VIPP, SOLscript, PDFlib-PDI or PreS. Or merge to PDF, PS, PPML, VPS, PCL or W3C SVGprint.'),
    ('www.collegefundsoftware.com', 'CollegeFund Software', 'Develops QuarkXTensions and AppleScripts developed for QuarkXPress. Also offers software development, training and consulting for the publishing industry. As well as web-based development using Perl, PHP and MySQL.'),
    ('www.usefulcomputerinfo.com', 'Useful Computer Info', 'Tutorials for windows, active server pages, mysql, fonts and search engine optimization.'),
    ('mysqltutorials.bravehost.com', 'Mysqlhelpster', 'Provides help with Mysql, Php, Html and Javascript.'),
    ('www.rapidweb.info', 'RapidWeb', 'A web hosting service offers an open-source MySQL-based content management system.'),
    ('weblogs.us', 'Weblogs.us', 'Offering free Wordpress blogs. Full FTP and MySQL access. No ads required on hosted pages.'),
    ('cafelog.com', 'b2', 'A news/ weblog tool (aka logware). Requires a server that can run PHP4, and a MySQL database. Freeware.'),
    ('monauraljerk.org', 'Monaural Jerk', 'Free, open-source PHP/MySQL weblog system. Includes calendar navigation, "edit this page", searching, channels, RSS, XML, tell-a-friend, and spider-friendly URLs.'),
    ('scoop.kuro5hin.org', 'Scoop', 'Free collaborative media software for running community web logs . Requires Perl, mod_perl, and MySQL.'),
    ('b2evolution.net', 'b2evolution', 'Full featured PHP/ mySQL weblog tool. Supports multiple categories, sub-catgeories, multiple weblogs, skins, statistics, comments, anti-spam filters.'),
    ('www.geeklog.net', 'Geeklog', 'Free open-source blog tool. Runs on many different operating systems and uses PHP4 and MySQL.'),
    ('thingamablog.sourceforge.net', 'Thingamablog', 'A cross-platform, standalone application for authoring and publishing weblogs. It will work on any platform on which Java can run. Thingamablog does not require a third-party blogging host, a CGI/PHP enabled web server, or a MySQL database. The only requirement to setup and manage a blog is FTP access to a web server.'),
    ('boastology.com', 'boastMachine', 'PHP/MySQL powered blogging platform.'),
    ('blogcms.com', 'BLOG:CMS', 'Blogware requires PHP and MySQL. Features include weblog, forum, wiki, news aggregator, and photo gallery.'),
    ('presstopia.com', 'Presstopia', 'Open source ASP.NET weblog application. Supports MySQL, MS Access, MS SQL Serve, Atom 0.3, RSS 2.0 feeds, multiple authors, comment, trackbacks, update pings.'),
    ('wheatblog.sourceforge.net', 'Wheatblog', 'A lightweight, open source, customizable blogging and content management solution powered by PHP and MySQL or SQLite.'),
    ('www.gidforums.com', 'GIDForums', 'Webmaster community that discusses issues on hosting, website design, and PHP/MySQL coding.'),
    ('www.catamatic.com', 'Click-IT', 'Low cost readymade dynamic websites with database and full content management facilities, plus bespoke PHP and mySQL programming services.'),
    ('www.dhishna.com', 'Dhishna Technologies', 'Offers design using ASP, SML, Flash, Photoshop, PHP, Mysql and Java. Based in Kerala, India.');

-- Bookkeeping table for the main+delta scheme: row counter_id = 1 holds the
-- highest `sites`.`id` covered by the main index.
-- max_doc_id is UNSIGNED so it has the same range as `sites`.`id`.
CREATE TABLE IF NOT EXISTS `sph_counter` (
    `counter_id` INT(11)          NOT NULL,
    `max_doc_id` INT(11) UNSIGNED NOT NULL,
    PRIMARY KEY (`counter_id`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8;

-- Seed row; the main source's sql_query_pre in sphinx.conf overwrites it on
-- every full index run.
INSERT INTO `sph_counter` (`counter_id`, `max_doc_id`) VALUES (1, 1);
- Configure Sphinx
# Create the index data directory and hand the tree to the sphinx user.
mkdir -p /server/sphinx/data
chown -R sphinx /server/sphinx
# Keep the packaged sample config as a backup, then write a new one.
mv /etc/sphinx/sphinx.conf /etc/sphinx/sphinx.conf.old
vi /etc/sphinx/sphinx.conf
#
# Sphinx configuration file sample
#
# WARNING! This sample contains (very) short helper descriptions only.
# Please refer to doc/sphinx.html for details.
# OR http://www.molecularsciences.org/book/export/html/405
#
# Layout: a "main" source/index pair (sites) plus a "delta" pair
# (sitesDelta). The sph_counter table records the highest document ID that
# the main index covers; the delta index picks up everything above it.
#

#############################################################################
## data source definition for sites
#############################################################################

source sites
{
	# data source type. mandatory, no default value
	# known types are mysql, pgsql, mssql, xmlpipe, xmlpipe2, odbc
	type = mysql

	#####################################################################
	## SQL settings (for 'mysql' and 'pgsql' types)
	#####################################################################

	# some straightforward parameters for SQL source types
	sql_host = localhost
	sql_user = root
	sql_pass = rootpass
	sql_db   = Sample_DB
	#sql_port = 3306 # optional, default is 3306

	# Sample_DB is utf8; fetch rows as UTF-8 to match charset_type below
	sql_query_pre = SET NAMES utf8
	sql_query_pre = SET SESSION query_cache_type=OFF
	# update counter: remember the highest id the main index will cover.
	# COALESCE keeps the statement valid when `sites` is empty
	# (MAX(id) is NULL then, and max_doc_id is NOT NULL).
	sql_query_pre = REPLACE INTO sph_counter SELECT 1, COALESCE(MAX(id), 0) FROM sites

	# main document fetch query
	# mandatory, integer document ID field MUST be the first selected column
	sql_query = SELECT id,url ,Title,Description FROM sites \
		WHERE id<=( SELECT max_doc_id FROM sph_counter WHERE counter_id=1 )

	sql_ranged_throttle = 0
}

#############################################################################
## sitesDelta source definition
#############################################################################

source sitesDelta : sites
{
	# Declaring sql_query_pre here replaces ALL pre-queries inherited from
	# "sites". That is intentional: the delta run must NOT execute the
	# REPLACE INTO sph_counter statement, or the counter would move forward
	# and the delta would always be empty.
	sql_query_pre = SET NAMES utf8
	sql_query_pre = SET SESSION query_cache_type=OFF

	# only documents added since the last main index run
	sql_query = SELECT id,url ,Title,Description FROM sites \
		WHERE id > ( SELECT max_doc_id FROM sph_counter WHERE counter_id=1 )
}

#############################################################################
## sites index definition
#############################################################################

# local index example
#
# this is an index which is stored locally in the filesystem
#
# all indexing-time options (such as morphology and charsets)
# are configured per local index
index sites
{
	# document source(s) to index
	# multi-value, mandatory
	# document IDs must be globally unique across all sources
	source = sites

	# index files path and file name, without extension
	# mandatory, path must be writable, extensions will be auto-appended
	path = /server/sphinx/data/sites

	# character set encoding type
	# optional, default is sbcs; known values are sbcs and utf-8
	# the source data is utf8, so index it as such
	charset_type = utf-8

	# minimum indexed word length
	# default is 1 (index everything)
	min_word_len = 3

	# minimum word infix length to index
	# optional, default is 0 (do not index infixes)
	# min_infix_len = 3

	# enable star-syntax (wildcards) when searching prefix/infix indexes
	# known values are 0 and 1
	# optional, default is 0 (do not use wildcard syntax)
	# enable_star = 1

	# enable chinese search
	# NOTE: n-gram indexing only applies to the characters listed in
	# ngram_chars, which is not set here; see ngram_chars in the Sphinx docs.
	ngram_len = 1
}

#############################################################################
## sitesDelta index definition
#############################################################################

# inherits every setting from "sites"; only source and path must differ,
# the remaining options are repeated for readability
index sitesDelta : sites
{
	# document source(s) to index
	source = sitesDelta

	# index files path and file name, without extension
	# mandatory, path must be writable, extensions will be auto-appended
	path = /server/sphinx/data/sitesDelta

	# minimum indexed word length
	min_word_len = 3

	# min_infix_len = 3
	# enable_star = 1

	# enable chinese search
	ngram_len = 1
}

#############################################################################
## indexer settings
#############################################################################

indexer
{
	# memory limit, in bytes, kilobytes (16384K) or megabytes (256M)
	# optional, default is 32M, max is 2047M, recommended is 256M to 1024M
	mem_limit = 32M
}

#############################################################################
## searchd settings
#############################################################################

searchd
{
	# hostname, port, or hostname:port, or /unix/socket/path to listen on
	# multi-value, multiple listen points are allowed
	# optional, default is 0.0.0.0:9312 (listen on all interfaces, port 9312)
	#
	# listen = 127.0.0.1
	# listen = 192.168.0.1:9312
	# listen = 9312

	compat_sphinxql_magics = 0

	# log file, searchd run info is logged here
	# optional, default is 'searchd.log'
	log = /var/log/searchd.log

	# query log file, all search queries are logged here
	# optional, default is empty (do not log queries)
	query_log = /var/log/query.log

	# client read timeout, seconds
	# optional, default is 5
	read_timeout = 5

	# request timeout, seconds
	# optional, default is 5 minutes
	client_timeout = 300

	# maximum amount of children to fork (concurrent searches to run)
	# optional, default is 0 (unlimited)
	max_children = 30

	# PID file, searchd process ID file name
	# mandatory
	pid_file = /var/log/searchd.pid

	# max amount of matches the daemon ever keeps in RAM, per-index
	# WARNING, THERE'S ALSO PER-QUERY LIMIT, SEE SetLimits() API CALL
	# default is 1000 (just like Google)
	max_matches = 1000

	# seamless rotate, prevents rotate stalls if precaching huge datasets
	# optional, default is 1
	seamless_rotate = 1

	# whether to forcibly preopen all indexes on startup
	# optional, default is 0 (do not preopen)
	preopen_indexes = 0

	# whether to unlink .old index copies on successful rotation.
	# optional, default is 1 (do unlink)
	unlink_old = 1

	# crash log path
	# searchd will (try to) log crashed query to 'crash_log_path.PID' file
	# optional, default is empty (do not create crash logs)
	#
	#crash_log_path = /server/log/sphinx/crash
}

# --eof--
- Create indexes
# Build every index defined in sphinx.conf (sites and sitesDelta).
/usr/bin/indexer --all
# The index files were just created by the invoking user; give them back to sphinx.
chown -R sphinx /server/sphinx
- Start Sphinx
# Start the searchd daemon through its init script.
/etc/init.d/searchd start |
- Run a test search for the word ‘mysql’
# Query the indexes from the command line for the keyword "mysql".
/usr/bin/search mysql |
- Search using PHP
- Get sphinxapi.php : Sphinx comes with a PHP implementation of searchd client API. You can find it at api/sphinxapi.php in the root of the sphinx source tarball (tar.gz).
<?php
/*
 * Sample search page: looks up ?q=<keywords> in the Sphinx indexes
 * "sites" and "sitesDelta", then loads the matching rows from MySQL in
 * relevance order and prints them as HTML.
 *
 * NOTE: the mysql_* extension used below matches the PHP 5 stack this
 * tutorial targets; it was removed in PHP 7.
 */
error_reporting(E_ALL);
ini_set("display_errors", 1);

// A missing or non-string "q" is treated as an empty query instead of
// raising an undefined-index notice.
$q = ( isset($_GET['q']) && is_string($_GET['q']) )
    ? trim( html_entity_decode( $_GET['q'], ENT_QUOTES, 'UTF-8' ) )
    : '';

$sphx = sphinx_search($q, 0, 20);

// sphinx_search() returns false when the query failed or matched nothing.
if ( $sphx === false || empty($sphx['ids']) )
    die('No Results');

// $ids is a comma separated list of document IDs produced by searchd,
// already sorted by relevance; FIELD() keeps that order in the result set.
$ids = $sphx['ids'];
$sql = "SELECT `id`, `url`, `Title`, `Description` FROM `sites` WHERE `id` in ( {$ids} ) ORDER BY FIELD(`id`, {$ids} )";

db();
if ( !($r = mysql_query($sql)) )
    die("[MYSQL]" . mysql_error() . mysql_errno());

$max      = $sphx['total'];
$num_rows = $sphx['docs'];
echo "<b>Displaying {$num_rows} results of {$max}</b><br /><br />";

while ( $row = mysql_fetch_assoc($r) ) {
    // Row values are site-supplied text: escape them before putting them
    // into HTML so they cannot inject markup or script.
    $url         = htmlspecialchars($row['url'], ENT_QUOTES, 'UTF-8');
    $title       = htmlspecialchars($row['Title'], ENT_QUOTES, 'UTF-8');
    $description = htmlspecialchars($row['Description'], ENT_QUOTES, 'UTF-8');
    echo "<a href=\"http://{$url}\">{$title}</a><br />{$description}<br /><hr />";
}
mysql_free_result($r);

/*
 * SPHINX Search
 */

/**
 * Search sites by keywords using Sphinx, optionally restricted to the
 * 'url' field.
 *
 * @param string $q     the keywords
 * @param int    $i     offset of the first result to return
 * @param int    $limit max number of results to return
 * @param bool   $url   set to true to match against the 'url' field only
 *
 * @return array|false  false when the query failed or nothing matched;
 *                      otherwise an array with
 *                      'ids'   => comma separated list of matching document IDs,
 *                      'total' => the 'total' value reported by searchd,
 *                      'docs'  => number of matches in this result page
 */
function sphinx_search($q, $i, $limit, $url = false)
{
    require_once 'sphinxapi.php';

    $cl = new SphinxClient();
    $cl->SetServer( "localhost", 9312 );
    $cl->SetMatchMode( SPH_MATCH_EXTENDED );
    $cl->SetSortMode( SPH_SORT_RELEVANCE );
    // Hits in the url outrank hits in the title, which outrank the description.
    $cl->SetFieldWeights( array('url' => 3000, 'Title' => 300, 'Description' => 30) );
    $cl->SetLimits( $i, $limit );

    // Escape extended-syntax operators contained in the user's keywords.
    $q = $cl->EscapeString( $q );

    // search url only
    if ( $url )
        $q = "@url {$q}";

    $result = $cl->Query( $q, 'sites sitesDelta' );

    if ( $result === false )
        error_log( '[SPHINX]Query failed: ' . $cl->GetLastError() );
    elseif ( $cl->GetLastWarning() )
        error_log( '[SPHINX]WARNING: ' . $cl->GetLastWarning() );

    if ( empty($result["matches"]) )
        return false;

    // The keys of "matches" are the document IDs, in result order.
    return array(
        'ids'   => implode( ',', array_keys($result["matches"]) ),
        'total' => $result['total'],
        'docs'  => count($result["matches"]),
    );
}

/**
 * Connect to MySQL and select Sample_DB (once per request).
 *
 * The connection handle is cached in $GLOBALS['db']. Dies with the MySQL
 * error message when connecting or selecting the database fails.
 *
 * @return bool true once the connection is ready
 */
function db()
{
    if ( !empty($GLOBALS['db']) )
        return true;

    if ( !$GLOBALS['db'] = mysql_connect('localhost', 'root', 'rootpass') )
        die("[MYSQL]" . mysql_error() . mysql_errno());

    if ( !mysql_select_db('Sample_DB') )
        die("[MYSQL]" . mysql_error() . mysql_errno());

    // The tables are utf8; read them as UTF-8 so the htmlspecialchars()
    // calls above receive valid input.
    mysql_set_charset('utf8');

    return true;
}
?>
- Save the script above as sphinx.php and run it: http://www.example.com/sphinx.php?q=*mysql
- Create script to rotate delta indexes
# Open the new delta-rotation script in vi.
vi /server/sphinx-delta.sh |
#!/bin/sh
# Rebuild the sitesDelta index and have searchd rotate to the new files.
# Skipped when an indexer process is already running, so runs never overlap.
if [ -z "$(/sbin/pidof indexer)" ]
then
    /usr/bin/indexer sitesDelta --rotate
fi
- Add a cron job
# Make the delta script executable, then open the crontab for editing.
chmod +x /server/sphinx-delta.sh
crontab -e
##########################sphinx
#rotate delta index every 30 min
*/30 * * * * /server/sphinx-delta.sh > /dev/null 2>&1
- To merge the delta index into the main index, run:
# Merge sitesDelta (source) into sites (destination); --rotate makes the running searchd pick up the merged files.
/usr/bin/indexer --merge sites sitesDelta --rotate; |