分页: 1 / 1

OpenBSD 4.7 sphinx-0.9.8.1 部署

发表于 : 2010-09-12 15:19
atyu30
Date: 2010.09.12
Author: atyu30
Version: 0.0.1

安装sphinx

代码: 全选

# pkg_add sphinx-0.9.8.1.tgz                                                                                                               
sphinx-0.9.8.1: ok                                                                                                                         
--- +sphinx-0.9.8.1 -------------------
If you are using Ruby-On-Rails, both the Sphincter and Ultrasphinx plugins
will automatically configure sphinx.
#
配置sphinx(将以下内容保存为 /etc/sphinx.conf,并预先创建 /var/sphinx/data 和 /var/sphinx/log 目录)

代码: 全选

#
# Sphinx configuration for MediaWiki
#
# Based on examples by Paul Grinberg at http://www.mediawiki.org/wiki/Extension:SphinxSearch
# and Hank at http://www.ralree.info/2007/9/15/fulltext-indexing-wikipedia-with-sphinx
#
# Modified by Svemir Brkic for http://www.newworldencyclopedia.org/
#
# Released under GNU General Public License (see http://www.fsf.org/licenses/gpl.html)
#
# Latest version available at http://www.mediawiki.org/wiki/Extension:SphinxSearch
# ChangeLog:
# * Sun Sep 12 2010 atyu30 <ipostfix @ gmail.com> -
#- Initial build for OpenBSD 4.7.

# data source definition for the main index
source src_wiki_main
{
        # data source: MediaWiki's MySQL database.
        # The four sql_* values below are placeholders and must be filled in
        # with the connection details of your wiki database.
        type            = mysql
        sql_host        = #replace with your db host
        sql_user        = #replace with your db username
        sql_pass        = #replace with your db password
        sql_db          = #replace with your db name
        # these two are optional; adjust the socket path to your MySQL installation
        #sql_port       = 3306
        #sql_sock       = /var/lib/mysql/mysql.sock

        # pre-query, executed before the main fetch query;
        # switches the connection to UTF-8 (the index uses charset_type = utf-8)
        sql_query_pre   = SET NAMES utf8

        # main document fetch query - change the table names if you are using a prefix.
        # Joins page -> revision -> text so that each page is returned together
        # with the text of its latest revision (rev_id = page_latest).
        sql_query       = SELECT page_id, page_title, page_namespace, old_id, old_text FROM page, revision, text WHERE rev_id=page_latest AND old_id=rev_text_id

        # attribute columns: stored with each document as unsigned integers
        sql_attr_uint   = page_namespace
        sql_attr_uint   = old_id

        # uncomment next line to collect all category ids for a category filter
        #sql_attr_multi  = uint category from query; SELECT cl_from, page_id AS category FROM categorylinks, page WHERE page_title=cl_to AND page_namespace=14

        # optional - used by command-line search utility to display document information;
        # $id is substituted with the matching document's id (the page_id)
        sql_query_info  = SELECT page_title, page_namespace FROM page WHERE page_id=$id
}

# data source definition for the incremental index
source src_wiki_incremental : src_wiki_main
{
        # Same fetch query as the parent source, restricted to pages whose
        # page_touched timestamp is at or after today's date at 07:00:00.
        # Adjust this cutoff based on the time you run the full index:
        # in this case, the full index runs at 3 AM (server time), which translates to 7 AM UTC.
        sql_query       = SELECT page_id, page_title, page_namespace, old_id, old_text FROM page, revision, text WHERE rev_id=page_latest AND old_id=rev_text_id AND page_touched>=DATE_FORMAT(CURDATE(), '%Y%m%d070000')      

        # all other parameters are copied from the parent source
}

# main index definition
index wiki_main
{
        # which document source to index
        source          = src_wiki_main

        # this is the path and index file name without extension;
        # you may need to change this path or create this folder
        path            = /var/sphinx/data/wiki_main

        # docinfo (i.e. per-document attribute values) storage strategy
        docinfo         = extern

        # morphology: English stemmer
        morphology      = stem_en

        # stopwords file (optional; path kept under the same /var/sphinx/data
        # directory as the index files)
        #stopwords      = /var/sphinx/data/stopwords.txt

        # minimum word length
        min_word_len    = 1

        # uncomment next 2 lines to allow wildcard (*) searches
        #min_infix_len = 1
        #enable_star = 1

        # charset encoding type
        charset_type    = utf-8

        # charset definition and case folding rules "table":
        # digits, ASCII letters (upper case folded to lower case) and the
        # Latin-1 letters U+C0..U+FF folded to an unaccented ASCII letter
        # (U+D7 and U+F7, the multiplication and division signs, are not letters
        # and are left out). Each code point is listed exactly once.
        # Every line but the last ends with ", \" to continue the value;
        # the last entry carries no trailing comma.
        charset_table   = 0..9, A..Z->a..z, a..z, \
                U+C0->a, U+C1->a, U+C2->a, U+C3->a, U+C4->a, U+C5->a, U+C6->a, \
                U+C7->c, U+C8->e, U+C9->e, U+CA->e, U+CB->e, U+CC->i, \
                U+CD->i, U+CE->i, U+CF->i, U+D0->d, U+D1->n, U+D2->o, U+D3->o, \
                U+D4->o, U+D5->o, U+D6->o, U+D8->o, U+D9->u, U+DA->u, U+DB->u, \
                U+DC->u, U+DD->y, U+DE->t, U+DF->s, \
                U+E0->a, U+E1->a, U+E2->a, U+E3->a, U+E4->a, U+E5->a, U+E6->a, \
                U+E7->c, U+E8->e, U+E9->e, U+EA->e, U+EB->e, U+EC->i, \
                U+ED->i, U+EE->i, U+EF->i, U+F0->d, U+F1->n, U+F2->o, U+F3->o, \
                U+F4->o, U+F5->o, U+F6->o, U+F8->o, U+F9->u, U+FA->u, U+FB->u, \
                U+FC->u, U+FD->y, U+FE->t, U+FF->y
}

# incremental index definition
index wiki_incremental : wiki_main
{
        # derived from wiki_main; only the index file location and the
        # document source are overridden here
        path            = /var/sphinx/data/wiki_incremental
        source          = src_wiki_incremental
}


# indexer settings
indexer
{
        # indexing memory limit (default is 32M); set to 64M here
        mem_limit       = 64M
}

# searchd settings
searchd
{
        # IP address on which the search daemon binds and accepts connections;
        # optional - the default is to listen on all addresses (0.0.0.0).
        # Here the daemon is restricted to the loopback interface.
        address         = 127.0.0.1

        # port on which the search daemon listens
        port            = 9312

        # searchd run info is logged here - create or change the folder
        log             = /var/sphinx/log/searchd.log

        # all the search queries are logged here
        query_log       = /var/sphinx/log/query.log

        # client read timeout, in seconds
        read_timeout    = 5

        # maximum number of children to fork
        max_children    = 30

        # file that will contain the searchd process ID
        pid_file        = /var/sphinx/log/searchd.pid

        # maximum number of matches this daemon will ever retrieve
        # from each index and serve to a client
        max_matches     = 1000
}

# --eof--
建立索引

代码: 全选

# indexer --config=/etc/sphinx.conf  --all                   
Sphinx 0.9.8.1-release (r1533)
Copyright (c) 2001-2008, Andrew Aksyonoff

using config file '/etc/sphinx.conf'...
indexing index 'wiki_main'...
collected 3 docs, 0.0 MB
sorted 0.0 Mhits, 100.0% done
total 3 docs, 501 bytes
total 0.062 sec, 8052.07 bytes/sec, 48.22 docs/sec
indexing index 'wiki_incremental'...
collected 0 docs, 0.0 MB
total 0 docs, 0 bytes
total 0.043 sec, 0.00 bytes/sec, 0.00 docs/sec
启动sphinx

代码: 全选

# searchd                                                                                                                                  
Sphinx 0.9.8.1-release (r1533)
Copyright (c) 2001-2008, Andrew Aksyonoff

using config file '/etc/sphinx.conf'...
creating server socket on 127.0.0.1:9312
# 

发表于 : 2010-09-12 15:40
leo
简单高效的全文搜索引擎,多谢提供。我还真希望有人可以把它整合到这个 PHP 的 vBulletin 4 论坛里。正好新版 vBulletin 的中文搜索有问题,正打算在本地安装一个 sphinx 来辅助搜索,绝对是及时雨,这个抽空一定要试试。