sphinx的配置文件是整个搭建过程中最容易出错的地方:
我们先要明白几个概念:
- source:数据源,数据是从什么地方来的。
- index:索引,有了数据源之后,再从数据源构建索引。索引相当于字典的检字表:先有整本字典的内容,才能编出检字表。
- searchd:提供搜索查询服务。它一般是以daemon(守护进程)的形式运行在后台的。
- indexer:构建索引的服务。当要重新构建索引的时候,就是调用indexer这个命令。
- attr:属性,属性是存在索引中的,它不进行全文索引,但是可以用于过滤和排序。
sphinx的配置文件过于冗长,我们把sphinx默认的配置文件中的注释部分去掉,重新进行描述理解,就成下面的样子了:
配置文件
# Data source "src1": pulls the documents to be indexed from a MySQL database.
source src1
{
# Source driver; the sql_* settings below are the MySQL connection parameters.
type = mysql
sql_host = localhost
sql_user = test
# Empty password.
sql_pass =
sql_db = test
sql_port = 3306
# Main document-fetch query. The first column (id) is the document ID;
# group_id and date_added are declared as attributes below, which leaves
# title and content as the full-text fields.
# The backslashes continue the value across lines, so the whitespace of the
# continued lines is part of the SQL text.
sql_query = \
SELECT id, group_id, UNIX_TIMESTAMP(date_added) AS date_added, title, content \
FROM documents
# Attributes are stored in the index but not full-text indexed; they can be
# used for filtering and sorting.
sql_attr_uint = group_id
sql_attr_timestamp = date_added
# Per-document lookup query used by the command-line `search` tool to display
# the matched rows; $id is replaced with the document ID.
sql_query_info = SELECT * FROM documents WHERE id=$id
}
# Data source "src1throttled": inherits every setting from src1 and only adds
# a throttle on top of it.
source src1throttled : src1
{
    # Pause (in milliseconds) between the steps of a ranged query, to reduce
    # the load put on the database server.
    # NOTE(review): no sql_query_range is visible in src1, so this setting
    # likely has no effect as written - confirm.
    sql_ranged_throttle = 100
}
# Plain on-disk index "test1", built by indexer from the src1 data source.
index test1
{
    # Where the documents come from, and the index file path
    # (base name, without extension).
    source       = src1
    path         = /home/yejianfeng/instance/coreseek/var/data/test1

    # Attribute storage: "extern" keeps attribute values in a separate file
    # instead of inline in the document lists.
    docinfo      = extern
    # Do not lock the cached index data in RAM.
    mlock        = 0

    # Text processing: no stemming, index words of any length (>= 1 char),
    # treat the text as a single-byte charset, and leave HTML markup as is.
    morphology   = none
    min_word_len = 1
    charset_type = sbcs
    html_strip   = 0
}
# Index "test1stemmed": inherits all settings of test1 and overrides only
# the two below.
index test1stemmed : test1
{
    # Separate index files, so the two indexes do not overwrite each other.
    path       = /home/yejianfeng/instance/coreseek/var/data/test1stemmed
    # Apply the English stemmer to indexed words.
    morphology = stem_en
}
# Distributed index "dist1": holds no data of its own; a query against it is
# fanned out to the local indexes and remote agents listed below.
index dist1
{
    type  = distributed

    # Local indexes that take part in the search.
    local = test1
    local = test1stemmed

    # Remote searchd instances, written as host:port:index[,index...].
    agent = localhost:9313:remote1
    agent = localhost:9314:remote2,remote3

    # Agent timeouts, in milliseconds: connecting, and waiting for results.
    agent_connect_timeout = 1000
    agent_query_timeout   = 3000
}
# Real-time index "rt": has no source; its schema is declared right here.
index rt
{
    type         = rt
    path         = /home/yejianfeng/instance/coreseek/var/data/rt

    # Full-text fields, in declaration order.
    rt_field     = title
    rt_field     = content

    # Unsigned integer attribute (stored, usable for filtering and sorting).
    rt_attr_uint = gid
}
# Settings for the indexer command, which (re)builds the indexes.
indexer
{
    # Memory usage limit for an indexing run.
    mem_limit = 32M
}
# Settings for searchd, the background daemon that serves search queries.
searchd
{
    # Listeners: 9312 uses the native Sphinx API protocol (the default when
    # no protocol is given); 9306 uses the MySQL 4.1 wire protocol (SphinxQL).
    listen            = 9312
    listen            = 9306:mysql41

    # Daemon log, per-query log, and PID file locations.
    log               = /home/yejianfeng/instance/coreseek/var/log/searchd.log
    query_log         = /home/yejianfeng/instance/coreseek/var/log/query.log
    pid_file          = /home/yejianfeng/instance/coreseek/var/log/searchd.pid

    # Timeouts, in seconds: reading a client request, and the maximum idle
    # time between requests on a persistent connection.
    read_timeout      = 5
    client_timeout    = 300

    # Maximum number of searches served concurrently.
    max_children      = 30
    # Maximum number of matches kept per index and returned to the client.
    max_matches       = 1000

    # Index rotation: rotate without stalling searches, open all indexes at
    # startup, and delete the .old index copies after a successful rotation.
    seamless_rotate   = 1
    preopen_indexes   = 1
    unlink_old        = 1

    # Size of the shared pool used to store multi-value attribute updates.
    mva_updates_pool  = 1M

    # Request limits: network packet size, filters per query, values per
    # filter, and queries per batch.
    max_packet_size   = 8M
    max_filters       = 256
    max_filter_values = 4096
    max_batch_queries = 32

    # Multi-processing mode: serve queries from threads in one process.
    workers           = threads
}