搜索示例
数据准备
创建表
-- Source table for the search demo: one row per scraped article.
CREATE TABLE `blog` (
    `id`          int(11)     NOT NULL AUTO_INCREMENT COMMENT '主键id',
    `title`       varchar(60) DEFAULT NULL COMMENT '标题',
    `author`      varchar(60) DEFAULT NULL COMMENT '作者',
    `content`     text COMMENT '内容',
    `create_time` datetime    DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
    -- Refreshed automatically by MySQL whenever the row is updated.
    `update_time` datetime    DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
    PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
获取测试数据
# -*- coding: utf-8 -*-
"""Scrape poems from gushiwen.cn and insert them into the MySQL ``blog`` table."""
from pprint import pprint

import requests
from parsel import Selector
from puremysql import PureMysql

# Seconds to wait for the remote site before giving up on a request,
# so a stalled connection cannot hang the whole run.
REQUEST_TIMEOUT = 10


def get_data(url):
    """
    Fetch one listing page and extract the poems on it.

    eg: https://www.gushiwen.cn/

    :param url: URL of the listing page to scrape
    :return: list of dicts with the keys ``title``, ``author`` and ``content``
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    sel = Selector(text=response.text)
    rows = sel.css(".main3 .left .sons")

    lst = []
    for row in rows:
        title = row.css("b::text").extract_first()
        author = row.css(".source").xpath("string(.)").extract_first()
        content = row.css(".contson").xpath("string(.)").extract_first()

        # Skip entries that have no title.
        if not title:
            continue

        # extract_first() returns None when a selector matches nothing;
        # fall back to an empty string instead of raising AttributeError.
        item = {
            "title": title.strip(),
            "author": (author or "").strip(),
            "content": (content or "").replace('\n', ''),
        }

        pprint(item)
        lst.append(item)

    return lst


def insert_data(lst):
    """
    Insert the scraped rows into the ``blog`` table.

    :param lst: list of row dicts as produced by ``get_data``
    """
    # Nothing to store: avoid opening a connection for an empty page.
    if not lst:
        return

    # NOTE(review): the table is declared utf8mb4 while this connection asks
    # for utf8 -- confirm whether the charset here should be utf8mb4 as well.
    con = PureMysql(db_url="mysql://root:123456@127.0.0.1:3306/data?charset=utf8")
    try:
        table = con.table("blog")
        ret = table.insert(lst)
    finally:
        # Release the connection even when the insert fails.
        con.close()
    print("成功入库", ret)


def main():
    """Scrape listing pages 1 through 10 and store each page's rows."""
    for page in range(1, 11):
        url = f"https://www.gushiwen.cn/default.aspx?page={page}"
        lst = get_data(url)
        insert_data(lst)


if __name__ == '__main__':
    main()
logstash同步数据配置
config/jdbc.conf
# Pipeline that copies every row of the MySQL `blog` table into the
# Elasticsearch `blog` index.
input {
  jdbc {
    jdbc_driver_library => "mysql-connector-java-8.0.16.jar"
    jdbc_driver_class => "com.mysql.cj.jdbc.Driver"
    jdbc_connection_string => "jdbc:mysql://127.0.0.1:3306/data"
    jdbc_user => "root"
    jdbc_password => "123456"
    statement => "SELECT id, title, content, author, create_time, update_time FROM blog"
    # Read the result set in pages of 5000 rows.
    jdbc_paging_enabled => "true"
    jdbc_page_size => "5000"
  }
}

filter {
}

output {
  # Print every event to the console for debugging.
  stdout {
    codec => rubydebug
  }
  elasticsearch {
    # Plugin settings are separated by whitespace only; a comma between
    # them is a configuration syntax error.
    index => "blog"
    # Use the MySQL primary key as the document id so re-running the sync
    # overwrites documents instead of duplicating them.
    document_id => "%{id}"
  }
}
同步数据
# Check the configuration file
$ ./bin/logstash -t -f config/jdbc.conf

# Run the configuration file
$ ./bin/logstash -f config/jdbc.conf
问题及处理
处理elasticsearch跨域问题
config/elasticsearch.yml
# Enable cross-origin requests so a browser page can query Elasticsearch directly.
http.cors.enabled: true
# "*" accepts requests from every origin.
# NOTE(review): looks intended for local development only -- confirm before using elsewhere.
http.cors.allow-origin: "*"
搜索提示
高亮结果显示
# Match on the author field and return highlighted fragments of that field
POST /blog/_search
{
  "query": {
    "match": {
      "author": "李白"
    }
  },
  "highlight": {
    "fields": {
      "author": {}
    }
  }
}
搜索模板
将查询和参数分离
# Inline search template: the query skeleton is in "source",
# the values substituted into it are in "params"
POST /blog/_search/template
{
  "source": {
    "query": {
      "match": {
        "{{key}}": "{{value}}"
      }
    },
    "size": "{{size}}"
  },
  "params": {
    "key": "author",
    "value": "李白",
    "size": 10
  }
}
其他语句
# Debug the rendered result of a template:
GET _render/template

# Retrieve a template definition:
GET _scripts/<templatename>

# Delete a template definition:
DELETE _scripts/<templatename>
创建模板
# Store a mustache search template under the id blog_template_v1.
# Parameters: key (field to match and highlight), value (search text), size.
POST /_scripts/blog_template_v1
{
  "script": {
    "lang": "mustache",
    "source": {
      "query": {
        "match": {
          "{{key}}": "{{value}}"
        }
      },
      "highlight": {
        "fields": {
          "{{key}}": {}
        }
      },
      "size": "{{size}}"
    }
  }
}
使用模板
# Run the stored template blog_template_v1 against the blog index
POST /blog/_search/template
{
  "id": "blog_template_v1",
  "params": {
    "key": "author",
    "value": "李白",
    "size": 10
  }
}
模糊查询
# Match query on the "doc" field of the "test" index with automatic fuzziness
GET test/_search
{
  "query": {
    "match": {
      "doc": {
        "query": "elastix",
        "fuzziness": "AUTO"
      }
    }
  }
}
优化查询
# Store a template that combines three "should" clauses on one field:
# a keyword prefix match (boost 10), a phrase-prefix match (boost 2)
# and a plain match.
# Parameters: field, query, size.
# NOTE(review): this re-uses the id blog_template_v1, whose earlier definition
# takes "key"/"value" instead of "field"/"query" -- confirm the overwrite is intended.
# NOTE(review): "uid" and "views" are not among the columns synced from the blog table -- verify the _source list.
POST _scripts/blog_template_v1
{
  "script": {
    "lang": "mustache",
    "source": {
      "size": "{{size}}",
      "query": {
        "bool": {
          "should": [
            {
              "prefix": {
                "{{field}}.keyword": {
                  "value": "{{query}}",
                  "boost": 10
                }
              }
            },
            {
              "match_phrase_prefix": {
                "{{field}}": {
                  "query": "{{query}}",
                  "boost": 2
                }
              }
            },
            {
              "match": {
                "{{field}}": "{{query}}"
              }
            }
          ]
        }
      },
      "_source": [
        "title",
        "id",
        "uid",
        "views"
      ]
    }
  }
}
重建索引
# Create the new index
PUT blog_v1

# Look up the mapping of the original index
GET blog/_mapping

# Set the mapping of the new index
POST blog_v1/doc/_mapping
{
  "doc": {
    "properties": {
      "@timestamp": {
        "type": "date"
      },
      "@version": {
        "type": "text",
        "fields": {
          "keyword": {
            "type": "keyword",
            "ignore_above": 256
          }
        }
      },
      "author": {
        "type": "text",
        "fields": {
          "keyword": {
            "type": "keyword",
            "ignore_above": 256
          }
        }
      },
      "content": {
        "type": "text",
        "fields": {
          "keyword": {
            "type": "keyword",
            "ignore_above": 256
          }
        }
      },
      "create_time": {
        "type": "date"
      },
      "id": {
        "type": "long"
      },
      "title": {
        "type": "text",
        "fields": {
          "keyword": {
            "type": "keyword",
            "ignore_above": 256
          }
        }
      },
      "update_time": {
        "type": "date"
      }
    }
  }
}

# Copy the documents from the old index into the new one
POST _reindex
{
  "source": {
    "index": "blog"
  },
  "dest": {
    "index": "blog_v1"
  }
}

# Test a query against the new index
POST /blog_v1/_search
索引别名
# List aliases
GET _cat/aliases

# Add the alias my-blog to the blog index
POST /_aliases
{
  "actions": [
    {
      "add": {
        "index": "blog",
        "alias": "my-blog"
      }
    }
  ]
}

# Switch the alias: point my-blog at blog_v1 and detach it from blog
# in a single request
POST /_aliases
{
  "actions": [
    {
      "add": {
        "index": "blog_v1",
        "alias": "my-blog"
      }
    },
    {
      "remove": {
        "index": "blog",
        "alias": "my-blog"
      }
    }
  ]
}

# Search through the alias
POST my-blog/_search
拼音处理的插件(插件版本必须与 Elasticsearch 版本一致,此处为 6.3.2)
https://github.com/medcl/elasticsearch-analysis-pinyin/releases/tag/v6.3.2
添加拼音搜索字段
# Close the index
POST my-blog/_close

# Configure the index with a pinyin analyzer
PUT my-blog/_settings
{
  "index": {
    "analysis": {
      "analyzer": {
        "pinyin_analyzer": {
          "tokenizer": "my_pinyin"
        }
      },
      "tokenizer": {
        "my_pinyin": {
          "type": "pinyin",
          "keep_first_letter": true,
          "keep_separate_first_letter": true,
          "keep_full_pinyin": true,
          "keep_original": false,
          "limit_first_letter_length": 16,
          "lowercase": true
        }
      }
    }
  }
}

# Open the index again
POST my-blog/_open

# Look up the current mapping
GET my-blog/_mapping

# Add the field: "author" gains a "pinyin" sub-field that uses pinyin_analyzer
PUT my-blog/doc/_mapping
{
  "doc": {
    "properties": {
      "@timestamp": {
        "type": "date"
      },
      "@version": {
        "type": "text",
        "fields": {
          "keyword": {
            "type": "keyword",
            "ignore_above": 256
          }
        }
      },
      "author": {
        "type": "text",
        "fields": {
          "keyword": {
            "type": "keyword",
            "ignore_above": 256
          },
          "pinyin": {
            "type": "text",
            "analyzer": "pinyin_analyzer"
          }
        }
      },
      "content": {
        "type": "text",
        "fields": {
          "keyword": {
            "type": "keyword",
            "ignore_above": 256
          }
        }
      },
      "create_time": {
        "type": "date"
      },
      "id": {
        "type": "long"
      },
      "title": {
        "type": "text",
        "fields": {
          "keyword": {
            "type": "keyword",
            "ignore_above": 256
          }
        }
      },
      "update_time": {
        "type": "date"
      }
    }
  }
}

# Update the existing documents in place
POST my-blog/_update_by_query?conflicts=proceed

# Test the pinyin search
POST my-blog/_search
{
  "query": {
    "match": {
      "author.pinyin": "libai"
    }
  }
}
前端显示
<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>Document</title>

    <!-- Vue development build (includes helpful console warnings).
         Pinned to the 2.x line: element-ui targets Vue 2, and the unpinned
         package now resolves to Vue 3, which has no dist/vue.js. -->
    <script src="https://cdn.jsdelivr.net/npm/vue@2/dist/vue.js"></script>

    <!-- Element UI styles -->
    <link
      rel="stylesheet"
      href="https://unpkg.com/element-ui/lib/theme-chalk/index.css"
    />
    <!-- Element UI component library -->
    <script src="https://unpkg.com/element-ui/lib/index.js"></script>

    <!-- axios -->
    <script src="https://unpkg.com/axios/dist/axios.min.js"></script>

    <style>
      /* Center the search box */
      #app {
        width: 200px;
        margin: 0 auto;
        margin-top: 300px;
      }

      /* Highlight matched text in the search results */
      em {
        color: red;
      }
    </style>
  </head>
  <body>
    <div id="app">
      <el-autocomplete
        v-model="state"
        :fetch-suggestions="querySearchAsync"
        placeholder="请输入内容"
        @select="handleSelect"
      >
        <!-- Custom suggestion row: render the highlighted author fragment -->
        <template slot-scope="{ item }">
          <div v-html="item.highlight.author[0]"></div>
        </template>
      </el-autocomplete>
    </div>

    <script>
      new Vue({
        el: "#app",

        data() {
          return {
            list: [],
            state: "",
          };
        },

        methods: {
          /**
           * Fetch author suggestions from Elasticsearch for el-autocomplete.
           *
           * @param {string} queryString - text currently typed in the input
           * @param {Function} cb - callback that receives the suggestion array;
           *   it is always called, also on failure, so the dropdown never
           *   stays in its loading state
           */
          async querySearchAsync(queryString, cb) {
            // Search endpoint
            const QUERY_URL = "http://localhost:9200/blog/_search";

            // Nothing typed yet: skip the request.
            if (!queryString) {
              cb([]);
              return;
            }

            // Search request body
            const query = {
              query: {
                match: {
                  author: queryString,
                },
              },
              highlight: {
                // Fragments are rendered with v-html, so have Elasticsearch
                // HTML-escape the field text around its <em> tags.
                encoder: "html",
                fields: { author: {} },
              },
            };

            try {
              const res = await axios.post(QUERY_URL, query);
              console.log(res.data.hits.hits);

              // el-autocomplete writes item.value into v-model when a
              // suggestion is picked; raw hits have no such key.
              const suggestions = res.data.hits.hits.map(function (hit) {
                return Object.assign({}, hit, { value: hit._source.author });
              });
              cb(suggestions);
            } catch (err) {
              console.error(err);
              cb([]);
            }
          },

          /** Log the suggestion the user picked. */
          handleSelect(item) {
            console.log(item);
          },
        },
      });
    </script>
  </body>
</html>