需求
搜索关键字自动补全(suggest)
输入“人工” 自动带出人工开头的关键字
Kibana 界面操作 实现 搜索关键字自动补全(suggest)
ES使用Completion Suggest 做关键字自动补全时,实际应用中搜索性能更加高效,建议多开一个子字段,如下示例,假设要根据title字段做关键字自动补全,不要改原字段的类型,多开一个子字段title.suggest,类型设置为completion,然后之后的suggest针对title.suggest字段做操作
- Term Suggester:词条建议器。对输入的文本进行分词,为每个分词提供词项建议;基于编辑距离,对analyze过的单个term提供建议,并不会考虑多个term/词组之间的关系。例如:quert -> query
- Phrase Suggester:短语建议器,在Term Suggester的基础上,会考量多个term之间的关系,通过ngram以词组为单位返回建议。例如:noble prize -> nobel prize
- Completion Suggester:它主要针对的应用场景就是"Auto Completion",FST数据结构,类似Trie树,不用打开倒排,快速返回,前缀匹配
- Context Suggester:上下文建议器,在Completion Suggester的基础上,用于filter和boost
创建索引
## Create the index and define its structure
PUT /article-index
{
  "settings": {
    "number_of_shards": 3,
    "number_of_replicas": 0
  },
  "mappings": {
    "properties":{
      "id":{
        "type":"keyword"
      },
      "title":{
        "type":"text",
        "analyzer":"ik_max_word",
        "fields": {
          # Extra sub-field used for keyword auto-completion (suggest) queries
          "suggest" : {
            "type" : "completion",
            "analyzer": "ik_max_word"
          }
        }
      },
      "summary":{
        "type":"text",
        "analyzer":"ik_max_word"
      },
      "createDate":{
        "type":"date",
        "format":"yyyy-MM-dd HH:mm:ss||yyyy-MM-dd"
      }
    }
  }
}
添加数据
注意:_bulk 请求体中每个 JSON 对象({ } 括号里面的内容)必须独占一行,不能换行
# _bulk: add documents in batch.
# The request body is NDJSON: every action line and every document must stay on ONE line.
# The typeless endpoint is used; putting a mapping type (_doc) in the path is deprecated in ES 7 and removed in ES 8.
POST /article-index/_bulk
{"index":{"_id":1}}
{"id":1,"title":"人工智能技术","summary":"ElasticSearch 实现分词全文检索 - ES、Kibana、IK安装","createDate":"2023-02-23"}
{"index":{"_id":2}}
{"id":2,"title":"人工智能软件 Chart GTP","summary":"太极生两仪,两仪生四象,四象生八卦","createDate":"2023-02-23"}
{"index":{"_id":3}}
{"id":3,"title":"Restful基本操作","summary":"ElasticSearch 实现分词全文检索 - Java SpringBoot ES 索引操作","createDate":"2023-02-23"}
{"index":{"_id":4}}
{"id":4,"title":"人工呼吸","summary":"ElasticSearch 实现分词全文检索 - 经纬度查询","createDate":"2023-02-23"}
{"index":{"_id":5}}
{"id":5,"title":"SpringBoot 全文检索实战","summary":"ElasticSearch 实现分词全文检索 - SpringBoot 全文检索实战","createDate":"2023-02-23"}
查询数据
## Query: completion suggestions for the prefix "人" on the title.suggest sub-field.
## The typeless _search endpoint is used; a mapping type (_doc) in the path is deprecated in ES 7 and removed in ES 8.
GET /article-index/_search
{
  "suggest": {
    "my-suggest" : {
      "prefix" : "人",
      "completion" : {
        "field" : "title.suggest"
      }
    }
  }
}
返回值--自动带出人开头的关键字
{ "took" : 3, "timed_out" : false, "_shards" : { "total" : 3, "successful" : 3, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 0, "relation" : "eq" }, "max_score" : null, "hits" : [ ] }, "suggest" : { "my-suggest" : [ { "text" : "人", "offset" : 0, "length" : 1, "options" : [ { "text" : "人工呼吸", "_index" : "article-index", "_type" : "_doc", "_id" : "4", "_score" : 1.0, "_source" : { "id" : 4, "title" : "人工呼吸", "summary" : "ElasticSearch 实现分词全文检索 - 经纬度查询", "createDate" : "2023-02-23" } }, { "text" : "人工智能技术", "_index" : "article-index", "_type" : "_doc", "_id" : "1", "_score" : 1.0, "_source" : { "id" : 1, "title" : "人工智能技术", "summary" : "ElasticSearch 实现分词全文检索 - ES、Kibana、IK安装", "createDate" : "2023-02-23" } }, { "text" : "人工智能软件 Chart GTP", "_index" : "article-index", "_type" : "_doc", "_id" : "2", "_score" : 1.0, "_source" : { "id" : 2, "title" : "人工智能软件 Chart GTP", "summary" : "太极生两仪,两仪生四象,四象生八卦", "createDate" : "2023-02-23" } } ] } ] } }
JAVA SpringBoot 实现 搜索关键字自动补全(suggest)
创建索引
/** * 第一步:系统初始化,创建索引 * 如果索引不存在,创建,输出 */ @Test void createIndexTest() throws Exception { boolean indexExists = elasticSearchUtil.indexExists(INDEX_NAME); if (!indexExists) { try { createIndex(INDEX_NAME); logger.info("索引【{}】,创建成功", INDEX_NAME); //测试效果 可再次查询验证。 indexExists = elasticSearchUtil.indexExists(INDEX_NAME); logger.info("索引【{}】, {}", INDEX_NAME, indexExists ? "验证存在" : "验证不存在"); } catch (Exception e) { logger.error(e.getMessage(), e); } } else { logger.info("索引【{}】已存在,无需创建", INDEX_NAME); } } /** * 创建索引 * * @param indexName * @throws Exception */ void createIndex(String indexName) throws Exception { //准备索引的 settings Settings.Builder settings = Settings.builder() .put("number_of_shards", INDEX_NUMBER_OF_SHARDS) //分片数,可以使用常量 .put("number_of_replicas", esProperties.getReplicasNum()); //是否集群,需要多少副本,在配置文件中配置 //准备索引的结构 Mappings XContentBuilder mappings = JsonXContent.contentBuilder() .startObject() .startObject("properties") .startObject("id").field("type", "keyword").endObject() .startObject("title").field("type", "text").field("analyzer", "ik_max_word") .startObject("fields").startObject("suggest").field("type", "completion").field("analyzer", "ik_max_word").endObject().endObject() .endObject() //对该字段进行分词 .startObject("summary").field("type", "text").field("analyzer", "ik_max_word").endObject() //对该字段进行分词 .startObject("createDate").field("type", "date").field("format", "yyyy-MM-dd HH:mm:ss").endObject() .endObject() .endObject(); CreateIndexResponse resp = elasticSearchUtil.createIndex(indexName, settings, mappings); //输出 logger.info("CreateIndexResponse => {} ", resp.toString()); }
添加数据
/** * 第二步:模拟后台管理员,在添加文章时,将要检查的字段内容,同步到ES中 */ @Test void addArticleTest() throws Exception { Map<Integer, String> titleMap = new HashMap<>(); titleMap.put(1, "人工智能技术"); titleMap.put(2, "人工智能软件 Chart GTP"); titleMap.put(3, "Restful基本操作"); titleMap.put(4, "Java SpringBoot ES 索引操作"); titleMap.put(5, "Java SpringBoot ES 文档操作"); titleMap.put(6, "人工呼吸"); titleMap.put(7, "SpringBoot 全文检索实战"); Map<Integer, String> introMap = new HashMap<>(); introMap.put(1, "ElasticSearch 实现分词全文检索 - 概述"); introMap.put(2, "ElasticSearch 实现分词全文检索 - ES、Kibana、IK安装"); introMap.put(3, "ElasticSearch 实现分词全文检索 - Restful基本操作"); introMap.put(4, "ElasticSearch 实现分词全文检索 - Java SpringBoot ES 索引操作"); introMap.put(5, "ElasticSearch 实现分词全文检索 - Java SpringBoot ES 文档操作"); introMap.put(6, "ElasticSearch 实现分词全文检索 - 经纬度查询"); introMap.put(7, "ElasticSearch 实现分词全文检索 - SpringBoot 全文检索实战"); //内容 Map<Integer, String> contentMap = new HashMap<>(); contentMap.put(1, "【阿里云】尊敬的vipsoft:您有2台云服务器ECS配置升级成功。如有CPU、内存变更或0Mbps带宽升级,您需要在ECS控制台手动重启云服务器后才能生效。"); contentMap.put(2, "为更好地为您提供服务,温馨提醒:您本月有1次抽奖机会,赢取大额通用流量,月月抽月月领,点击掌厅链接 原URL:http://wap.js.10086.cn/Mq 快来试试你的运气吧,如本月已参与请忽略【江苏移动心级服务,让爱连接】"); contentMap.put(3, "国家反诈中心提醒:公检法机关会当面向涉案人员出示证件或法律文书,绝对不会通过网络给当事人发送通缉令、拘留证、逮捕证等法律文书,并要求转账汇款。\n" + "切记:公检法机关不存在所谓“安全账户”,更不会让你远程转账汇款!"); contentMap.put(4, "【江苏省公安厅、江苏省通信管理局】温馨提示:近期利用苹果手机iMessage消息冒充熟人、冒充领导换号、添加新微信号等诈骗形式多发。如有收到类似短信,请您谨慎判断,苹果手机用户如无需要可关闭iMessage功能,以免上当受骗。"); contentMap.put(5, "多一点快乐,少一点懊恼,不管钞票有多少,只有天天开心就好,累了就睡觉,生活的甜苦,自己来调味。收到信息就要开心的笑"); contentMap.put(6, "黄金周好运每天交,我把祝福来送到:愿您生活步步高,彩票期期中,股票每天涨,生意年年旺,祝您新年新景象!"); contentMap.put(7, "【阿里云】当你手机响,那是我的问候;当你收到短信,那有我的心声;当你翻阅短信,那有我的牵挂;当你筹备关机时,记得我今天说过周末快乐!"); contentMap.put(8, "我刚去了一趟银行,取了无数的幸福黄金好运珠宝平安翡翠成功股票健康基金。嘘!别作声,统统的送给你,因为我想提“钱”祝你国庆节快乐!"); contentMap.put(9, "一个人的精彩,一个人的打拼,一个人的承载,一个人的舞蹈。光棍节送你祝福,不因你是光棍,只因你生活色彩。祝你:快乐打拼,生活出彩!"); contentMap.put(10, "爆竹响激情燃放,雪花舞祥风欢畅,烟火腾期待闪亮,感动涌心中激荡,心情美春节冲浪,愿景好心中珍藏,祝与福短信奉上:祝您身体健康,兔年吉祥!"); //模似7次 添加文章 for (int i = 1; i <= 7; i++) { ArticleInfo article = new 
ArticleInfo(); article.setId(String.valueOf(i)); article.setTitle(titleMap.get(i)); article.setAuthor("VipSoft"); article.setSummary(introMap.get(i)); article.setContent(contentMap.get(i)); article.setCreateTime(new Date()); //将article 保存到 MySQL --- 省略 boolean flag = true; //保存数据到 MySQL 数据库成功 if (flag) { //将需要查询的数据,赋给DTO,更新到 ES中 ArticleDTO articleDTO = new ArticleDTO(); BeanUtils.copyProperties(article, articleDTO); String json = JSON.toJSONStringWithDateFormat(articleDTO, "yyyy-MM-dd HH:mm:ss"); //FastJson 将日期格式化 IndexResponse resp = elasticSearchUtil.createDoc(INDEX_NAME, articleDTO.getId(), json); logger.info(" {}", resp.getResult().toString()); } } }
查询数据
/** * 第三步:模拟用户搜索,输入关键词“人”,带出和人有关的关键词 */ @Test void earchTest() throws Exception { List<String> resp = elasticSearchUtil.suggest(INDEX_NAME, "title.suggest", "人", 2); //4. 获取到 _source 中的数据,并展示 for (String hit : resp) { System.out.println(hit); } } /** * 自动补全 根据用户的输入联想到可能的词或者短语 * * @param indexName 索引名称 * @param field 搜索条件字段 * @param keywords 搜索关键字 * @param size 匹配数量 * @return * @throws Exception */ public List<String> suggest(String indexName, String field, String keywords, int size) throws Exception { //定义返回 List<String> suggestList = new ArrayList<>(); //构建查询请求 SearchRequest searchRequest = new SearchRequest(indexName); //通过查询构建器定义评分排序 SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); searchSourceBuilder.sort(new ScoreSortBuilder().order(SortOrder.DESC)); //构造搜索建议语句,搜索条件字段 CompletionSuggestionBuilder completionSuggestionBuilder = new CompletionSuggestionBuilder(field); //搜索关键字 completionSuggestionBuilder.prefix(keywords); //去除重复 completionSuggestionBuilder.skipDuplicates(true); //匹配数量 completionSuggestionBuilder.size(size); searchSourceBuilder.suggest(new SuggestBuilder().addSuggestion("article-suggest", completionSuggestionBuilder)); //article-suggest为返回的字段,所有返回将在article-suggest里面,可写死,sort按照评分排序 searchRequest.source(searchSourceBuilder); //定义查找响应 SearchResponse suggestResponse = esClient.search(searchRequest, RequestOptions.DEFAULT); //定义完成建议对象 CompletionSuggestion completionSuggestion = suggestResponse.getSuggest().getSuggestion("article-suggest"); List<CompletionSuggestion.Entry.Option> optionsList = completionSuggestion.getEntries().get(0).getOptions(); //从optionsList取出结果 if (!CollectionUtils.isEmpty(optionsList)) { optionsList.forEach(item -> suggestList.add(item.getText().toString())); } return suggestList; }