④. ik_smart:会做最粗粒度的拆分,比如会将“中华人民共和国人民大会堂”拆分为“中华人民共和国”和“人民大会堂”两个词(前台搜索的时候用 ik_smart)。
GET _analyze { "analyzer": "ik_smart", "text":"中华人民共和国人民大会堂" } { "tokens" : [ { "token" : "中华人民共和国", "start_offset" : 0, "end_offset" : 7, "type" : "CN_WORD", "position" : 0 }, { "token" : "人民大会堂", "start_offset" : 7, "end_offset" : 12, "type" : "CN_WORD", "position" : 1 } ] }
⑥. 自定义分词器(自定义扩展词库)
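- ①. 修改 /usr/share/elasticsearch/plugins/ik/config 中的 IKAnalyzer.cfg.xml(这是容器内的路径;下文的操作步骤是在宿主机的 /mydata/elasticsearch/plugins/ik/config 目录下修改该文件),在 remote_ext_dict 中配置远程扩展字典的地址: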
<?xml version="1.0" encoding="UTF-8"?> <!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd"> <properties> <comment>IK Analyzer 扩展配置</comment> <!--用户可以在这里配置自己的扩展字典 --> <entry key="ext_dict"></entry> <!--用户可以在这里配置自己的扩展停止词字典--> <entry key="ext_stopwords"></entry> <!--用户可以在这里配置远程扩展字典 --> <entry key="remote_ext_dict">http://192.168.56.10/es/fenci.txt</entry> <!--用户可以在这里配置远程扩展停止词字典--> <!-- <entry key="remote_ext_stopwords">words_location</entry> --> </properties>
②. 修改完成后,需要重启 elasticsearch 容器,否则修改不生效:docker restart elasticsearch。重启后再次测试分词,“唐智谷粒商城”会被识别为一个完整的词:
GET _analyze { "analyzer": "ik_smart", "text":"唐智谷粒商城" } { "tokens" : [ { "token" : "唐智谷粒商城", "start_offset" : 0, "end_offset" : 6, "type" : "CN_WORD", "position" : 0 } ] }
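③. 具体的操作步骤(先用 docker 安装 nginx,并在 /mydata/nginx/html/es 下创建 fenci.txt 作为远程扩展字典;再修改 IKAnalyzer.cfg.xml 并重启 elasticsearch)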
[root@localhost ~]# docker ps CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES 95de12634192 elasticsearch:7.4.2 "/usr/local/bin/dock…" 4 seconds ago Up 3 seconds 0.0.0.0:9200->9200/tcp, :::9200->9200/tcp, 0.0.0.0:9300->9300/tcp, :::9300->9300/tcp elasticsearch a197c1d2cf05 kibana:7.4.2 "/usr/local/bin/dumb…" 30 hours ago Up About a minute 0.0.0.0:5601->5601/tcp, :::5601->5601/tcp kibana a18680bef63e redis "docker-entrypoint.s…" 5 weeks ago Up 2 minutes 0.0.0.0:6379->6379/tcp, :::6379->6379/tcp redis 91e02812975d mysql:5.7 "docker-entrypoint.s…" 5 weeks ago Up 2 minutes 0.0.0.0:3306->3306/tcp, :::3306->3306/tcp, 33060/tcp mysql [root@localhost ~]# cd /mydata/ [root@localhost mydata]# ls elasticsearch mysql redis [root@localhost mydata]# mkdir nginx [root@localhost mydata]# docker images REPOSITORY TAG IMAGE ID CREATED SIZE redis latest 08502081bff6 8 weeks ago 105MB mysql 5.7 09361feeb475 2 months ago 447MB kibana 7.4.2 230d3ded1abc 22 months ago 1.1GB elasticsearch 7.4.2 b1179d41a7b4 22 months ago 855MB [root@localhost mydata]# docker run -p80:80 --name nginx -d nginx:1.10 Unable to find image 'nginx:1.10' locally 1.10: Pulling from library/nginx 6d827a3ef358: Pull complete 1e3e18a64ea9: Pull complete 556c62bb43ac: Pull complete Digest: sha256:6202beb06ea61f44179e02ca965e8e13b961d12640101fca213efbfd145d7575 Status: Downloaded newer image for nginx:1.10 24c1454acf9f8419f762f3369b59557df57cd6209864ef64000f2f26d9f0d05b [root@localhost mydata]# mkdir -p /mydata/nginx/html [root@localhost mydata]# mkdir -p /mydata/nginx/logs [root@localhost mydata]# mkdir -p /mydata/nginx/conf [root@localhost mydata]# ls elasticsearch mysql nginx redis [root@localhost mydata]# cd nginx/ [root@localhost nginx]# ls conf html logs [root@localhost nginx]# cd .. [root@localhost mydata]# rm -rf nginx/ [root@localhost mydata]# docker container cp nginx:/etc/nginx . 
[root@localhost mydata]# ls elasticsearch mysql nginx redis [root@localhost mydata]# docker stop nginx nginx [root@localhost mydata]# docker rm nginx nginx [root@localhost mydata]# ls elasticsearch mysql nginx redis [root@localhost mydata]# cd nginx [root@localhost nginx]# ls conf.d fastcgi_params koi-utf koi-win mime.types modules nginx.conf scgi_params uwsgi_params win-utf [root@localhost nginx]# cd .. [root@localhost mydata]# mv nginx conf [root@localhost mydata]# ls conf elasticsearch mysql redis [root@localhost mydata]# mkdir nginx [root@localhost mydata]# mv conf nginx/ [root@localhost mydata]# ls elasticsearch mysql nginx redis [root@localhost mydata]# cd nginx/ [root@localhost nginx]# ls conf [root@localhost nginx]# docker run -p 80:80 --name nginx \ > -v /mydata/nginx/html:/usr/share/nginx/html \ > -v /mydata/nginx/logs:/var/log/nginx \ > -v /mydata/nginx/conf/:/etc/nginx \ > -d nginx:1.10 01bfbb6a8cd0e3f6af476793ad33fdc696740eadb125f8adad573303524adb55 [root@localhost nginx]# ls conf html logs [root@localhost nginx]# docker update nginx --restart=always nginx [root@localhost nginx]# echo '<h2>hello nginx!</h2>' >index.html [root@localhost nginx]# ls conf html index.html logs [root@localhost nginx]# rm -rf index.html [root@localhost nginx]# cd html [root@localhost html]# echo '<h2>hello nginx!</h2>' >index.html [root@localhost html]# [root@localhost html]# mkdir es [root@localhost html]# cd es [root@localhost es]# vi fenci.text [root@localhost es]# ls fenci.text [root@localhost es]# mv fenci.text fenci.txt [root@localhost es]# cd /mydata/ [root@localhost mydata]# cd elasticsearch/ [root@localhost elasticsearch]# ls config data plugins [root@localhost elasticsearch]# cd plugins/ [root@localhost plugins]# ls ik [root@localhost plugins]# cd ik/ [root@localhost ik]# ls commons-codec-1.9.jar config httpclient-4.5.2.jar plugin-descriptor.properties commons-logging-1.2.jar elasticsearch-analysis-ik-7.4.2.jar httpcore-4.4.4.jar plugin-security.policy 
[root@localhost ik]# cd config/ [root@localhost config]# ls extra_main.dic extra_single_word_full.dic extra_stopword.dic main.dic quantifier.dic suffix.dic extra_single_word.dic extra_single_word_low_freq.dic IKAnalyzer.cfg.xml preposition.dic stopword.dic surname.dic [root@localhost config]# vi IKAnalyzer.cfg.xml [root@localhost config]# docker restart elasticsearch elasticsearch [root@localhost config]# cd /mydata/nginx/ [root@localhost nginx]# ls conf html logs [root@localhost nginx]# cd html/es/ [root@localhost es]# ls fenci.txt [root@localhost es]# cat fenci.txt 唐智谷粒商城