③. ik_max_word:会将文本做最细粒度的拆分,比如会将“中华人民共和国人民大会堂”拆分为“中华人民共和国、中华人民、中华、华人、人民共和国、人民、共和国、大会堂、大会、会堂”等词语(索引的时候用ik_max_word)
{ "tokens" : [ { "token" : "中华人民共和国", "start_offset" : 0, "end_offset" : 7, "type" : "CN_WORD", "position" : 0 }, { "token" : "中华人民", "start_offset" : 0, "end_offset" : 4, "type" : "CN_WORD", "position" : 1 }, { "token" : "中华", "start_offset" : 0, "end_offset" : 2, "type" : "CN_WORD", "position" : 2 }, { "token" : "华人", "start_offset" : 1, "end_offset" : 3, "type" : "CN_WORD", "position" : 3 }, { "token" : "人民共和国", "start_offset" : 2, "end_offset" : 7, "type" : "CN_WORD", "position" : 4 }, { "token" : "人民", "start_offset" : 2, "end_offset" : 4, "type" : "CN_WORD", "position" : 5 }, { "token" : "共和国", "start_offset" : 4, "end_offset" : 7, "type" : "CN_WORD", "position" : 6 }, { "token" : "共和", "start_offset" : 4, "end_offset" : 6, "type" : "CN_WORD", "position" : 7 }, { "token" : "国人", "start_offset" : 6, "end_offset" : 8, "type" : "CN_WORD", "position" : 8 }, { "token" : "人民大会堂", "start_offset" : 7, "end_offset" : 12, "type" : "CN_WORD", "position" : 9 }, { "token" : "人民大会", "start_offset" : 7, "end_offset" : 11, "type" : "CN_WORD", "position" : 10 }, { "token" : "人民", "start_offset" : 7, "end_offset" : 9, "type" : "CN_WORD", "position" : 11 }, { "token" : "大会堂", "start_offset" : 9, "end_offset" : 12, "type" : "CN_WORD", "position" : 12 }, { "token" : "大会", "start_offset" : 9, "end_offset" : 11, "type" : "CN_WORD", "position" : 13 }, { "token" : "会堂", "start_offset" : 10, "end_offset" : 12, "type" : "CN_WORD", "position" : 14 } ] }