大家都知道,es 的索引创建完成之后,已有字段的映射(mapping)就不可以再修改了,比如你想更改字段类型或者是分词方式等。那么随着业务数据量的发展,可能会出现需要修改索引,或者说叫做重建索引的情况,那么这个时候应该怎么操作呢?本文主要就这个问题进行讨论处理。
处理流程
整体的重建索引的处理流程就是,先创建一个临时索引,将原始索引中的数据迁移到临时索引,然后再删除原始索引,重新创建原始索引后,再将临时索引中的数据迁回到重建后的索引,从而完成索引的重建操作。
创建临时索引
在创建索引之前,我们先看一下原始的 es 索引结构,在 kibana 开发工具命令行页面执行命令
GET crm_meiqia_conversation/_mapping
这里我需要将字段 convId 的字段类型 改为 text ,那么这个时候我就需要创建一个临时索引 crm_meiqia_conversation_tmp 将字段 convId 的字段类型改为 text ,原始 convId 属性如下图
整个执行命令代码如下
PUT /crm_meiqia_conversation_tmp { "mappings" : { "meiqiaConversation" : { "properties" : { "convId" : { "type" : "text" }, "enterpriseId" : { "type" : "long" }, "devClientId" : { "type" : "text" }, "pageFromUrl" : { "type" : "text" }, "pageLandUrl" : { "type" : "text" }, "pageLandTitle" : { "type" : "text" }, "pageConvUrl" : { "type" : "text" }, "pageConvTitle" : { "type" : "text" }, "searchEngineName" : { "type" : "text" }, "searchEngineKw" : { "type" : "text" }, "visitorIp" : { "type" : "text" }, "visitorLocation" : { "type" : "text" }, "visitorOs" : { "type" : "text" }, "visitorBrowser" : { "type" : "text" }, "visitorTags" : { "type" : "text" }, "clientId" : { "type" : "long" }, "agentAccount" : { "type" : "text" }, "agentName" : { "type" : "text" }, "agentId" : { "type" : "text" }, "agentNickName" : { "type" : "text" }, "groupId" : { "type" : "long" }, "groupName" : { "type" : "text" }, "convStartTm" : { "type" : "long" }, "convStartDate" : { "type" : "date" }, "convEndTm" : { "type" : "long" }, "convEndDate" : { "type" : "date" }, "convFirstRespWaitInSecs" : { "type" : "long" }, "convAgentMsgCount" : { "type" : "long" }, "convVisitorMsgCount" : { "type" : "long" }, "convQualityGrade" : { "type" : "text" }, "convLeads" : { "type" : "text" }, "commentLevel" : { "type" : "long" }, "commentContent" : { "type" : "text" }, "platform" : { "type" : "text" }, "summaryContent" : { "type" : "text" }, "summaryUpdateAt" : { "type" : "text" }, "sourceType" : { "type" : "text" }, "sourceField" : { "type" : "text" }, "agentRespDuration" : { "type" : "long" }, "effective" : { "type" : "text" }, "missed" : { "type" : "text" }, "converseDuration" : { "type" : "long" }, "appName" : { "type" : "text" }, "mainChannel" : { "type" : "text" }, "mainChannelName" : { "type" : "text" }, "subChannel" : { "type" : "text" }, "subChannelName" : { "type" : "text" }, "searchEngine" : { "type" : "text" }, "clientInfo" : { "properties" : { "address" : { "type" : "text" }, "age" : { "type" : 
"long" }, "channelName" : { "type" : "text" }, "comment" : { "type" : "text" }, "contact" : { "type" : "text" }, "convId" : { "type" : "long" }, "email" : { "type" : "text" }, "enterpriseId" : { "type" : "long" }, "followSource" : { "type" : "text" }, "gender" : { "type" : "text" }, "infoId" : { "type" : "long" }, "jijiaoCity" : { "type" : "text" }, "jijiaoDistrict" : { "type" : "text" }, "jijiaoLevel" : { "type" : "text" }, "jijiaoProvince" : { "type" : "text" }, "mTrackId" : { "type" : "text" }, "name" : { "type" : "text" }, "openid" : { "type" : "text" }, "qq" : { "type" : "text" }, "sourceName" : { "type" : "text" }, "tel" : { "type" : "text" }, "trackId" : { "type" : "text" }, "uid" : { "type" : "text" }, "vid" : { "type" : "text" }, "visitorName" : { "type" : "text" }, "weibo" : { "type" : "text" }, "weixin" : { "type" : "text" }, "appChannel" : { "type" : "text" } } }, "convContent" : { "properties" : { "contentId" : { "type" : "long" }, "convId" : { "type" : "long" }, "convFrom" : { "type" : "text" }, "timestamp" : { "type" : "long" }, "content" : { "type" : "text", "analyzer":"standard" }, "remoteContent" : { "type" : "text" }, "convType" : { "type" : "text" } } }, "convTag" : { "properties" : { "tagId" : { "type" : "long" }, "convId" : { "type" : "long" }, "level" : { "type" : "long" }, "value" : { "type" : "text" } } } } } }, "settings" : { "number_of_shards":2, "number_of_replicas" : 1, "refresh_interval":"1s" } }
在 kibana 工具页面点击执行按钮
这里可以看到执行命令报错 400,根据提示信息来看,说明当前 es 中已经存在索引 crm_meiqia_conversation_tmp,那么先执行删除索引命令,删除后再执行刚才的创建临时索引命令
DELETE /crm_meiqia_conversation_tmp
再次执行创建临时索引命令,执行成功
数据迁移
临时索引创建完成之后,我们就可以将原始索引中的数据先迁移到临时索引中,通过 ES 提供的 _reindex API 进行数据复制迁移,执行命令
POST _reindex { "source": { "index": "crm_meiqia_conversation", "size":500 }, "dest": { "index": "crm_meiqia_conversation_tmp" } }
或者 异步迁移数据
POST _reindex?wait_for_completion=false { "source": { "index": "crm_meiqia_conversation", "size":500 }, "dest": { "index": "crm_meiqia_conversation_tmp" } }
其中,source 对应的是原始索引,dest 对应的是新建的临时索引,参数 size 表示每批处理的文档数为 500 条,_reindex 会按批循环执行直到数据迁移复制结束。如果不指定 size,_reindex 默认每批处理 1000 条文档。迁移成功如图
这个时候我们再来看一下原始索引中数据总数 crm_meiqia_conversation 与临时索引 crm_meiqia_conversation_tmp 中数据总数是否一致,执行命令
GET crm_meiqia_conversation/_count GET crm_meiqia_conversation_tmp/_count
执行结果如图
那么这样就完成了数据从原始索引迁移复制到临时索引的操作。
重建索引
这个时候就需要执行命令删除原始索引 crm_meiqia_conversation ,然后按照临时索引的 创建语句 创建新的索引,最后再将临时索引中的数据 迁移复制到 新建的原始索引中去,执行命令
# 删除原始索引 DELETE /crm_meiqia_conversation # 创建更改字段后的新的原始索引 PUT /crm_meiqia_conversation { "mappings" : { "meiqiaConversation" : { "properties" : { "convId" : { "type" : "text" }, "enterpriseId" : { "type" : "long" }, "devClientId" : { "type" : "text" }, "pageFromUrl" : { "type" : "text" }, "pageLandUrl" : { "type" : "text" }, "pageLandTitle" : { "type" : "text" }, "pageConvUrl" : { "type" : "text" }, "pageConvTitle" : { "type" : "text" }, "searchEngineName" : { "type" : "text" }, "searchEngineKw" : { "type" : "text" }, "visitorIp" : { "type" : "text" }, "visitorLocation" : { "type" : "text" }, "visitorOs" : { "type" : "text" }, "visitorBrowser" : { "type" : "text" }, "visitorTags" : { "type" : "text" }, "clientId" : { "type" : "long" }, "agentAccount" : { "type" : "text" }, "agentName" : { "type" : "text" }, "agentId" : { "type" : "text" }, "agentNickName" : { "type" : "text" }, "groupId" : { "type" : "long" }, "groupName" : { "type" : "text" }, "convStartTm" : { "type" : "long" }, "convStartDate" : { "type" : "date" }, "convEndTm" : { "type" : "long" }, "convEndDate" : { "type" : "date" }, "convFirstRespWaitInSecs" : { "type" : "long" }, "convAgentMsgCount" : { "type" : "long" }, "convVisitorMsgCount" : { "type" : "long" }, "convQualityGrade" : { "type" : "text" }, "convLeads" : { "type" : "text" }, "commentLevel" : { "type" : "long" }, "commentContent" : { "type" : "text" }, "platform" : { "type" : "text" }, "summaryContent" : { "type" : "text" }, "summaryUpdateAt" : { "type" : "text" }, "sourceType" : { "type" : "text" }, "sourceField" : { "type" : "text" }, "agentRespDuration" : { "type" : "long" }, "effective" : { "type" : "text" }, "missed" : { "type" : "text" }, "converseDuration" : { "type" : "long" }, "appName" : { "type" : "text" }, "mainChannel" : { "type" : "text" }, "mainChannelName" : { "type" : "text" }, "subChannel" : { "type" : "text" }, "subChannelName" : { "type" : "text" }, "searchEngine" : { "type" : "text" }, "clientInfo" : { "properties" : { 
"address" : { "type" : "text" }, "age" : { "type" : "long" }, "channelName" : { "type" : "text" }, "comment" : { "type" : "text" }, "contact" : { "type" : "text" }, "convId" : { "type" : "long" }, "email" : { "type" : "text" }, "enterpriseId" : { "type" : "long" }, "followSource" : { "type" : "text" }, "gender" : { "type" : "text" }, "infoId" : { "type" : "long" }, "jijiaoCity" : { "type" : "text" }, "jijiaoDistrict" : { "type" : "text" }, "jijiaoLevel" : { "type" : "text" }, "jijiaoProvince" : { "type" : "text" }, "mTrackId" : { "type" : "text" }, "name" : { "type" : "text" }, "openid" : { "type" : "text" }, "qq" : { "type" : "text" }, "sourceName" : { "type" : "text" }, "tel" : { "type" : "text" }, "trackId" : { "type" : "text" }, "uid" : { "type" : "text" }, "vid" : { "type" : "text" }, "visitorName" : { "type" : "text" }, "weibo" : { "type" : "text" }, "weixin" : { "type" : "text" }, "appChannel" : { "type" : "text" } } }, "convContent" : { "properties" : { "contentId" : { "type" : "long" }, "convId" : { "type" : "long" }, "convFrom" : { "type" : "text" }, "timestamp" : { "type" : "long" }, "content" : { "type" : "text", "analyzer":"standard" }, "remoteContent" : { "type" : "text" }, "convType" : { "type" : "text" } } }, "convTag" : { "properties" : { "tagId" : { "type" : "long" }, "convId" : { "type" : "long" }, "level" : { "type" : "long" }, "value" : { "type" : "text" } } } } } }, "settings" : { "number_of_shards":2, "number_of_replicas" : 1, "refresh_interval":"1s" } } # 迁移复制数据 临时索引》》》新的原始索引 POST _reindex { "source": { "index": "crm_meiqia_conversation_tmp", "size":500 }, "dest": { "index": "crm_meiqia_conversation" } }
最后执行成功后,完成本次关于 索引 crm_meiqia_conversation 的更改字段属性 的操作
写在最后
其实对于 es 更改索引字段的操作,确实比较费劲,需要先创建临时索引,转移复制数据后,删除原始索引,再创建新的索引,并把临时索引的数据再迁移回新的索引中。所以在创建 es 索引之初就需要综合考量,将字段的属性设计以及索引结构设计做到准确,避免后续再进行这样费劲的重建操作。另外如果待迁移索引的数据量比较大的话,来回迁移数据除了耗时以外,还需要较大的磁盘空间才能完成操作,不然会报磁盘空间不足的错误。