HBase分布式启动时,从节点提示“通知主节点自己已准备好”失败——求助(400 报错)
环境:
两台机器A、B分别部署HBase,A配置为master,B为从节点。两台机器之间的SSH免密码通信参考HBase官网文档,使用ssh-keygen配置。A机器上只有一个zk实例,B机器上没有;B机器的hbase-site.xml中指定zk的地址为A机器上的zk,即两个HBase都使用A机器上的zk实例。
问题:
启动A的zk,正常;
启动A的hbase,启动日志有如下:
2017-03-16 23:25:18,779 INFO [ProcedureExecutor-0] master.AssignmentManager: Unable to communicate with localhost.localdomain2,16020,1489677910740 in order to assign regions,
java.io.IOException: java.io.IOException: missing table descriptor for 652fbd2f38db51d2b46362afa44ab367
at org.apache.hadoop.hbase.regionserver.RSRpcServices.openRegion(RSRpcServices.java:1574)
at org.apache.hadoop.hbase.protobuf.generated.AdminProtos$AdminService$2.callBlockingMethod(AdminProtos.java:22239)
at org.apache.hadoop.hbase.ipc.RpcServer.call(RpcServer.java:2180)
at org.apache.hadoop.hbase.ipc.CallRunner.run(CallRunner.java:112)
at org.apache.hadoop.hbase.ipc.RpcExecutor.consumerLoop(RpcExecutor.java:133)
at org.apache.hadoop.hbase.ipc.RpcExecutor$1.run(RpcExecutor.java:108)
at java.lang.Thread.run(Thread.java:745)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at org.apache.hadoop.ipc.RemoteException.instantiateException(RemoteException.java:106)
at org.apache.hadoop.ipc.RemoteException.unwrapRemoteException(RemoteException.java:95)
at org.apache.hadoop.hbase.protobuf.ProtobufUtil.getRemoteException(ProtobufUtil.java:329)
at org.apache.hadoop.hbase.master.ServerManager.sendRegionOpen(ServerManager.java:801)
at org.apache.hadoop.hbase.master.AssignmentManager.assign(AssignmentManager.java:1728)
at org.apache.hadoop.hbase.master.AssignmentManager.assign(AssignmentManager.java:2838)
at org.apache.hadoop.hbase.master.AssignmentManager.assign(AssignmentManager.java:2817)
at org.apache.hadoop.hbase.util.ModifyRegionUtils.assignRegions(ModifyRegionUtils.java:288)
at org.apache.hadoop.hbase.master.procedure.CreateTableProcedure.assignRegions(CreateTableProcedure.java:451)
at org.apache.hadoop.hbase.master.procedure.CreateTableProcedure.executeFromState(CreateTableProcedure.java:126)
at org.apache.hadoop.hbase.master.procedure.CreateTableProcedure.executeFromState(CreateTableProcedure.java:58)
at org.apache.hadoop.hbase.procedure2.StateMachineProcedure.execute(StateMachineProcedure.java:119)
at org.apache.hadoop.hbase.procedure2.Procedure.doExecute(Procedure.java:498)
at org.apache.hadoop.hbase.procedure2.ProcedureExecutor.execProcedure(ProcedureExecutor.java:1061)
at org.apache.hadoop.hbase.procedure2.ProcedureExecutor.execLoop(ProcedureExecutor.java:856)
at org.apache.hadoop.hbase.procedure2.ProcedureExecutor.execLoop(ProcedureExecutor.java:809)
at org.apache.hadoop.hbase.procedure2.ProcedureExecutor.access$400(ProcedureExecutor.java:75)
at org.apache.hadoop.hbase.procedure2.ProcedureExecutor$2.run(ProcedureExecutor.java:495)
Caused by: org.apache.hadoop.hbase.ipc.RemoteWithExtrasException(java.io.IOException): java.io.IOException: missing table descriptor for 652fbd2f38db51d2b46362afa44ab367
at org.apache.hadoop.hbase.regionserver.RSRpcServices.openRegion(RSRpcServices.java:1574)
at org.apache.hadoop.hbase.protobuf.generated.AdminProtos$AdminService$2.callBlockingMethod(AdminProtos.java:22239)
at org.apache.hadoop.hbase.ipc.RpcServer.call(RpcServer.java:2180)
at org.apache.hadoop.hbase.ipc.CallRunner.run(CallRunner.java:112)
at org.apache.hadoop.hbase.ipc.RpcExecutor.consumerLoop(RpcExecutor.java:133)
at org.apache.hadoop.hbase.ipc.RpcExecutor$1.run(RpcExecutor.java:108)
at java.lang.Thread.run(Thread.java:745)
at org.apache.hadoop.hbase.ipc.RpcClientImpl.call(RpcClientImpl.java:1267)
at org.apache.hadoop.hbase.ipc.AbstractRpcClient.callBlockingMethod(AbstractRpcClient.java:227)
at org.apache.hadoop.hbase.ipc.AbstractRpcClient$BlockingRpcChannelImplementation.callBlockingMethod(AbstractRpcClient.java:336)
at org.apache.hadoop.hbase.protobuf.generated.AdminProtos$AdminService$BlockingStub.openRegion(AdminProtos.java:23125)
at org.apache.hadoop.hbase.master.ServerManager.sendRegionOpen(ServerManager.java:798)
... 14 more
2017-03-16 23:25:18,783 INFO [ProcedureExecutor-0] zookeeper.ZKTableStateManager: Moving table hbase:namespace state from ENABLING to ENABLED
2017-03-16 23:25:18,790 INFO [AM.-pool1-t1] master.RegionStates: Transition {652fbd2f38db51d2b46362afa44ab367 state=PENDING_OPEN, ts=1489677918759, server=localhost.localdomain2,16020,1489677910740} to {652fbd2f38db51d2b46362afa44ab367 state=OFFLINE, ts=1489677918790, server=localhost.localdomain2,16020,1489677910740}
2017-03-16 23:25:18,800 INFO [AM.-pool1-t1] master.AssignmentManager: Setting node as OFFLINED in ZooKeeper for region {ENCODED => 652fbd2f38db51d2b46362afa44ab367, NAME => 'hbase:namespace,,1489677918060.652fbd2f38db51d2b46362afa44ab367.', STARTKEY => '', ENDKEY => ''}
2017-03-16 23:25:18,803 INFO [AM.-pool1-t1] master.AssignmentManager: Assigning hbase:namespace,,1489677918060.652fbd2f38db51d2b46362afa44ab367. to localhost.localdomain2,16020,1489677910740
2017-03-16 23:25:18,803 INFO [AM.-pool1-t1] master.RegionStates: Transition {652fbd2f38db51d2b46362afa44ab367 state=OFFLINE, ts=1489677918800, server=localhost.localdomain2,16020,1489677910740} to {652fbd2f38db51d2b46362afa44ab367 state=PENDING_OPEN, ts=1489677918803, server=localhost.localdomain2,16020,1489677910740}
2017-03-16 23:25:18,807 WARN [AM.-pool1-t1] master.AssignmentManager: Failed assignment of hbase:namespace,,1489677918060.652fbd2f38db51d2b46362afa44ab367. to localhost.localdomain2,16020,1489677910740, trying to assign elsewhere instead; try=1 of 10
java.io.IOException: java.io.IOException: missing table descriptor for 652fbd2f38db51d2b46362afa44ab367
at org.apache.hadoop.hbase.regionserver.RSRpcServices.openRegion(RSRpcServices.java:1574)
at org.apache.hadoop.hbase.protobuf.generated.AdminProtos$AdminService$2.callBlockingMethod(AdminProtos.java:22239)
at org.apache.hadoop.hbase.ipc.RpcServer.call(RpcServer.java:2180)
at org.apache.hadoop.hbase.ipc.CallRunner.run(CallRunner.java:112)
at org.apache.hadoop.hbase.ipc.RpcExecutor.consumerLoop(RpcExecutor.java:133)
at org.apache.hadoop.hbase.ipc.RpcExecutor$1.run(RpcExecutor.java:108)
at java.lang.Thread.run(Thread.java:745)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at org.apache.hadoop.ipc.RemoteException.instantiateException(RemoteException.java:106)
at org.apache.hadoop.ipc.RemoteException.unwrapRemoteException(RemoteException.java:95)
at org.apache.hadoop.hbase.protobuf.ProtobufUtil.getRemoteException(ProtobufUtil.java:329)
at org.apache.hadoop.hbase.master.ServerManager.sendRegionOpen(ServerManager.java:772)
at org.apache.hadoop.hbase.master.AssignmentManager.assign(AssignmentManager.java:2155)
at org.apache.hadoop.hbase.master.AssignmentManager.assign(AssignmentManager.java:1601)
at org.apache.hadoop.hbase.master.AssignCallable.call(AssignCallable.java:48)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
Caused by: org.apache.hadoop.hbase.ipc.RemoteWithExtrasException(java.io.IOException):
经过各种配置调整,最终还是靠“大杀器”——卸载重装——才解决了这个问题。
在动用卸载重装之前,做过以下这些工作:
1、生成ssh公钥和私钥,保证两个节点之间可以进行SSH免密码通信
2、同步两个节点上的时间(网络上看到过这个issue,时间不同步可能会有问题)
3、两个节点宿主机配置/etc/hosts:
a、 ip为真实分配到的ip;
b、 域名为宿主机真实的hostname
版权声明:本文内容由阿里云实名注册用户自发贡献,版权归原作者所有,阿里云开发者社区不拥有其著作权,亦不承担相应法律责任。具体规则请查看《阿里云开发者社区用户服务协议》和《阿里云开发者社区知识产权保护指引》。如果您发现本社区中有涉嫌抄袭的内容,填写侵权投诉表单进行举报,一经查实,本社区将立刻删除涉嫌侵权内容。