The environment is as follows:
[root@node08 tmp]# openstack --version
openstack 3.0.0
[root@node08 tmp]# ceph --version
ceph version 0.2.10 (5a781946763482982879b2aeee27394ff707baf8)
For various reasons, a server hosting virtual machines was decommissioned while its VMs were still running, leaving those VMs in the ERROR state. We now need to recover them by other means.
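Before touching anything, it is worth confirming from the control plane that the compute node really is gone, and keeping the scheduler away from it. A minimal sketch using the standard nova CLI (node10 is the host name that shows up later in this post; whether you also disable it is situational):

# compute services on a decommissioned host should report state "down"
nova service-list --binary nova-compute

# optionally disable the dead host so the scheduler no longer targets it
nova service-disable node10 nova-compute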
Find the virtual machines
First, locate the affected VMs with the nova CLI. If you have many VMs and know the name, you can filter with grep directly; in my case quite a few instances are in ERROR, so I filter on the status column instead.
[root@node08 tmp]# nova list
+----+------+--------+------------+-------------+----------+
| ID | Name | Status | Task State | Power State | Networks |
+----+------+--------+------------+-------------+----------+
+----+------+--------+------------+-------------+----------+
[root@node08 tmp]# nova list --all |grep -i error    # list instances across all tenants, filtered by Status = ERROR
| d809e059-e3b0-49ff-aff9-c9bfca042e55 | 51WORLD | e377f6bbc0e04fa0862e1a98bc62fe69 | ERROR | - | Shutdown | vlan1021=10.10.21.129 |
| 7131c4f5-5607-422c-900a-aab4025caa8b | big | e377f6bbc0e04fa0862e1a98bc62fe69 | ERROR | - | Shutdown | vlan1021=10.10.21.145 |
| 9cfc3d0f-77ce-4d82-a049-c5b93db4743d | cc_test_0714-1 | e377f6bbc0e04fa0862e1a98bc62fe69 | ERROR | - | Shutdown | vlan1021=10.10.21.133 |
| 058df557-8159-4932-8249-9782b00e7b1b | cc_test_0714-2 | e377f6bbc0e04fa0862e1a98bc62fe69 | ERROR | - | Shutdown | vlan1021=10.10.21.134 |
| 1984ecdd-66df-4dff-a197-c77e8fc329e1 | cc_test_0714-3 | e377f6bbc0e04fa0862e1a98bc62fe69 | ERROR | - | Shutdown | vlan1021=10.10.21.132 |
| 0da6585f-ecf8-42fd-883a-fb9f8e617bf4 | cc_test_0714-4 | e377f6bbc0e04fa0862e1a98bc62fe69 | ERROR | - | Shutdown | vlan1021=10.10.21.131 |
| 0f107591-f5c2-441a-a2fd-0110d1494cad | cs-server2_0901 | e377f6bbc0e04fa0862e1a98bc62fe69 | ERROR | - | Running | vlan1021=10.10.21.142 |
| 4853df7d-4ed2-427e-81de-d5f8732c65ee | ecoplants_0910-1 | e377f6bbc0e04fa0862e1a98bc62fe69 | ERROR | - | Shutdown | vlan1021=10.10.21.148 |
| 6d0df374-b6ff-4a03-a930-89dd81e1715d | ecoplants_0910-2 | e377f6bbc0e04fa0862e1a98bc62fe69 | ERROR | - | Shutdown | vlan1021=10.10.21.149 |
| 4d802755-4bed-4069-a3b5-e218dacba06f | k8s_server-1 | e377f6bbc0e04fa0862e1a98bc62fe69 | ERROR | - | Running | vlan1021=10.10.21.105 |
| ea772d7a-55ca-4505-8522-722c2de67979 | k8s_server-2 | e377f6bbc0e04fa0862e1a98bc62fe69 | ERROR | - | Running | vlan1021=10.10.21.108 |
| 62448d1a-186b-4d42-8ae6-3c1136d38179 | k8s_server-3 | e377f6bbc0e04fa0862e1a98bc62fe69 | ERROR | - | Running | vlan1021=10.10.21.107 |
| fe7a2f65-2e10-49b1-9edc-c41861914662 | lanhai_test0916 | 295f4e94e8b84dceb1cf69599ba7c2ca | ERROR | - | Shutdown | vlan1021=10.10.21.151 |
| c361ab2c-a772-4525-a7e8-4cec70b0a560 | normal_0817 | e377f6bbc0e04fa0862e1a98bc62fe69 | ERROR | - | Shutdown | vlan1021=10.10.21.115 |
| f4546f44-b5eb-4e73-93fb-9325ef272c01 | seafile_0823 | e377f6bbc0e04fa0862e1a98bc62fe69 | ERROR | - | Running | vlan1021=10.10.21.47 |
| 7a8be3e1-0688-4c75-9bea-700924f5ab90 | xcx_0822 | e377f6bbc0e04fa0862e1a98bc62fe69 | ERROR | - | Shutdown | vlan1021=10.10.21.121 |
| 57a95508-15f8-4cfd-8ef2-5040cf25efb4 | zdp_manage | a73a06797eca440b9728d40c552bcb90 | ERROR | - | Running | vlan1030=10.10.30.253 |
| 0d4f288a-f67c-44c3-8a94-823e66cda5b9 | zdp_node01 | a73a06797eca440b9728d40c552bcb90 | ERROR | - | Running | vlan1030=10.10.30.252 |
| 164f3b44-10dc-4d79-8f87-92b835811f56 | 推流_0817 | a73a06797eca440b9728d40c552bcb90 | ERROR | - | Running | vlan1030=10.10.30.21 |
| 1beba6db-01d1-42ef-adb4-896a0a0cb660 | 直播_0815 | e377f6bbc0e04fa0862e1a98bc62fe69 | ERROR | - | Running | vlan1021=10.10.21.112; vlan1030=10.10.30.23 |
| ea36a58a-2113-4805-95c2-a9a2cc7eefc4 | 超算-server-2 | 535c6cb178ac4dcfaaaef9a4fb885871 | ERROR | - | Running | vlan1022=10.10.22.111 |
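If you would rather have the API do the filtering than grep, the unified openstack client can express the same query; a sketch, assuming your openstackclient version supports these flags:

# list ERROR-state instances across all projects, no grep needed
openstack server list --all-projects --status ERROR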
Too many machines are broken here, so I will take big as the example and walk through the repair.
Use the nova CLI to find the VM's backing RBD volume
[root@node08 tmp]# nova show 7131c4f5-5607-422c-900a-aab4025caa8b    # query instance details by instance ID
+--------------------------------------+--------------------------------------------------------------------------------+
| Property | Value |
+--------------------------------------+--------------------------------------------------------------------------------+
| OS-DCF:diskConfig | MANUAL |
| OS-EXT-AZ:availability_zone | zettakit |
| OS-EXT-SRV-ATTR:host | node10 |
| OS-EXT-SRV-ATTR:hostname | big |
| OS-EXT-SRV-ATTR:hypervisor_hostname | node10 |
| OS-EXT-SRV-ATTR:instance_name | instance-00000dbd |
| OS-EXT-SRV-ATTR:kernel_id | |
| OS-EXT-SRV-ATTR:launch_index | 0 |
| OS-EXT-SRV-ATTR:ramdisk_id | |
| OS-EXT-SRV-ATTR:reservation_id | r-ubly6epj |
| OS-EXT-SRV-ATTR:root_device_name | /dev/sda |
| OS-EXT-SRV-ATTR:user_data | - |
| OS-EXT-STS:power_state | 4 |
| OS-EXT-STS:task_state | - |
| OS-EXT-STS:vm_state | error |
| OS-SRV-USG:launched_at | 2022-09-08T02:31:07.000000 |
| OS-SRV-USG:terminated_at | - |
| accessIPv4 | |
| accessIPv6 | |
| attached_iso | - |
| attached_usb | [] |
| config_drive | True |
| created | 2022-09-08T02:30:46Z |
| description | - |
| distribution_user_id | c1b8319aa87d4374bc9ec5e217824675 |
| flavor | Flavor not found (0) |
| floating_ips | [] |
| hostId | 005e2fb7c75ec9942d79802a9ed8f63b73544a2babee0cd2bb41d08a |
| host_status | UNKNOWN |
| hot_add_info | {"use_vcpus": 0, "use_memory": 0, "numa_memory_info": [], "max_memory": 1024, "max_memory_slot": 255, "max_vcpus": 128, "use_memory_slot": 0} |
| id | 7131c4f5-5607-422c-900a-aab4025caa8b |
| image | 发明家 (2a927fb1-3606-43cf-ba8a-669d16a10228) |
| key_name | - |
| kms_server | - |
| ldap | False |
| ldap_server | - |
| life_cycle | - |
| locked | False |
| logout_time | 0 |
| metadata | {"reserve_pci_resources": "true"} |
| name | big |
| numa_topology | {"numa_info": [{"node": 0, "virtual_node": 0}, {"node": 1, "virtual_node": 1}], "use_numa": 2} |
| os-extended-volumes:volumes_attached | [{"device": "/dev/sda", "delete_on_termination": true, "id": "db97656b-7f68-4705-9204-ea76ac9422f7", "size": 200}] |
| os-pci:pci_devices | [] |
| os-pci:request_pci_devices | [{"count": 1, "vendor_id": "10de", "product_id": "2204"}] |
| pool | False |
| security_groups | default |
| serial_number | - |
| status | ERROR |
| tags | [] |
| tenant_id | e377f6bbc0e04fa0862e1a98bc62fe69 |
| updated | 2022-09-30T01:48:47Z |
| user_id | 0e9ca51a76df4b1b8c4944c7a11a2d75 |
| vlan1021 network | 10.10.21.145 |
+--------------------------------------+--------------------------------------------------------------------------------+
[root@node08 tmp]# nova show 7131c4f5-5607-422c-900a-aab4025caa8b |grep volumes    # the full listing above is verbose; we only need the volume info
| os-extended-volumes:volumes_attached | [{"device": "/dev/sda", "delete_on_termination": true, "id": "db97656b-7f68-4705-9204-ea76ac9422f7", "size": 200}] |
[root@node08 tmp]# rbd ls volumes |grep db97656b-7f68-4705-9204-ea76ac9422f7
volume-db97656b-7f68-4705-9204-ea76ac9422f7
With this step we have obtained the volume ID.
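Before exporting, a quick cross-check of the image in Ceph against Cinder's view of the volume can catch surprises early; a sketch using the standard rbd and cinder CLIs:

# size, order and features of the backing RBD image
rbd info -p volumes volume-db97656b-7f68-4705-9204-ea76ac9422f7

# the same volume from Cinder's point of view (status, size, attachment)
cinder show db97656b-7f68-4705-9204-ea76ac9422f7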
Export the volume with the rbd command
[root@node08 tmp]# rbd export -p volumes volume-db97656b-7f68-4705-9204-ea76ac9422f7 ./vm1.bak
Exporting image: 100% complete...done.
[root@node08 tmp]# file vm1.bak
vm1.bak: x86 boot sector; partition 1: ID=0xee, starthead 0, startsector 1, 4294967295 sectors, code offset 0xc0, OEM-ID " м", Bytes/sector 190, sectors/cluster 124, reserved sectors 191, FATs 6, root entries 185, sectors 64514 (volumes <=32 MB) , Media descriptor 0xf3, sectors/FAT 20644, heads 6, hidden sectors 309755, sectors 2147991229 (volumes > 32 MB) , physical drive 0x7e, dos < 4.0 BootSector (0x0)
[root@node08 tmp]# ll -h vm1.bak
-rw-r--r-- 1 root root 100G Mar 21 22:58 vm1.bak
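Everything downstream depends on this export, so a sanity check is cheap insurance; a sketch, assuming qemu-img is available on the node:

# the export is a raw image; the virtual size should match the source volume
qemu-img info vm1.bak

# optional and slow on a file this large, but useful before copying it elsewhere
md5sum vm1.bak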
Create a new instance with the same configuration
I have already created the new instance and shut it down.
[root@node08 tmp]# nova list --all |grep new
| 0e6910de-448d-476b-876a-73ef248d7f21 | new-test | e377f6bbc0e04fa0862e1a98bc62fe69 | ACTIVE | - | Shutdown | vlan1021=10.10.21.74 |
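For reference, the shutdown itself is a single nova call; a sketch:

# stop the new instance so its volume is not in use while we swap it out
nova stop 0e6910de-448d-476b-876a-73ef248d7f21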
As before, look up its volume ID.
[root@node08 tmp]# nova show 0e6910de-448d-476b-876a-73ef248d7f21 |grep volumes
| os-extended-volumes:volumes_attached | [{"device": "/dev/sda", "delete_on_termination": true, "id": "5c1ddf04-6f7f-4fef-85bd-4d81063d263e", "size": 40}] |
[root@node08 tmp]# rbd ls volumes |grep 5c1ddf04-6f7f-4fef-85bd-4d81063d263e
volume-5c1ddf04-6f7f-4fef-85bd-4d81063d263e
Delete the new VM's existing volume
[root@node08 tmp]# rbd rm -p volumes volume-5c1ddf04-6f7f-4fef-85bd-4d81063d263e
Removing image: 100% complete...done.
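If rbd rm refuses with an error about snapshots (possible when Cinder snapshots were ever taken of the volume), list and purge them first; a sketch:

# snapshots block image removal
rbd snap ls -p volumes volume-5c1ddf04-6f7f-4fef-85bd-4d81063d263e

# caution: irreversibly deletes every snapshot of this image
rbd snap purge -p volumes volume-5c1ddf04-6f7f-4fef-85bd-4d81063d263e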
At this point, rbd ls volumes no longer shows this volume.
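A one-line check makes that explicit:

# prints nothing once the image is gone
rbd ls volumes | grep 5c1ddf04-6f7f-4fef-85bd-4d81063d263e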
Import the previously exported file into this VM, making sure the target name matches the volume ID we just recorded
[root@node08 tmp]# rbd import -p volumes vm1.bak volume-5c1ddf04-6f7f-4fef-85bd-4d81063d263e
rbd: --pool is deprecated for import, use --dest-pool
Importing image: 100% complete...done.
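The warning is cosmetic, but the non-deprecated spelling is the one below. Note also that rbd import sizes the destination image to match the source file, so the recreated image will no longer match the 40 GB that Cinder has on record for this volume; keep that discrepancy in mind.

# equivalent import using the non-deprecated destination flag
rbd import --dest-pool volumes vm1.bak volume-5c1ddf04-6f7f-4fef-85bd-4d81063d263e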
Verify
That's it. Power the instance on from the dashboard or directly from the command line, then confirm the data is intact.
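From the CLI that final check is only a couple of commands; a sketch (the guest IP comes from the listing above):

# power the instance back on and watch for ACTIVE / Running
nova start 0e6910de-448d-476b-876a-73ef248d7f21
nova list --all |grep new-test

# once it answers, log in and verify the data is intact
ping -c 3 10.10.21.74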