nova start 虚机的代码流程分析,以ocata版本为分析基础
1、nova api服务接收用户下发的 nova start 启动虚机请求
其对应的HTTP RESTful API接口为 POST /servers/{server_id}/action
发送的action为os-start
nova/api/openstack/compute/servers.py class ServersController(wsgi.Controller): def _start_server(self, req, id, body): """Start an instance.""" context = req.environ['nova.context'] instance = self._get_instance(context, id) context.can(server_policies.SERVERS % 'start', instance) try: self.compute_api.start(context, instance)--------compute服务的api模块接受该请求 except (exception.InstanceNotReady, exception.InstanceIsLocked) as e: raise webob.exc.HTTPConflict(explanation=e.format_message()) except exception.InstanceUnknownCell as e: raise exc.HTTPNotFound(explanation=e.format_message()) except exception.InstanceInvalidState as state_error: common.raise_http_conflict_for_instance_invalid_state(state_error, 'start', id)
2、nova compute模块的api处理该请求
nova/compute/api.py class API(base.Base): @check_instance_state(vm_state=[vm_states.STOPPED]) def start(self, context, instance): """Start an instance.""" LOG.debug("Going to try to start instance", instance=instance) instance.task_state = task_states.POWERING_ON instance.save(expected_task_state=[None]) self._record_action_start(context, instance, instance_actions.START)-----记录对虚机的action操作 # TODO(yamahata): injected_files isn't supported right now. # It is used only for osapi. not for ec2 api. # availability_zone isn't used by run_instance. self.compute_rpcapi.start_instance(context, instance)-----给nova compute服务发送rpc请求
3、nova-compute服务接收rpc请求,最终处理该rpc请求的是nova/compute/manager.py文件中的同名方法start_instance
nova/compute/rpcapi.py @profiler.trace_cls("rpc") class ComputeAPI(object): def start_instance(self, ctxt, instance): version = '4.0' cctxt = self.router.by_instance(ctxt, instance).prepare( server=_compute_host(None, instance), version=version) cctxt.cast(ctxt, 'start_instance', instance=instance) nova/compute/manager.py def start_instance(self, context, instance): """Starting an instance on this host.""" self._notify_about_instance_usage(context, instance, "power_on.start")-----发送虚机上电开始的信息 compute_utils.notify_about_instance_action(context, instance, self.host, action=fields.NotificationAction.POWER_ON, phase=fields.NotificationPhase.START) self._power_on(context, instance)-------核心,给虚机上电 3.1 instance.power_state = self._get_power_state(context, instance) instance.vm_state = vm_states.ACTIVE instance.task_state = None # Delete an image(VM snapshot) for a shelved instance snapshot_id = instance.system_metadata.get('shelved_image_id') if snapshot_id: self._delete_snapshot_of_shelved_instance(context, instance, snapshot_id) # Delete system_metadata for a shelved instance compute_utils.remove_shelved_keys_from_system_metadata(instance) instance.save(expected_task_state=task_states.POWERING_ON) self._notify_about_instance_usage(context, instance, "power_on.end")----发送虚机上电完成的信息 compute_utils.notify_about_instance_action(context, instance, self.host, action=fields.NotificationAction.POWER_ON, phase=fields.NotificationPhase.END)
3.1 对_power_on(context, instance)函数的详解
nova/compute/manager.py def _power_on(self, context, instance): network_info = self.network_api.get_instance_nw_info(context, instance)-----s1 获取虚拟机的网络信息 block_device_info = self._get_instance_block_device_info(context,instance)----s2 获取虚机挂载卷的信息 self.driver.power_on(context, instance,network_info,block_device_info)--- s3
s3 由于openstack默认使用libvirt,所以这里调用的是libvirt的驱动。
为了确保在启动虚机的时候,镜像、网络、要挂载的块设备都有效并且已正常建立,power_on采用了硬重启(_hard_reboot)的方式
nova/virt/libvirt/driver.py class LibvirtDriver(driver.ComputeDriver): def power_on(self, context, instance, network_info, block_device_info=None): """Power on the specified instance.""" self._hard_reboot(context, instance, network_info, block_device_info)
3.1.1 对_hard_reboot函数的详解
该函数主要做的功能是:
1)强制关闭虚拟机
2)删除虚机的xml文件
3)获取虚机镜像信息
4)生成xml文件
5)根据xml创建domain,启动虚机
nova/virt/libvirt/driver.py class LibvirtDriver(driver.ComputeDriver): def _hard_reboot(self, context, instance, network_info, block_device_info=None): """Reboot a virtual machine, given an instance reference. Performs a Libvirt reset (if supported) on the domain. If Libvirt reset is unavailable this method actually destroys and re-creates the domain to ensure the reboot happens, as the guest OS cannot ignore this action. """ self._destroy(instance)-----s1 # Domain XML will be redefined so we can safely undefine it # from libvirt. This ensure that such process as create serial # console for guest will run smoothly. self._undefine_domain(instance)--s2 # Convert the system metadata to image metadata # NOTE(mdbooth): This is a workaround for stateless Nova compute # https://bugs.launchpad.net/nova/+bug/1349978 instance_dir = libvirt_utils.get_instance_path(instance)----s3 fileutils.ensure_tree(instance_dir) disk_info = blockinfo.get_disk_info(CONF.libvirt.virt_type,instance, instance.image_meta,---s4 block_device_info) # NOTE(vish): This could generate the wrong device_format if we are # using the raw backend and the images don't exist yet. # The create_images_and_backing below doesn't properly # regenerate raw backend images, however, so when it # does we need to (re)generate the xml after the images # are in place. xml = self._get_guest_xml(context, instance, network_info, disk_info,----s5 instance.image_meta, block_device_info=block_device_info) # NOTE(mdbooth): context.auth_token will not be set when we call # _hard_reboot from resume_state_on_host_boot() if context.auth_token is not None:-----------------s6 # NOTE (rmk): Re-populate any missing backing files. backing_disk_info = self._get_instance_disk_info(instance.name, xml, block_device_info) self._create_images_and_backing(context, instance, instance_dir, backing_disk_info) # Initialize all the necessary networking, block devices and # start the instance. 
self._create_domain_and_network(context, xml, instance, network_info,-------s7 disk_info, block_device_info=block_device_info, reboot=True, vifs_already_plugged=True) self._prepare_pci_devices_for_use( pci_manager.get_instance_pci_devs(instance, 'all')) def _wait_for_reboot(): """Called at an interval until the VM is running again.""" state = self.get_info(instance).state if state == power_state.RUNNING: LOG.info(_LI("Instance rebooted successfully."), instance=instance) raise loopingcall.LoopingCallDone() timer = loopingcall.FixedIntervalLoopingCall(_wait_for_reboot) timer.start(interval=0.5).wait()
在虚机启动的时候,涉及虚机相关信息变化的目录有以下四个
/var/lib/libvirt/qemu----存放虚机运行domain域的目录
/etc/libvirt/qemu---------存放虚机xml文件的目录
/os_instance/_base--------存储虚机镜像的目录
/os_instance/虚机uuid-----存放虚机disk磁盘信息的目录
s1 执行时,上面目录没有任何变化
s2 执行时,/etc/libvirt/qemu目录下,虚机对应的xml文件被删除,其他目录无变化
s3 执行时,获取虚机的目录/os_instance/03cb8a7c-786f-402a-b059-1f2d90e69bd4
s4 执行时,获取虚机disk信息
主要相关参数的值如下:
CONF.libvirt.virt_type='kvm'
block_device_info={'swap': None, 'root_device_name': u'/dev/vda', 'ephemerals': [], 'block_device_mapping': []}
disk_info={'disk_bus': 'virtio',
'cdrom_bus': 'ide',
'mapping': {'disk.config': {'bus': 'ide', 'type': 'cdrom', 'dev': 'hda'},
'disk': {'bus': 'virtio', 'boot_index': '1', 'type': 'disk', 'dev': u'vda'},
'root': {'bus': 'virtio', 'boot_index': '1', 'type': 'disk', 'dev': u'vda'}}
}
s5 执行时,生成xml文件信息,此时,只是在内存里面存放,还没有写到/etc/libvirt/qemu目录对应的xml文件里面
s6 context.auth_token 存放token信息;当它不为None时,会获取backing_disk_info并重新补齐缺失的backing文件,backing_disk_info的示例值如下:
backing_disk_info=[
{'disk_size': 149159936,
'backing_file': '4c4935095cb43925d61d67395c452ea248e6b1c4',
'virt_disk_size': 85899345920,
'path': '/os_instance/03cb8a7c-786f-402a-b059-1f2d90e69bd4/disk',
'type': 'qcow2',
'over_committed_disk_size': 85750185984},
{'disk_size': 489472,
'backing_file': '',
'virt_disk_size': 489472,
'path': '/os_instance/03cb8a7c-786f-402a-b059-1f2d90e69bd4/disk.config',
'type': 'raw',
'over_committed_disk_size': 0}
]
s7 执行时,在/etc/libvirt/qemu目录下生成虚机对应的xml文件,其他目录无变化
nova stop 虚机的时候,上述目录的变化情况
/var/lib/libvirt/qemu目录下domain-21-instance-xx,相关的目录会被删除,其他目录无变化