In Part 1 of this walkthrough of the OpenStack Nova instance-creation process, the analysis stopped at self.conductor_compute_rpcapi.schedule_and_build_instances() in /nova/conductor/rpcapi.py, shown below:
def schedule_and_build_instances(self, context, build_requests,
request_specs,
image, admin_password, injected_files,
requested_networks,
block_device_mapping,
tags=None):
version = '1.17'
kw = {'build_requests': build_requests,
'request_specs': request_specs,
'image': jsonutils.to_primitive(image),
'admin_password': admin_password,
'injected_files': injected_files,
'requested_networks': requested_networks,
'block_device_mapping': block_device_mapping,
'tags': tags}
if not self.client.can_send_version(version):
version = '1.16'
del kw['tags']
cctxt = self.client.prepare(version=version)
# Send an asynchronous (fire-and-forget) RPC message
cctxt.cast(context, 'schedule_and_build_instances', **kw)
It puts the message on the message bus with cast() and leaves it for nova-conductor to process.
Note: RPC call() sends a message to the queue and waits for the reply (blocking); RPC cast() sends a message to the queue and continues without waiting for a reply (non-blocking).
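To make the difference concrete, here is a minimal, hedged client-side sketch using oslo.messaging; the topic, method name and arguments are placeholders, not values taken from Nova:
# Minimal sketch of RPC cast vs. call with oslo.messaging.
# Assumes transport_url in the oslo.config configuration points at a running broker.
import oslo_messaging as messaging
from oslo_config import cfg

transport = messaging.get_rpc_transport(cfg.CONF)
target = messaging.Target(topic='demo_topic', version='1.0')
client = messaging.RPCClient(transport, target)
cctxt = client.prepare(version='1.0')

ctxt = {}  # request context (a dict or a RequestContext-like object)

# cast(): enqueue the message and return immediately -- non-blocking, no
# return value. This is how schedule_and_build_instances is invoked.
cctxt.cast(ctxt, 'do_something', arg1='x')

# call(): enqueue the message and block until the server replies. This is
# how the conductor later asks nova-scheduler for select_destinations.
result = cctxt.call(ctxt, 'do_something', arg1='x')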
nova-conductor is subscribed to the schedule_and_build_instances message and dispatches it to the function of the same name in /nova/conductor/manager.py, shown below (a sketch of that server-side dispatch follows the listing):
def schedule_and_build_instances(self, context, build_requests,
request_specs, image,
admin_password, injected_files,
requested_networks, block_device_mapping,
tags=None):
# Add all the UUIDs for the instances
instance_uuids = [spec.instance_uuid for spec in request_specs]
try:
# Returns a list of hosts suitable for the requested instances. Call chain:
# /nova/conductor/manager.py:_schedule_instances() --> /nova/scheduler/client/__init__.py:select_destinations() --> /nova/scheduler/client/query.py:select_destinations()
# --> /nova/scheduler/rpcapi.py:select_destinations(), which sends a select_destinations request to the message queue using call() and blocks until nova-scheduler returns the suitable hosts
host_lists = self._schedule_instances(context, request_specs[0],
instance_uuids, return_alternates=True)
except Exception as exc:
LOG.exception('Failed to schedule instances')
self._bury_in_cell0(context, request_specs[0], exc,
build_requests=build_requests)
return
host_mapping_cache = {}
cell_mapping_cache = {}
instances = []
for (build_request, request_spec, host_list) in six.moves.zip(
build_requests, request_specs, host_lists):
instance = build_request.get_new_instance(context)
# host_list is a list of one or more Selection objects, the first
# of which has been selected and its resources claimed.
host = host_list[0]
# Convert host from the scheduler into a cell record
if host.service_host not in host_mapping_cache:
try:
host_mapping = objects.HostMapping.get_by_host(
context, host.service_host)
host_mapping_cache[host.service_host] = host_mapping
except exception.HostMappingNotFound as exc:
LOG.error('No host-to-cell mapping found for selected '
'host %(host)s. Setup is incomplete.',
{'host': host.service_host})
self._bury_in_cell0(context, request_spec, exc,
build_requests=[build_request],
instances=[instance])
# This is a placeholder in case the quota recheck fails.
instances.append(None)
continue
else:
host_mapping = host_mapping_cache[host.service_host]
cell = host_mapping.cell_mapping
# Before we create the instance, let's make one final check that
# the build request is still around and wasn't deleted by the user
# already.
try:
objects.BuildRequest.get_by_instance_uuid(
context, instance.uuid)
except exception.BuildRequestNotFound:
# the build request is gone so we're done for this instance
LOG.debug('While scheduling instance, the build request '
'was already deleted.', instance=instance)
# This is a placeholder in case the quota recheck fails.
instances.append(None)
rc = self.scheduler_client.reportclient
rc.delete_allocation_for_instance(context, instance.uuid)
continue
else:
instance.availability_zone = (
availability_zones.get_host_availability_zone(
context, host.service_host))
with obj_target_cell(instance, cell):
instance.create()
instances.append(instance)
cell_mapping_cache[instance.uuid] = cell
# NOTE(melwitt): We recheck the quota after creating the
# objects to prevent users from allocating more resources
# than their allowed quota in the event of a race. This is
# configurable because it can be expensive if strict quota
# limits are not required in a deployment.
if CONF.quota.recheck_quota:
try:
compute_utils.check_num_instances_quota(
context, instance.flavor, 0, 0,
orig_num_req=len(build_requests))
except exception.TooManyInstances as exc:
with excutils.save_and_reraise_exception():
self._cleanup_build_artifacts(context, exc, instances,
build_requests,
request_specs,
cell_mapping_cache)
zipped = six.moves.zip(build_requests, request_specs, host_lists,
instances)
for (build_request, request_spec, host_list, instance) in zipped:
if instance is None:
# Skip placeholders that were buried in cell0 or had their
# build requests deleted by the user before instance create.
continue
cell = cell_mapping_cache[instance.uuid]
# host_list is a list of one or more Selection objects, the first
# of which has been selected and its resources claimed.
host = host_list.pop(0)
alts = [(alt.service_host, alt.nodename) for alt in host_list]
LOG.debug("Selected host: %s; Selected node: %s; Alternates: %s",
host.service_host, host.nodename, alts, instance=instance)
filter_props = request_spec.to_legacy_filter_properties_dict()
scheduler_utils.populate_retry(filter_props, instance.uuid)
scheduler_utils.populate_filter_properties(filter_props,
host)
# TODO(melwitt): Maybe we should set_target_cell on the contexts
# once we map to a cell, and remove these separate with statements.
with obj_target_cell(instance, cell) as cctxt:
# send a state update notification for the initial create to
# show it going from non-existent to BUILDING
# This can lazy-load attributes on instance.
notifications.send_update_with_states(cctxt, instance, None,
vm_states.BUILDING, None, None, service="conductor")
objects.InstanceAction.action_start(
cctxt, instance.uuid, instance_actions.CREATE,
want_result=False)
instance_bdms = self._create_block_device_mapping(
cell, instance.flavor, instance.uuid, block_device_mapping)
instance_tags = self._create_tags(cctxt, instance.uuid, tags)
# TODO(Kevin Zheng): clean this up once instance.create() handles
# tags; we do this so the instance.create notification in
# build_and_run_instance in nova-compute doesn't lazy-load tags
instance.tags = instance_tags if instance_tags \
else objects.TagList()
# Update mapping for instance. Normally this check is guarded by
# a try/except but if we're here we know that a newer nova-api
# handled the build process and would have created the mapping
inst_mapping = objects.InstanceMapping.get_by_instance_uuid(
context, instance.uuid)
inst_mapping.cell_mapping = cell
inst_mapping.save()
if not self._delete_build_request(
context, build_request, instance, cell, instance_bdms,
instance_tags):
# The build request was deleted before/during scheduling so
# the instance is gone and we don't have anything to build for
# this one.
continue
# NOTE(danms): Compute RPC expects security group names or ids
# not objects, so convert this to a list of names until we can
# pass the objects.
legacy_secgroups = [s.identifier
for s in request_spec.security_groups]
with obj_target_cell(instance, cell) as cctxt:
self.compute_rpcapi.build_and_run_instance(
cctxt, instance=instance, image=image,
request_spec=request_spec,
filter_properties=filter_props,
admin_password=admin_password,
injected_files=injected_files,
requested_networks=requested_networks,
security_groups=legacy_secgroups,
block_device_mapping=instance_bdms,
host=host.service_host, node=host.nodename,
limits=host.limits, host_list=host_list)
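For reference, the "subscribed to the message" part works roughly as sketched below: the service registers its manager object as an RPC endpoint, so a cast of 'schedule_and_build_instances' is dispatched to the method of that name. This is a simplified, hedged illustration, not Nova's actual service-setup code; the topic, server and class names are placeholders:
# Hedged sketch of the server side of the RPC exchange: how a service such
# as nova-conductor consumes casts/calls and dispatches them to its manager.
import oslo_messaging as messaging
from oslo_config import cfg

class DemoManager(object):
    # Endpoint version cap; must be compatible with the client's version.
    target = messaging.Target(version='1.17')

    def schedule_and_build_instances(self, context, build_requests,
                                     request_specs, image, admin_password,
                                     injected_files, requested_networks,
                                     block_device_mapping, tags=None):
        # The method name must match the name passed to cast()/call()
        # on the client side; real work would happen here.
        pass

transport = messaging.get_rpc_transport(cfg.CONF)
target = messaging.Target(topic='demo_conductor', server='host1')
# The executor choice depends on the oslo.messaging release in use.
server = messaging.get_rpc_server(transport, target, [DemoManager()],
                                  executor='eventlet')
server.start()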
Execution then moves into /nova/compute/rpcapi.py, which sends a non-blocking message to the queue asking for build_and_run_instance() to be executed, as follows:
def build_and_run_instance(self, ctxt, instance, host, image, request_spec,
filter_properties, admin_password=None, injected_files=None,
requested_networks=None, security_groups=None,
block_device_mapping=None, node=None, limits=None,
host_list=None):
# NOTE(edleafe): compute nodes can only use the dict form of limits.
if isinstance(limits, objects.SchedulerLimits):
limits = limits.to_dict()
kwargs = {"instance": instance,
"image": image,
"request_spec": request_spec,
"filter_properties": filter_properties,
"admin_password": admin_password,
"injected_files": injected_files,
"requested_networks": requested_networks,
"security_groups": security_groups,
"block_device_mapping": block_device_mapping,
"node": node,
"limits": limits,
"host_list": host_list,
}
client = self.router.client(ctxt)
version = '4.19'
if not client.can_send_version(version):
version = '4.0'
kwargs.pop("host_list")
cctxt = client.prepare(server=host, version=version)
cctxt.cast(ctxt, 'build_and_run_instance', **kwargs)
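Note the prepare(server=host) above: unlike the conductor cast, which any nova-conductor worker may consume, this cast is routed to the queue of one specific compute host. A hedged sketch of that targeting; the host name, payload and client setup are placeholders:
# Hedged sketch: why prepare(server=host) matters. With server= set, the
# message lands in the queue consumed only by that host's nova-compute;
# without it, any worker listening on the topic could pick it up.
import oslo_messaging as messaging
from oslo_config import cfg

transport = messaging.get_rpc_transport(cfg.CONF)
client = messaging.RPCClient(transport, messaging.Target(topic='compute'))

kwargs = {'instance': 'placeholder'}  # illustrative payload only
cctxt = client.prepare(server='compute-node-1', version='4.19')
cctxt.cast({}, 'build_and_run_instance', **kwargs)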
nova-compute is subscribed to the build_and_run_instance() message and executes that function in /nova/compute/manager.py, as follows:
def build_and_run_instance(self, context, instance, image, request_spec,
filter_properties, admin_password=None,
injected_files=None, requested_networks=None,
security_groups=None, block_device_mapping=None,
node=None, limits=None, host_list=None):
@utils.synchronized(instance.uuid)
def _locked_do_build_and_run_instance(*args, **kwargs):
# NOTE(danms): We grab the semaphore with the instance uuid
# locked because we could wait in line to build this instance
# for a while and we want to make sure that nothing else tries
# to do anything with this instance while we wait.
with self._build_semaphore:
try:
result = self._do_build_and_run_instance(*args, **kwargs)
except Exception:
# NOTE(mriedem): This should really only happen if
# _decode_files in _do_build_and_run_instance fails, and
# that's before a guest is spawned so it's OK to remove
# allocations for the instance for this node from Placement
# below as there is no guest consuming resources anyway.
# The _decode_files case could be handled more specifically
# but that's left for another day.
result = build_results.FAILED
raise
finally:
if result == build_results.FAILED:
# Remove the allocation records from Placement for the
# instance if the build failed. The instance.host is
# likely set to None in _do_build_and_run_instance
# which means if the user deletes the instance, it
# will be deleted in the API, not the compute service.
# Setting the instance.host to None in
# _do_build_and_run_instance means that the
# ResourceTracker will no longer consider this instance
# to be claiming resources against it, so we want to
# reflect that same thing in Placement. No need to
# call this for a reschedule, as the allocations will
# have already been removed in
# self._do_build_and_run_instance().
self._delete_allocation_for_instance(context,
instance.uuid)
if result in (build_results.FAILED,
build_results.RESCHEDULED):
self._build_failed()
else:
self._failed_builds = 0
# NOTE(danms): We spawn here to return the RPC worker thread back to
# the pool. Since what follows could take a really long time, we don't
# want to tie up RPC workers.
utils.spawn_n(_locked_do_build_and_run_instance,
context, instance, image, request_spec,
filter_properties, admin_password, injected_files,
requested_networks, security_groups,
block_device_mapping, node, limits, host_list)
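The wrapper above combines three things: a per-instance lock (utils.synchronized(instance.uuid)), a host-wide build semaphore, and utils.spawn_n to hand the work to a green thread so the RPC worker is freed immediately. A stripped-down, hedged sketch of that pattern; the names, semaphore size and helpers here are illustrative, not Nova's:
# Hedged sketch of the "free the RPC worker" pattern used above:
# serialize per key, bound overall concurrency, run in a green thread.
import eventlet
from eventlet import semaphore
from oslo_concurrency import lockutils

_build_semaphore = semaphore.Semaphore(10)  # example concurrency limit

def handle_build_request(instance_uuid, do_build):
    @lockutils.synchronized(instance_uuid)
    def _locked_build():
        with _build_semaphore:
            do_build()

    # Return to the caller right away; the build continues in the
    # background green thread.
    eventlet.spawn_n(_locked_build)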
It then calls _do_build_and_run_instance() in /nova/compute/manager.py, as follows:
def _do_build_and_run_instance(self, context, instance, image,
request_spec, filter_properties, admin_password, injected_files,
requested_networks, security_groups, block_device_mapping,
node=None, limits=None, host_list=None):
try:
LOG.debug('Starting instance...', instance=instance)
instance.vm_state = vm_states.BUILDING
instance.task_state = None
instance.save(expected_task_state=
(task_states.SCHEDULING, None))
except exception.InstanceNotFound:
msg = 'Instance disappeared before build.'
LOG.debug(msg, instance=instance)
return build_results.FAILED
except exception.UnexpectedTaskStateError as e:
LOG.debug(e.format_message(), instance=instance)
return build_results.FAILED
# b64 decode the files to inject:
decoded_files = self._decode_files(injected_files)
if limits is None:
limits = {}
if node is None:
node = self._get_nodename(instance, refresh=True)
try:
with timeutils.StopWatch() as timer:
self._build_and_run_instance(context, instance, image,
decoded_files, admin_password, requested_networks,
security_groups, block_device_mapping, node, limits,
filter_properties, request_spec)
LOG.info('Took %0.2f seconds to build instance.',
timer.elapsed(), instance=instance)
return build_results.ACTIVE
except exception.RescheduledException as e:
retry = filter_properties.get('retry')
if not retry:
# no retry information, do not reschedule.
LOG.debug("Retry info not present, will not reschedule",
instance=instance)
self._cleanup_allocated_networks(context, instance,
requested_networks)
self._cleanup_volumes(context, instance.uuid,
block_device_mapping, raise_exc=False)
compute_utils.add_instance_fault_from_exc(context,
instance, e, sys.exc_info(),
fault_message=e.kwargs['reason'])
self._nil_out_instance_obj_host_and_node(instance)
self._set_instance_obj_error_state(context, instance,
clean_task_state=True)
return build_results.FAILED
LOG.debug(e.format_message(), instance=instance)
# This will be used for logging the exception
retry['exc'] = traceback.format_exception(*sys.exc_info())
# This will be used for setting the instance fault message
retry['exc_reason'] = e.kwargs['reason']
# NOTE(comstud): Deallocate networks if the driver wants
# us to do so.
# NOTE(vladikr): SR-IOV ports should be deallocated to
# allow new sriov pci devices to be allocated on a new host.
# Otherwise, if devices with pci addresses are already allocated
# on the destination host, the instance will fail to spawn.
# info_cache.network_info should be present at this stage.
if (self.driver.deallocate_networks_on_reschedule(instance) or
self.deallocate_sriov_ports_on_reschedule(instance)):
self._cleanup_allocated_networks(context, instance,
requested_networks)
else:
# NOTE(alex_xu): Network already allocated and we don't
# want to deallocate them before rescheduling. But we need
# to cleanup those network resources setup on this host before
# rescheduling.
self.network_api.cleanup_instance_network_on_host(
context, instance, self.host)
self._nil_out_instance_obj_host_and_node(instance)
instance.task_state = task_states.SCHEDULING
instance.save()
# The instance will have already claimed resources from this host
# before this build was attempted. Now that it has failed, we need
# to unclaim those resources before casting to the conductor, so
# that if there are alternate hosts available for a retry, it can
# claim resources on that new host for the instance.
self._delete_allocation_for_instance(context, instance.uuid)
self.compute_task_api.build_instances(context, [instance],
image, filter_properties, admin_password,
injected_files, requested_networks, security_groups,
block_device_mapping, request_spec=request_spec,
host_lists=[host_list])
return build_results.RESCHEDULED
except (exception.InstanceNotFound,
exception.UnexpectedDeletingTaskStateError):
msg = 'Instance disappeared during build.'
LOG.debug(msg, instance=instance)
self._cleanup_allocated_networks(context, instance,
requested_networks)
return build_results.FAILED
except exception.BuildAbortException as e:
LOG.exception(e.format_message(), instance=instance)
self._cleanup_allocated_networks(context, instance,
requested_networks)
self._cleanup_volumes(context, instance.uuid,
block_device_mapping, raise_exc=False)
compute_utils.add_instance_fault_from_exc(context, instance,
e, sys.exc_info())
self._nil_out_instance_obj_host_and_node(instance)
self._set_instance_obj_error_state(context, instance,
clean_task_state=True)
return build_results.FAILED
except Exception as e:
# Should not reach here.
LOG.exception('Unexpected build failure, not rescheduling build.',
instance=instance)
self._cleanup_allocated_networks(context, instance,
requested_networks)
self._cleanup_volumes(context, instance.uuid,
block_device_mapping, raise_exc=False)
compute_utils.add_instance_fault_from_exc(context, instance,
e, sys.exc_info())
self._nil_out_instance_obj_host_and_node(instance)
self._set_instance_obj_error_state(context, instance,
clean_task_state=True)
return build_results.FAILED
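_do_build_and_run_instance() reports its outcome with the constants from nova/compute/build_results.py; roughly (hedged from memory, check the tree you are reading):
# Approximate content of nova/compute/build_results.py (hedged):
# the three possible outcomes the wrapper above switches on.
ACTIVE = 'active'            # build succeeded, instance is running
FAILED = 'failed'            # build failed, no reschedule will happen
RESCHEDULED = 'rescheduled'  # build failed here, cast back to conductor to retry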
Still in the same file, it calls _build_and_run_instance(), which starts spawning the instance on the hypervisor, as follows:
def _build_and_run_instance(self, context, instance, image, injected_files,
admin_password, requested_networks, security_groups,
block_device_mapping, node, limits, filter_properties,
request_spec=None):
image_name = image.get('name')
self._notify_about_instance_usage(context, instance, 'create.start',
extra_usage_info={'image_name': image_name})
compute_utils.notify_about_instance_create(
context, instance, self.host,
phase=fields.NotificationPhase.START,
bdms=block_device_mapping)
# NOTE(mikal): cache the keystone roles associated with the instance
# at boot time for later reference
instance.system_metadata.update(
{'boot_roles': ','.join(context.roles)})
self._check_device_tagging(requested_networks, block_device_mapping)
try:
scheduler_hints = self._get_scheduler_hints(filter_properties,
request_spec)
rt = self._get_resource_tracker()
with rt.instance_claim(context, instance, node, limits):
# NOTE(russellb) It's important that this validation be done
# *after* the resource tracker instance claim, as that is where
# the host is set on the instance.
self._validate_instance_group_policy(context, instance,
scheduler_hints)
image_meta = objects.ImageMeta.from_dict(image)
with self._build_resources(context, instance,
requested_networks, security_groups, image_meta,
block_device_mapping) as resources:
instance.vm_state = vm_states.BUILDING
instance.task_state = task_states.SPAWNING
# NOTE(JoshNang) This also saves the changes to the
# instance from _allocate_network_async, as they aren't
# saved in that function to prevent races.
instance.save(expected_task_state=
task_states.BLOCK_DEVICE_MAPPING)
block_device_info = resources['block_device_info']
network_info = resources['network_info']
allocs = resources['allocations']
LOG.debug('Start spawning the instance on the hypervisor.',
instance=instance)
with timeutils.StopWatch() as timer:
# Start spawning the instance on the hypervisor
self.driver.spawn(context, instance, image_meta,
injected_files, admin_password,
allocs, network_info=network_info,
block_device_info=block_device_info)
LOG.info('Took %0.2f seconds to spawn the instance on '
'the hypervisor.', timer.elapsed(),
instance=instance)
except (exception.InstanceNotFound,
exception.UnexpectedDeletingTaskStateError) as e:
with excutils.save_and_reraise_exception():
self._notify_about_instance_usage(context, instance,
'create.error', fault=e)
compute_utils.notify_about_instance_create(
context, instance, self.host,
phase=fields.NotificationPhase.ERROR, exception=e,
bdms=block_device_mapping)
except exception.ComputeResourcesUnavailable as e:
LOG.debug(e.format_message(), instance=instance)
self._notify_about_instance_usage(context, instance,
'create.error', fault=e)
compute_utils.notify_about_instance_create(
context, instance, self.host,
phase=fields.NotificationPhase.ERROR, exception=e,
bdms=block_device_mapping)
raise exception.RescheduledException(
instance_uuid=instance.uuid, reason=e.format_message())
except exception.BuildAbortException as e:
with excutils.save_and_reraise_exception():
LOG.debug(e.format_message(), instance=instance)
self._notify_about_instance_usage(context, instance,
'create.error', fault=e)
compute_utils.notify_about_instance_create(
context, instance, self.host,
phase=fields.NotificationPhase.ERROR, exception=e,
bdms=block_device_mapping)
except (exception.FixedIpLimitExceeded,
exception.NoMoreNetworks, exception.NoMoreFixedIps) as e:
LOG.warning('No more network or fixed IP to be allocated',
instance=instance)
self._notify_about_instance_usage(context, instance,
'create.error', fault=e)
compute_utils.notify_about_instance_create(
context, instance, self.host,
phase=fields.NotificationPhase.ERROR, exception=e,
bdms=block_device_mapping)
msg = _('Failed to allocate the network(s) with error %s, '
'not rescheduling.') % e.format_message()
raise exception.BuildAbortException(instance_uuid=instance.uuid,
reason=msg)
except (exception.VirtualInterfaceCreateException,
exception.VirtualInterfaceMacAddressException,
exception.FixedIpInvalidOnHost,
exception.UnableToAutoAllocateNetwork) as e:
LOG.exception('Failed to allocate network(s)',
instance=instance)
self._notify_about_instance_usage(context, instance,
'create.error', fault=e)
compute_utils.notify_about_instance_create(
context, instance, self.host,
phase=fields.NotificationPhase.ERROR, exception=e,
bdms=block_device_mapping)
msg = _('Failed to allocate the network(s), not rescheduling.')
raise exception.BuildAbortException(instance_uuid=instance.uuid,
reason=msg)
except (exception.FlavorDiskTooSmall,
exception.FlavorMemoryTooSmall,
exception.ImageNotActive,
exception.ImageUnacceptable,
exception.InvalidDiskInfo,
exception.InvalidDiskFormat,
cursive_exception.SignatureVerificationError,
exception.VolumeEncryptionNotSupported,
exception.InvalidInput) as e:
self._notify_about_instance_usage(context, instance,
'create.error', fault=e)
compute_utils.notify_about_instance_create(
context, instance, self.host,
phase=fields.NotificationPhase.ERROR, exception=e,
bdms=block_device_mapping)
raise exception.BuildAbortException(instance_uuid=instance.uuid,
reason=e.format_message())
except Exception as e:
self._notify_about_instance_usage(context, instance,
'create.error', fault=e)
compute_utils.notify_about_instance_create(
context, instance, self.host,
phase=fields.NotificationPhase.ERROR, exception=e,
bdms=block_device_mapping)
raise exception.RescheduledException(
instance_uuid=instance.uuid, reason=six.text_type(e))
# NOTE(alaski): This is only useful during reschedules, remove it now.
instance.system_metadata.pop('network_allocated', None)
# If CONF.default_access_ip_network_name is set, grab the
# corresponding network and set the access ip values accordingly.
network_name = CONF.default_access_ip_network_name
if (network_name and not instance.access_ip_v4 and
not instance.access_ip_v6):
# Note that when there are multiple ips to choose from, an
# arbitrary one will be chosen.
for vif in network_info:
if vif['network']['label'] == network_name:
for ip in vif.fixed_ips():
if not instance.access_ip_v4 and ip['version'] == 4:
instance.access_ip_v4 = ip['address']
if not instance.access_ip_v6 and ip['version'] == 6:
instance.access_ip_v6 = ip['address']
break
self._update_instance_after_spawn(context, instance)
try:
instance.save(expected_task_state=task_states.SPAWNING)
except (exception.InstanceNotFound,
exception.UnexpectedDeletingTaskStateError) as e:
with excutils.save_and_reraise_exception():
self._notify_about_instance_usage(context, instance,
'create.error', fault=e)
compute_utils.notify_about_instance_create(
context, instance, self.host,
phase=fields.NotificationPhase.ERROR, exception=e,
bdms=block_device_mapping)
self._update_scheduler_instance_info(context, instance)
self._notify_about_instance_usage(context, instance, 'create.end',
extra_usage_info={'message': _('Success')},
network_info=network_info)
compute_utils.notify_about_instance_create(context, instance,
self.host, phase=fields.NotificationPhase.END,
bdms=block_device_mapping)
Spawning the instance on the hypervisor goes through the hypervisor's Python driver; under /nova/virt/ you can find several drivers, including libvirt, powervm, vmwareapi and xenapi. Which one nova-compute loads is selected by configuration (a hedged sketch of that selection follows).
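As a rough, hedged illustration (not code quoted from Nova), the driver class is named by the compute_driver option in nova.conf (for libvirt: compute_driver = libvirt.LibvirtDriver) and imported at service start-up; conceptually something like:
# Hedged sketch: how nova-compute conceptually turns CONF.compute_driver
# (e.g. "libvirt.LibvirtDriver") into a driver instance. The function name
# here is illustrative; the real logic lives in nova/virt/driver.py.
from oslo_utils import importutils

def load_compute_driver_sketch(virtapi, compute_driver='libvirt.LibvirtDriver'):
    # The configured value is resolved inside the nova.virt package.
    driver_cls = importutils.import_class('nova.virt.' + compute_driver)
    return driver_cls(virtapi)
Since this environment uses libvirt, the call lands in spawn() in /nova/virt/libvirt/driver.py, as follows: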
def spawn(self, context, instance, image_meta, injected_files,
admin_password, allocations, network_info=None,
block_device_info=None):
# Get the disk configuration info
disk_info = blockinfo.get_disk_info(CONF.libvirt.virt_type,
instance,
image_meta,
block_device_info)
injection_info = InjectionInfo(network_info=network_info,
files=injected_files,
admin_pass=admin_password)
gen_confdrive = functools.partial(self._create_configdrive,
context, instance,
injection_info)
# Prepare the image (create the instance's disks)
self._create_image(context, instance, disk_info['mapping'],
injection_info=injection_info,
block_device_info=block_device_info)
# Required by Quobyte CI
self._ensure_console_log_for_instance(instance)
# Does the guest need to be assigned some vGPU mediated devices ?
mdevs = self._allocate_mdevs(allocations)
# Translate the current configuration into the libvirt XML for the new guest
xml = self._get_guest_xml(context, instance, network_info,
disk_info, image_meta,
block_device_info=block_device_info,
mdevs=mdevs)
# Set up networking and create the domain (instance)
self._create_domain_and_network(
context, xml, instance, network_info,
block_device_info=block_device_info,
post_xml_callback=gen_confdrive,
destroy_disks_on_failure=True)
LOG.debug("Instance is running", instance=instance)
def _wait_for_boot():
"""Called at an interval until the VM is running."""
state = self.get_info(instance).state
if state == power_state.RUNNING:
LOG.info("Instance spawned successfully.", instance=instance)
raise loopingcall.LoopingCallDone()
timer = loopingcall.FixedIntervalLoopingCall(_wait_for_boot)
timer.start(interval=0.5).wait()
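The _wait_for_boot loop above uses oslo.service's looping-call helper; the general pattern, as a small hedged example in which the predicate and wrapper function are placeholders:
# Hedged sketch of the polling pattern used by _wait_for_boot above:
# run a function at a fixed interval until it raises LoopingCallDone.
from oslo_service import loopingcall

def wait_until(predicate, interval=0.5):
    def _poll():
        if predicate():
            # Stops the loop; retvalue is what wait() returns.
            raise loopingcall.LoopingCallDone(retvalue=True)

    timer = loopingcall.FixedIntervalLoopingCall(_poll)
    return timer.start(interval=interval).wait()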
_create_domain_and_network(), called from spawn() in the same file, is shown below:
def _create_domain_and_network(self, context, xml, instance, network_info,
block_device_info=None, power_on=True,
vifs_already_plugged=False,
post_xml_callback=None,
destroy_disks_on_failure=False):
"""Do required network setup and create domain."""
timeout = CONF.vif_plugging_timeout
if (self._conn_supports_start_paused and
utils.is_neutron() and not
vifs_already_plugged and power_on and timeout):
events = self._get_neutron_events(network_info)
else:
events = []
pause = bool(events)
guest = None
try:
with self.virtapi.wait_for_instance_event(
instance, events, deadline=timeout,
error_callback=self._neutron_failed_callback):
self.plug_vifs(instance, network_info)
self.firewall_driver.setup_basic_filtering(instance,
network_info)
self.firewall_driver.prepare_instance_filter(instance,
network_info)
with self._lxc_disk_handler(context, instance,
instance.image_meta,
block_device_info):
guest = self._create_domain(
xml, pause=pause, power_on=power_on,
post_xml_callback=post_xml_callback)
self.firewall_driver.apply_instance_filter(instance,
network_info)
except exception.VirtualInterfaceCreateException:
# Neutron reported failure and we didn't swallow it, so
# bail here
with excutils.save_and_reraise_exception():
self._cleanup_failed_start(context, instance, network_info,
block_device_info, guest,
destroy_disks_on_failure)
except eventlet.timeout.Timeout:
# We never heard from Neutron
LOG.warning('Timeout waiting for %(events)s for '
'instance with vm_state %(vm_state)s and '
'task_state %(task_state)s.',
{'events': events,
'vm_state': instance.vm_state,
'task_state': instance.task_state},
instance=instance)
if CONF.vif_plugging_is_fatal:
self._cleanup_failed_start(context, instance, network_info,
block_device_info, guest,
destroy_disks_on_failure)
raise exception.VirtualInterfaceCreateException()
except Exception:
# Any other error, be sure to clean up
LOG.error('Failed to start libvirt guest', instance=instance)
with excutils.save_and_reraise_exception():
self._cleanup_failed_start(context, instance, network_info,
block_device_info, guest,
destroy_disks_on_failure)
# Resume only if domain has been paused
if pause:
guest.resume()
return guest
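The events waited on here are Neutron's network-vif-plugged notifications, sent to nova-compute once Neutron finishes wiring each port; the wait is bounded by CONF.vif_plugging_timeout and made fatal or not by CONF.vif_plugging_is_fatal. A hedged sketch of how the event list is typically derived from network_info, mirroring (approximately) what _get_neutron_events does; the field names are assumptions:
# Hedged sketch: build the list of (event_name, tag) pairs the driver waits
# for, one per port that Neutron has not yet reported as active.
def get_neutron_events_sketch(network_info):
    return [('network-vif-plugged', vif['id'])
            for vif in network_info
            if vif.get('active', True) is False]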
_create_domain(), also called in this file, is shown below:
def _create_domain(self, xml=None, domain=None,
power_on=True, pause=False, post_xml_callback=None):
"""Create a domain.
Either domain or xml must be passed in. If both are passed, then
the domain definition is overwritten from the xml.
:returns guest.Guest: Guest just created
"""
if xml:
# Define the guest through libvirt
guest = libvirt_guest.Guest.create(xml, self._host)
if post_xml_callback is not None:
post_xml_callback()
else:
guest = libvirt_guest.Guest(domain)
if power_on or pause:
# Start the guest through libvirt
guest.launch(pause=pause)
if not utils.is_neutron():
guest.enable_hairpin()
return guest
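Guest.create() and guest.launch() are thin wrappers over the libvirt Python bindings: define the domain from the XML, then start it. A hedged, stand-alone illustration of those two underlying steps; the connection URI and function name are examples, not Nova code:
# Hedged sketch of what Guest.create()/launch() boil down to with the
# libvirt Python bindings: defineXML() persists the domain definition,
# createWithFlags() (or create()) actually boots it.
import libvirt

def define_and_boot(xml, uri='qemu:///system'):
    conn = libvirt.open(uri)
    try:
        domain = conn.defineXML(xml)   # "define the VM" step
        domain.createWithFlags(0)      # "launch the VM" step
        return domain
    finally:
        conn.close()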
Summary: at this point the basic code path for creating an instance in OpenStack is clear. The OpenStack CLI sends the RESTful request to create a server --->>> nova-api receives the request and routes it to the controller method (def create()) in /nova/api/openstack/compute/servers.py, which starts the creation flow and eventually reaches self.conductor_compute_rpcapi.schedule_and_build_instances() in /nova/conductor/rpcapi.py, putting an "execute schedule_and_build_instances" request on the RabbitMQ message queue --->>> nova-conductor receives that RPC request and runs schedule_and_build_instances() in /nova/conductor/manager.py, during which it issues a blocking RPC call to nova-scheduler to obtain host_lists (the hosts suitable for the instance); after further processing it puts an "execute build_and_run_instance" request on the RabbitMQ queue --->>> nova-compute picks up that RPC request, runs build_and_run_instance() in /nova/compute/manager.py, and eventually calls self.driver.spawn(), which invokes the hypervisor driver to spawn the instance and completes the definition and start-up of the virtual machine.