-
Notifications
You must be signed in to change notification settings - Fork 23
[wip] discard celery related changes in the volumedriver update #2332
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Closed
Closed
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -87,9 +87,6 @@ def restart_services(cls): | |
| # Get the migration plans for every volume on this host. If there are no plans for certain volumes, it will raise | ||
| balances_by_vpool = cls.get_vpool_balances_for_evacuating_storagerouter(cls.LOCAL_SR) | ||
| if initial_run_steps: | ||
| cls.logger.info('Offloading a MDS catchup to celery. This will ensure all slaves will be caught up to avoid deadlocking') | ||
| MDSServiceController.mds_catchup.apply_async() | ||
| # Plan to execute migrate. Avoid the VPool from being an HA target | ||
| cls.mark_storagerouter_unreachable_for_ha(cls.LOCAL_SR) | ||
| initial_run_steps = False | ||
| try: | ||
|
|
@@ -190,25 +187,15 @@ def migrate_away(cls, balances_by_vpool, storagerouter): | |
| :return: None | ||
| :raises: FailureDuringMigrateException if any volumes failed to move | ||
| """ | ||
| tasks = [] | ||
| signatures = [] | ||
| evacuate_srs = [storagerouter.guid] | ||
| for vpool, balances in balances_by_vpool.iteritems(): | ||
| # Serialize to offload to celery. DataObjects can't be serialized yet | ||
| serialized_balances = [b.to_dict() for b in balances] | ||
| signature = VPoolController.execute_balance_change.si(vpool.guid, serialized_balances, [storagerouter.guid]) | ||
| # Freeze freezes the task into its final form. This will net the async result object we'd normally get from delaying it | ||
| tasks.append(signature.freeze()) | ||
| signatures.append(signature) | ||
| if signatures: | ||
| cls.logger.info('Adding migration group with tasks {}'.format(', '.join(t.id for t in tasks))) | ||
| # Add all chain signatures to a group for parallel execution | ||
| task_group = group(signatures) | ||
| # Wait for the group result | ||
| async_result = task_group.apply_async() | ||
| cls.logger.info('Waiting for all tasks of group {}'.format(async_result.id)) | ||
| # Timeout similar to migrate_master_mds does not make a lot of sense. All tasks are executed in parallel | ||
| _ = async_result.get() | ||
| cls.logger.info("MDS migration finished") | ||
| for balance in balances: # type: VDiskBalance | ||
| if balance.storagedriver.storagerouter_guid in evacuate_srs: | ||
| successfull_moves, failed_moves = balance.execute_balance_change_through_overflow(balances, | ||
| user_input=False, | ||
| abort_on_error=False) | ||
| if failed_moves: | ||
| raise FailureDuringMigrateException('Could not move volumes {} away'.format(', '.join(failed_moves))) | ||
|
|
||
| @classmethod | ||
| def migrate_master_mds(cls, storagerouter, max_chain_size=100, group_timeout=10 * 60): | ||
|
|
@@ -226,26 +213,20 @@ def migrate_master_mds(cls, storagerouter, max_chain_size=100, group_timeout=10 | |
| """ | ||
| cls.logger.info("Starting MDS migrations") | ||
| while True: | ||
| hosted_vdisk_guids = storagerouter._vdisks_guids() | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Don't use signatures here as it will lock up. Discarded tasks are still being waited for. The reason for the catchup (see comment above) is to ensure all slaves are caught up with their master. |
||
| vpool_mds_master_vdisks = cls.get_vdisks_mds_masters_on_storagerouter(storagerouter) | ||
| all_masters_gone = sum(len(vds) for vds in vpool_mds_master_vdisks.values()) == 0 | ||
| if all_masters_gone: | ||
| break | ||
| all_tasks = [] | ||
| chains = [] | ||
| for vpool_guid, vdisk_guids in vpool_mds_master_vdisks.iteritems(): | ||
| signatures = [] | ||
| tasks = [] | ||
| for vdisk_guid in vdisk_guids[0:max_chain_size]: | ||
| if vdisk_guid in hosted_vdisk_guids: | ||
| cls.logger.warning('Skipping vDisk {} as it is still hosted on Storagerouter {}'.format(vdisk_guid, storagerouter.name)) | ||
| cls.logger.info('Ensuring safety for {}'.format(vdisk_guid)) | ||
| # Ensure safety is a common task. Let's timeout on the ensure single quickly to avoid worker lockups | ||
| signature = MDSServiceController.ensure_safety.si(vdisk_guid, ensure_single_timeout=5) | ||
| signature = MDSServiceController.ensure_safety.si(vdisk_guid) | ||
| # Freeze freezes the task into its final form. This will net the async result object we'd normally get from delaying it | ||
| tasks.append(signature.freeze()) | ||
| signatures.append(signature) | ||
| all_tasks.extend(tasks) | ||
| if signatures: | ||
| cls.logger.info('Adding chain for VPool {} with tasks {}'.format(vpool_guid, ', '.join(t.id for t in tasks))) | ||
| chains.append(chain(signatures)) | ||
|
|
@@ -254,24 +235,8 @@ def migrate_master_mds(cls, storagerouter, max_chain_size=100, group_timeout=10 | |
| # Wait for the group result | ||
| async_result = task_group.apply_async() | ||
| cls.logger.info('Waiting for all tasks of group {}'.format(async_result.id)) | ||
| try: | ||
| _ = async_result.get(timeout=group_timeout) | ||
| except TimeoutError: | ||
| cls.logger.warning('Migration took longer than expected. Revoking all non-started tasks') | ||
| revoked_tasks = [] | ||
| for task in all_tasks: | ||
| if task.state == 'PENDING': | ||
| # Certain PENDING tasks cannot be revoked. It appears they're non-existent. Not even the workers know about them | ||
| # @todo build a new result chain and wait for that | ||
| task.revoke() | ||
| revoked_tasks.append(task) | ||
| if revoked_tasks: | ||
| cls.logger.warning('Revoked migration tasks: {}'.format(', '.join(revoked_tasks))) | ||
| cls.logger.warning('Waiting for the execution on the running migrations') | ||
| _ = async_result.get() | ||
| _ = async_result.get() | ||
| cls.logger.info("MDS migration finished") | ||
| if len(hosted_vdisk_guids) > 0: | ||
| raise LocalMastersRemaining('vDisks are still hosted on Storagerouter to migrate from: {}'.format(', '.join(hosted_vdisk_guids), storagerouter.name)) | ||
|
|
||
| @staticmethod | ||
| def get_vdisks_mds_masters_on_storagerouter(storagerouter): | ||
|
|
||
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Has to be kept. We don't care about the result; it just runs in the background.