From 7d19611a2d6e21e8796f5be725295585dfc4f5c5 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 20 Jan 2020 15:00:11 +0000 Subject: [PATCH 01/73] routine to preprocess fx variables --- esmvalcore/_recipe.py | 84 ++++++++++++++++++++++++++++++------------- 1 file changed, 59 insertions(+), 25 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index c94022b5b5..cf0117a64d 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -269,8 +269,7 @@ def _limit_datasets(variables, profile, max_datasets=0): if variable not in limited: limited.append(variable) - logger.info("Only considering %s", - ', '.join(v['alias'] for v in limited)) + logger.info("Only considering %s", ', '.join(v['alias'] for v in limited)) return limited @@ -391,6 +390,7 @@ def _get_correct_fx_file(variable, fx_varname, config_user): var = dict(variable) var_project = variable['project'] cmor_table = CMOR_TABLES[var_project] + valid_fx_vars = [] # Get all fx-related mips ('fx' always first, original mip last) fx_mips = ['fx'] @@ -404,16 +404,19 @@ def _get_correct_fx_file(variable, fx_varname, config_user): fx_variable = cmor_table.get_variable(fx_mip, fx_varname) if fx_variable is not None: searched_mips.append(fx_mip) - fx_var = _add_fxvar_keys( - {'short_name': fx_varname, 'mip': fx_mip}, var) - logger.debug("For CMIP6 fx variable '%s', found table '%s'", - fx_varname, fx_mip) + fx_var = _add_fxvar_keys({ + 'short_name': fx_varname, + 'mip': fx_mip + }, var) + logger.debug("For fx variable '%s', found table '%s'", fx_varname, + fx_mip) fx_files = _get_input_files(fx_var, config_user) # If files found, return them if fx_files: - logger.debug("Found CMIP6 fx variables '%s':\n%s", - fx_varname, pformat(fx_files)) + valid_fx_vars.append(fx_var) + logger.debug("Found fx variables '%s':\n%s", fx_varname, + pformat(fx_files)) break else: # No files found @@ -429,7 +432,7 @@ def _get_correct_fx_file(variable, fx_varname, config_user): if fx_files: fx_files = fx_files[0] - return fx_files + return fx_files, valid_fx_vars def _get_landsea_fraction_fx_dict(variable, config_user): @@ -439,7 +442,8 @@ def _get_landsea_fraction_fx_dict(variable, config_user): if variable['project'] != 'obs4mips': fx_vars.append('sftof') for fx_var in fx_vars: - fx_dict[fx_var] = _get_correct_fx_file(variable, fx_var, config_user) + fx_dict[fx_var] = _get_correct_fx_file(variable, fx_var, + config_user)[0] return fx_dict @@ -476,7 +480,8 @@ def _update_fx_settings(settings, variable, config_user): logger.debug('Getting fx mask settings now...') settings['mask_landseaice']['fx_files'] = [] fx_files_dict = { - 'sftgif': _get_correct_fx_file(variable, 'sftgif', config_user)} + 'sftgif': _get_correct_fx_file(variable, 'sftgif', config_user)[0] + } if fx_files_dict['sftgif']: settings['mask_landseaice']['fx_files'].append( fx_files_dict['sftgif']) @@ -489,12 +494,14 @@ def _update_fx_settings(settings, variable, config_user): logger.info(msg, 'land/sea fraction weighting', pformat(fx_dict)) for step in ('area_statistics', 'volume_statistics'): - if settings.get(step, {}).get('fx_files'): + if settings.get(step, {}).get('fx_files') and not \ + settings.get(step, {}).get('fx_preprocess'): var = dict(variable) var['fx_files'] = settings.get(step, {}).get('fx_files') fx_files_dict = { - fxvar: _get_correct_fx_file(variable, fxvar, config_user) - for fxvar in var['fx_files']} + fxvar: _get_correct_fx_file(variable, fxvar, config_user)[0] + for fxvar in var['fx_files'] + } settings[step]['fx_files'] = fx_files_dict logger.info(msg, step, pformat(fx_files_dict)) @@ -514,10 +521,9 @@ def _read_attributes(filename): def _get_input_files(variable, config_user): """Get the input files for a single dataset (locally and via download).""" - input_files = get_input_filelist( - variable=variable, - rootpath=config_user['rootpath'], - drs=config_user['drs']) + input_files = get_input_filelist(variable=variable, + rootpath=config_user['rootpath'], + drs=config_user['drs']) # Set up downloading using synda if requested. # Do not download if files are already available locally. @@ -703,9 +709,9 @@ def _get_preprocessor_products(variables, profile, order, ancestor_products, ) _update_extract_shape(settings, config_user) _update_weighting_settings(settings, variable) - _update_fx_settings( - settings=settings, variable=variable, - config_user=config_user) + _update_fx_settings(settings=settings, + variable=variable, + config_user=config_user) _update_target_grid( variable=variable, variables=variables, @@ -829,8 +835,7 @@ def append(group_prefix, var): for variable in variables: group_prefix = variable['variable_group'] + '_derive_input_' if not variable.get('force_derivation') and _get_input_files( - variable, - config_user): + variable, config_user): # No need to derive, just process normally up to derive step var = deepcopy(variable) append(group_prefix, var) @@ -872,6 +877,8 @@ def _get_preprocessor_task(variables, profiles, config_user, task_name): _add_cmor_info(variable) # Create preprocessor task(s) derive_tasks = [] + fx_preproc_tasks = [] + # set up tasks if variable.get('derive'): # Create tasks to prepare the input data for the derive step derive_profile, profile = _split_derive_profile(profile) @@ -890,6 +897,34 @@ def _get_preprocessor_task(variables, profiles, config_user, task_name): ) derive_tasks.append(task) + # special case: fx variable pre-processing + for step in ('area_statistics', 'volume_statistics'): + if profile.get(step, {}).get('fx_files') and \ + profile.get(step, {}).get('fx_preprocess'): + var = dict(variable) + fx_vars = profile.get(step, {}).get('fx_files') + + # Create tasks to prepare the input data for the fx var + order = _extract_preprocessor_order(profile) + before, after = _split_settings(profile, step, order) + fx_input = [ + _get_correct_fx_file(variable, fx_var, config_user)[1] + for fx_var in fx_vars + ] + + for fx_variable in fx_input: + fx_name = task_name.split( + TASKSEP)[0] + TASKSEP + fx_variable['short_name'] + task = _get_single_preprocessor_task( + fx_variables, + before, + config_user, + name=fx_name, + ) + fx_preproc_tasks.append(task) + + derive_tasks.extend(fx_preproc_tasks) + # Create (final) preprocessor task task = _get_single_preprocessor_task( variables, @@ -907,7 +942,6 @@ class Recipe: info_keys = ('project', 'dataset', 'exp', 'ensemble', 'version') """List of keys to be used to compose the alias, ordered by priority.""" - def __init__(self, raw_recipe, config_user, @@ -1004,7 +1038,7 @@ def _expand_ensemble(variables): if not match: expanded.append(variable) continue - start, end = match.group(0)[1: -1].split(':') + start, end = match.group(0)[1:-1].split(':') for i in range(int(start), int(end) + 1): expand = deepcopy(variable) expand['ensemble'] = regex.sub(str(i), ensemble, 1) From 01afa17ba0f1584303dc33fa4ae16aa010cd0363 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 20 Jan 2020 16:06:06 +0000 Subject: [PATCH 02/73] fixed bug --- esmvalcore/_recipe.py | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index cf0117a64d..a91136c555 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -901,27 +901,24 @@ def _get_preprocessor_task(variables, profiles, config_user, task_name): for step in ('area_statistics', 'volume_statistics'): if profile.get(step, {}).get('fx_files') and \ profile.get(step, {}).get('fx_preprocess'): - var = dict(variable) fx_vars = profile.get(step, {}).get('fx_files') # Create tasks to prepare the input data for the fx var order = _extract_preprocessor_order(profile) before, after = _split_settings(profile, step, order) - fx_input = [ - _get_correct_fx_file(variable, fx_var, config_user)[1] + fx_variables = [ + _get_correct_fx_file(variable, fx_var, config_user)[1][0] for fx_var in fx_vars ] - - for fx_variable in fx_input: - fx_name = task_name.split( - TASKSEP)[0] + TASKSEP + fx_variable['short_name'] - task = _get_single_preprocessor_task( - fx_variables, - before, - config_user, - name=fx_name, - ) - fx_preproc_tasks.append(task) + fx_name = task_name.split( + TASKSEP)[0] + TASKSEP + fx_variables[0]['variable_group'] + task = _get_single_preprocessor_task( + fx_variables, + before, + config_user, + name=fx_name, + ) + fx_preproc_tasks.append(task) derive_tasks.extend(fx_preproc_tasks) From a96cae1120850675ca2cdd83ebd11cceaff96fe6 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 20 Jan 2020 16:06:21 +0000 Subject: [PATCH 03/73] added opt arg --- esmvalcore/preprocessor/_volume.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/esmvalcore/preprocessor/_volume.py b/esmvalcore/preprocessor/_volume.py index 4859b73639..d9d3c4262b 100644 --- a/esmvalcore/preprocessor/_volume.py +++ b/esmvalcore/preprocessor/_volume.py @@ -170,7 +170,8 @@ def calculate_volume(cube): def volume_statistics( cube, operator, - fx_files=None): + fx_files=None, + fx_preprocess=False): """ Apply a statistical operation over a volume. From ab887ef1d2344ee01d1d08d019afc4a3074a69ca Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 20 Jan 2020 16:06:36 +0000 Subject: [PATCH 04/73] added non functional test --- tests/integration/test_recipe.py | 64 ++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index 682885cb8e..3435be11b5 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -1819,6 +1819,70 @@ def test_fx_vars_volcello_in_ofx_cmip6(tmp_path, patched_datafinder, assert '_Omon_' not in fx_files['volcello'] +def test_fx_vars_volcello_preproc_cmip6(tmp_path, patched_datafinder, + config_user): + content = dedent(""" + preprocessors: + preproc: + custom_order: true + extract_volume: + z_min: 0 + z_max: 100 + annual_statistics: + operator: mean + volume_statistics: + operator: mean + fx_files: ['areacello', 'volcello'] + fx_preprocess: True + + diagnostics: + diagnostic_name: + variables: + tos: + preprocessor: preproc + project: CMIP6 + mip: Omon + exp: historical + start_year: 2000 + end_year: 2005 + ensemble: r1i1p1f1 + grid: gn + additional_datasets: + - {dataset: CanESM5} + scripts: null + """) + recipe = get_recipe(tmp_path, content, config_user) + + # Check generated tasks + assert len(recipe.tasks) == 1 + task = recipe.tasks.pop() + assert task.name == 'diagnostic_name' + TASKSEP + 'tos' + assert len(task.products) == 1 + product = task.products.pop() + + assert len(task.ancestors) == 1 + assert 'diagnostic_name' + TASKSEP + 'toz_derive_input_ps' in [ + t.name for t in task.ancestors + ] + assert 'diagnostic_name' + TASKSEP + 'toz_derive_input_tro3' in [ + t.name for t in task.ancestors + ] + + # Check product content of tasks + assert len(task.products) == 1 + product = task.products.pop() + assert 'derive' in product.settings + assert product.attributes['short_name'] == 'toz' + assert product.files + + ps_product = next(p for a in task.ancestors for p in a.products + if p.attributes['short_name'] == 'ps') + tro3_product = next(p for a in task.ancestors for p in a.products + if p.attributes['short_name'] == 'tro3') + assert ps_product.filename in product.files + assert tro3_product.filename in product.files + + def test_fx_vars_volcello_in_omon_cmip6(tmp_path, patched_failing_datafinder, config_user): content = dedent(""" From 615ab3ba1d44799270465db358075fa839e5d8b1 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 20 Jan 2020 17:48:34 +0000 Subject: [PATCH 05/73] pick up already preprocced files as input --- esmvalcore/_recipe.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index a91136c555..94f85c6c25 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -494,9 +494,9 @@ def _update_fx_settings(settings, variable, config_user): logger.info(msg, 'land/sea fraction weighting', pformat(fx_dict)) for step in ('area_statistics', 'volume_statistics'): + var = dict(variable) if settings.get(step, {}).get('fx_files') and not \ settings.get(step, {}).get('fx_preprocess'): - var = dict(variable) var['fx_files'] = settings.get(step, {}).get('fx_files') fx_files_dict = { fxvar: _get_correct_fx_file(variable, fxvar, config_user)[0] @@ -504,6 +504,19 @@ def _update_fx_settings(settings, variable, config_user): } settings[step]['fx_files'] = fx_files_dict logger.info(msg, step, pformat(fx_files_dict)) + if settings.get(step, {}).get('fx_files') and \ + settings.get(step, {}).get('fx_preprocess'): + var['fx_files'] = settings.get(step, {}).get('fx_files') + fx_variables = [ + _get_correct_fx_file(variable, fxvar, config_user)[1][0] + for fxvar in var['fx_files'] + ] + fx_files_dict = { + fxvar: get_output_file(cmor_fx_var, config_user['preproc_dir']) + for fxvar, cmor_fx_var in zip(var['fx_files'], fx_variables) + } + settings[step]['fx_files'] = fx_files_dict + logger.info(msg, step, pformat(fx_files_dict)) def _read_attributes(filename): From f9c004722e99f25ca19132fd5b7f7fcb3af016fd Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 20 Jan 2020 18:15:05 +0000 Subject: [PATCH 06/73] made all fx vars run as ancestors --- esmvalcore/_recipe.py | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 94f85c6c25..b2804f2e2a 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -919,19 +919,21 @@ def _get_preprocessor_task(variables, profiles, config_user, task_name): # Create tasks to prepare the input data for the fx var order = _extract_preprocessor_order(profile) before, after = _split_settings(profile, step, order) - fx_variables = [ - _get_correct_fx_file(variable, fx_var, config_user)[1][0] - for fx_var in fx_vars - ] - fx_name = task_name.split( - TASKSEP)[0] + TASKSEP + fx_variables[0]['variable_group'] - task = _get_single_preprocessor_task( - fx_variables, - before, - config_user, - name=fx_name, - ) - fx_preproc_tasks.append(task) + for var in variables: + fx_variables = [ + _get_correct_fx_file(var, fx_var, config_user)[1][0] + for fx_var in fx_vars + ] + fx_name = task_name.split( + TASKSEP)[0] + TASKSEP + 'area-vol_stat_' + \ + fx_variables[0]['variable_group'] + task = _get_single_preprocessor_task( + fx_variables, + before, + config_user, + name=fx_name, + ) + fx_preproc_tasks.append(task) derive_tasks.extend(fx_preproc_tasks) From 2b575e1247ea94103e88c79d13d07639ee00b92c Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Tue, 21 Jan 2020 12:27:40 +0000 Subject: [PATCH 07/73] fixed handling of tasks --- esmvalcore/_recipe.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index b2804f2e2a..e7cfd8d774 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -924,16 +924,17 @@ def _get_preprocessor_task(variables, profiles, config_user, task_name): _get_correct_fx_file(var, fx_var, config_user)[1][0] for fx_var in fx_vars ] - fx_name = task_name.split( - TASKSEP)[0] + TASKSEP + 'area-vol_stat_' + \ - fx_variables[0]['variable_group'] - task = _get_single_preprocessor_task( - fx_variables, - before, - config_user, - name=fx_name, - ) - fx_preproc_tasks.append(task) + for fx_variable in fx_variables: + fx_name = task_name.split( + TASKSEP)[0] + TASKSEP + 'fx_area-volume_stats_' + \ + fx_variable['variable_group'] + task = _get_single_preprocessor_task( + [fx_variable], + before, + config_user, + name=fx_name, + ) + fx_preproc_tasks.append(task) derive_tasks.extend(fx_preproc_tasks) From 434ee125f8fc4870088890eba0e0b1f2aa50f7c4 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Tue, 21 Jan 2020 12:27:52 +0000 Subject: [PATCH 08/73] fixed test --- tests/integration/test_recipe.py | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index 3435be11b5..a0c2a3e6ca 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -1857,30 +1857,26 @@ def test_fx_vars_volcello_preproc_cmip6(tmp_path, patched_datafinder, assert len(recipe.tasks) == 1 task = recipe.tasks.pop() assert task.name == 'diagnostic_name' + TASKSEP + 'tos' - assert len(task.products) == 1 - product = task.products.pop() - - assert len(task.ancestors) == 1 - assert 'diagnostic_name' + TASKSEP + 'toz_derive_input_ps' in [ + assert len(task.ancestors) == 2 + assert 'diagnostic_name' + TASKSEP + 'fx_area-volume_stats_areacello' in [ t.name for t in task.ancestors ] - assert 'diagnostic_name' + TASKSEP + 'toz_derive_input_tro3' in [ + assert 'diagnostic_name' + TASKSEP + 'fx_area-volume_stats_volcello' in [ t.name for t in task.ancestors ] # Check product content of tasks assert len(task.products) == 1 product = task.products.pop() - assert 'derive' in product.settings - assert product.attributes['short_name'] == 'toz' + assert product.attributes['short_name'] == 'tos' assert product.files - ps_product = next(p for a in task.ancestors for p in a.products - if p.attributes['short_name'] == 'ps') - tro3_product = next(p for a in task.ancestors for p in a.products - if p.attributes['short_name'] == 'tro3') - assert ps_product.filename in product.files - assert tro3_product.filename in product.files + area_product = next(p for a in task.ancestors for p in a.products + if p.attributes['short_name'] == 'areacello') + vol_product = next(p for a in task.ancestors for p in a.products + if p.attributes['short_name'] == 'volcello') + assert area_product.filename in product.files + assert vol_product.filename in product.files def test_fx_vars_volcello_in_omon_cmip6(tmp_path, patched_failing_datafinder, From d1cdbc646f269677be2ff110439138a5399ff827 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Tue, 21 Jan 2020 13:58:34 +0000 Subject: [PATCH 09/73] fixed main var bug finally --- esmvalcore/_recipe.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index e7cfd8d774..e6b64098b8 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -496,7 +496,7 @@ def _update_fx_settings(settings, variable, config_user): for step in ('area_statistics', 'volume_statistics'): var = dict(variable) if settings.get(step, {}).get('fx_files') and not \ - settings.get(step, {}).get('fx_preprocess'): + var.get("fx_var_preprocess"): var['fx_files'] = settings.get(step, {}).get('fx_files') fx_files_dict = { fxvar: _get_correct_fx_file(variable, fxvar, config_user)[0] @@ -505,7 +505,7 @@ def _update_fx_settings(settings, variable, config_user): settings[step]['fx_files'] = fx_files_dict logger.info(msg, step, pformat(fx_files_dict)) if settings.get(step, {}).get('fx_files') and \ - settings.get(step, {}).get('fx_preprocess'): + var.get("fx_var_preprocess"): var['fx_files'] = settings.get(step, {}).get('fx_files') fx_variables = [ _get_correct_fx_file(variable, fxvar, config_user)[1][0] @@ -733,7 +733,7 @@ def _get_preprocessor_products(variables, profile, order, ancestor_products, ) _update_regrid_time(variable, settings) ancestors = grouped_ancestors.get(variable['filename']) - if not ancestors: + if not ancestors or variable.get("fx_var_preprocess"): ancestors = _get_ancestors(variable, config_user) if config_user.get('skip-nonexistent') and not ancestors: logger.info("Skipping: no data found for %s", variable) @@ -913,7 +913,7 @@ def _get_preprocessor_task(variables, profiles, config_user, task_name): # special case: fx variable pre-processing for step in ('area_statistics', 'volume_statistics'): if profile.get(step, {}).get('fx_files') and \ - profile.get(step, {}).get('fx_preprocess'): + variable.get("fx_var_preprocess"): fx_vars = profile.get(step, {}).get('fx_files') # Create tasks to prepare the input data for the fx var From 1ba5c74023a4a497b620ae7e13f9905b9d123b6d Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Tue, 21 Jan 2020 13:58:51 +0000 Subject: [PATCH 10/73] fixed test proper --- tests/integration/test_recipe.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index a0c2a3e6ca..014852cc1f 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -1833,13 +1833,13 @@ def test_fx_vars_volcello_preproc_cmip6(tmp_path, patched_datafinder, volume_statistics: operator: mean fx_files: ['areacello', 'volcello'] - fx_preprocess: True diagnostics: diagnostic_name: variables: tos: preprocessor: preproc + fx_var_preprocess: True project: CMIP6 mip: Omon exp: historical @@ -1864,19 +1864,19 @@ def test_fx_vars_volcello_preproc_cmip6(tmp_path, patched_datafinder, assert 'diagnostic_name' + TASKSEP + 'fx_area-volume_stats_volcello' in [ t.name for t in task.ancestors ] - - # Check product content of tasks assert len(task.products) == 1 product = task.products.pop() assert product.attributes['short_name'] == 'tos' assert product.files - - area_product = next(p for a in task.ancestors for p in a.products - if p.attributes['short_name'] == 'areacello') - vol_product = next(p for a in task.ancestors for p in a.products - if p.attributes['short_name'] == 'volcello') - assert area_product.filename in product.files - assert vol_product.filename in product.files + assert 'volume_statistics' in product.settings + settings = product.settings['volume_statistics'] + assert len(settings) == 2 + assert settings['operator'] == 'mean' + fx_files = settings['fx_files'] + assert isinstance(fx_files, dict) + assert 'preproc' in fx_files['areacello'] + assert 'CMIP6_CanESM5_Ofx_historical_r1i1p1f1_areacello.nc' in \ + fx_files['areacello'] def test_fx_vars_volcello_in_omon_cmip6(tmp_path, patched_failing_datafinder, From 2ae17bcc23e8f1102a4b13fa8040da39cfc309af Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Tue, 21 Jan 2020 14:03:11 +0000 Subject: [PATCH 11/73] removed old flag --- esmvalcore/preprocessor/_volume.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/esmvalcore/preprocessor/_volume.py b/esmvalcore/preprocessor/_volume.py index d9d3c4262b..4859b73639 100644 --- a/esmvalcore/preprocessor/_volume.py +++ b/esmvalcore/preprocessor/_volume.py @@ -170,8 +170,7 @@ def calculate_volume(cube): def volume_statistics( cube, operator, - fx_files=None, - fx_preprocess=False): + fx_files=None): """ Apply a statistical operation over a volume. From 7bb133d7f84fd11804bd247e151d538ed772f33d Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Tue, 21 Jan 2020 14:35:54 +0000 Subject: [PATCH 12/73] added extra preproc sugg by Lee Co-Authored-By: Lee de Mora --- esmvalcore/_recipe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index e6b64098b8..dff4a06910 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -911,7 +911,7 @@ def _get_preprocessor_task(variables, profiles, config_user, task_name): derive_tasks.append(task) # special case: fx variable pre-processing - for step in ('area_statistics', 'volume_statistics'): + for step in ('area_statistics', 'volume_statistics', 'zonal_statistics'): if profile.get(step, {}).get('fx_files') and \ variable.get("fx_var_preprocess"): fx_vars = profile.get(step, {}).get('fx_files') From e99aff09417053a2941392360485640d2e4538ee Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Tue, 21 Jan 2020 14:36:34 +0000 Subject: [PATCH 13/73] added extra preproc step sugg by Lee Co-Authored-By: Lee de Mora --- esmvalcore/_recipe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index dff4a06910..8012a3e7e2 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -493,7 +493,7 @@ def _update_fx_settings(settings, variable, config_user): settings['weighting_landsea_fraction']['fx_files'] = fx_dict logger.info(msg, 'land/sea fraction weighting', pformat(fx_dict)) - for step in ('area_statistics', 'volume_statistics'): + for step in ('area_statistics', 'volume_statistics', 'zonal_statistics'): var = dict(variable) if settings.get(step, {}).get('fx_files') and not \ var.get("fx_var_preprocess"): From aa67226a9ff94ac42c5a33b6e3bb145d567ad03c Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Tue, 21 Jan 2020 15:08:00 +0000 Subject: [PATCH 14/73] added check on ancestry --- tests/integration/test_recipe.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index 014852cc1f..e73afe44d4 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -1858,6 +1858,7 @@ def test_fx_vars_volcello_preproc_cmip6(tmp_path, patched_datafinder, task = recipe.tasks.pop() assert task.name == 'diagnostic_name' + TASKSEP + 'tos' assert len(task.ancestors) == 2 + assert len(task.ancestors[0].ancestors) == 0 assert 'diagnostic_name' + TASKSEP + 'fx_area-volume_stats_areacello' in [ t.name for t in task.ancestors ] From d897b1b2f4fc731690e4990d0bb78439bd4f936d Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Tue, 21 Jan 2020 16:43:57 +0000 Subject: [PATCH 15/73] made it more robust --- esmvalcore/_recipe.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 8012a3e7e2..b9c7a25c18 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -431,6 +431,8 @@ def _get_correct_fx_file(variable, fx_varname, config_user): # allow for empty lists corrected for by NE masks if fx_files: fx_files = fx_files[0] + if valid_fx_vars: + valid_fx_vars = valid_fx_vars[0] return fx_files, valid_fx_vars @@ -506,17 +508,25 @@ def _update_fx_settings(settings, variable, config_user): logger.info(msg, step, pformat(fx_files_dict)) if settings.get(step, {}).get('fx_files') and \ var.get("fx_var_preprocess"): + avail_fx_files_dict = {} var['fx_files'] = settings.get(step, {}).get('fx_files') fx_variables = [ - _get_correct_fx_file(variable, fxvar, config_user)[1][0] + _get_correct_fx_file(variable, fxvar, config_user)[1] for fxvar in var['fx_files'] ] - fx_files_dict = { - fxvar: get_output_file(cmor_fx_var, config_user['preproc_dir']) - for fxvar, cmor_fx_var in zip(var['fx_files'], fx_variables) + if fx_variables: + avail_fx_files_dict = { + cmor_fx_var['short_name']: + get_output_file(cmor_fx_var, config_user['preproc_dir']) + for cmor_fx_var in fx_variables + } + settings[step]['fx_files'] = { + non_fx: [] + for non_fx in var['fx_files'] + if non_fx not in avail_fx_files_dict } - settings[step]['fx_files'] = fx_files_dict - logger.info(msg, step, pformat(fx_files_dict)) + settings[step]['fx_files'].update(avail_fx_files_dict) + logger.info(msg, step, pformat(avail_fx_files_dict)) def _read_attributes(filename): @@ -921,7 +931,7 @@ def _get_preprocessor_task(variables, profiles, config_user, task_name): before, after = _split_settings(profile, step, order) for var in variables: fx_variables = [ - _get_correct_fx_file(var, fx_var, config_user)[1][0] + _get_correct_fx_file(var, fx_var, config_user)[1] for fx_var in fx_vars ] for fx_variable in fx_variables: From c1e5141feafb414796f5a532023ff4f969e373b5 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Tue, 21 Jan 2020 17:35:02 +0000 Subject: [PATCH 16/73] correct profile applied --- esmvalcore/_recipe.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index b9c7a25c18..b2ec951653 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -924,11 +924,13 @@ def _get_preprocessor_task(variables, profiles, config_user, task_name): for step in ('area_statistics', 'volume_statistics', 'zonal_statistics'): if profile.get(step, {}).get('fx_files') and \ variable.get("fx_var_preprocess"): + # conserve profile + fx_profile = deepcopy(profile) fx_vars = profile.get(step, {}).get('fx_files') # Create tasks to prepare the input data for the fx var - order = _extract_preprocessor_order(profile) - before, after = _split_settings(profile, step, order) + order = _extract_preprocessor_order(fx_profile) + before, after = _split_settings(fx_profile, step, order) for var in variables: fx_variables = [ _get_correct_fx_file(var, fx_var, config_user)[1] From 5fc532fe5740c3ff772c49cbf0b67e4e9fa40ad8 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Tue, 21 Jan 2020 17:39:27 +0000 Subject: [PATCH 17/73] replaced to dummy var --- esmvalcore/_recipe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index b2ec951653..967b9a49e7 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -930,7 +930,7 @@ def _get_preprocessor_task(variables, profiles, config_user, task_name): # Create tasks to prepare the input data for the fx var order = _extract_preprocessor_order(fx_profile) - before, after = _split_settings(fx_profile, step, order) + before, _ = _split_settings(fx_profile, step, order) for var in variables: fx_variables = [ _get_correct_fx_file(var, fx_var, config_user)[1] From 9ed2f573750e66f4baf6f7cafb9965f1c6746b0a Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Wed, 22 Jan 2020 13:10:42 +0000 Subject: [PATCH 18/73] made it more robust --- esmvalcore/_recipe.py | 83 ++++++++++++++++++++++++++----------------- 1 file changed, 51 insertions(+), 32 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 967b9a49e7..00539b2984 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -444,8 +444,9 @@ def _get_landsea_fraction_fx_dict(variable, config_user): if variable['project'] != 'obs4mips': fx_vars.append('sftof') for fx_var in fx_vars: - fx_dict[fx_var] = _get_correct_fx_file(variable, fx_var, - config_user)[0] + fx_file, _ = _get_correct_fx_file(variable, fx_var, config_user) + fx_dict[fx_var] = fx_file + return fx_dict @@ -481,9 +482,8 @@ def _update_fx_settings(settings, variable, config_user): if 'mask_landseaice' in settings: logger.debug('Getting fx mask settings now...') settings['mask_landseaice']['fx_files'] = [] - fx_files_dict = { - 'sftgif': _get_correct_fx_file(variable, 'sftgif', config_user)[0] - } + fx_file, _ = _get_correct_fx_file(variable, 'sftgif', config_user) + fx_files_dict = {'sftgif': fx_file} if fx_files_dict['sftgif']: settings['mask_landseaice']['fx_files'].append( fx_files_dict['sftgif']) @@ -497,36 +497,55 @@ def _update_fx_settings(settings, variable, config_user): for step in ('area_statistics', 'volume_statistics', 'zonal_statistics'): var = dict(variable) - if settings.get(step, {}).get('fx_files') and not \ - var.get("fx_var_preprocess"): + cmor_fxvars = [] + fx_files_dict = {} + if settings.get(step, {}).get('fx_files'): var['fx_files'] = settings.get(step, {}).get('fx_files') - fx_files_dict = { - fxvar: _get_correct_fx_file(variable, fxvar, config_user)[0] - for fxvar in var['fx_files'] - } + for fxvar in var['fx_files']: + fx_file, cmor_fx_var = _get_correct_fx_file( + variable, fxvar, config_user) + fx_files_dict[fxvar] = fx_file + if cmor_fx_var: + cmor_fxvars.append(cmor_fx_var) + + # special case for preprocessing fx variables + if var.get("fx_var_preprocess"): + + # a brand new dictionary in this case + fx_files_dict = {} + + # we need to switch to (virtual) output file + # but need to perform data checks first + if not cmor_fxvars: + raise RecipeError( + f"No data files found for {var['fx_files']} " + f"for variable {var['short_name']} " + f"but are needed for preprocessing.") + else: + fxvars_with_data = [ + cmor_fx_var['short_name'] + for cmor_fx_var in cmor_fxvars + ] + missing_fx_vars = [ + fx for fx in var['fx_files'] + if fx not in fxvars_with_data + ] + if missing_fx_vars: + raise RecipeError( + f"No data files found for {missing_fx_vars} " + f"for variable {var['short_name']} " + f"but are needed for preprocessing.") + else: + fx_files_dict = { + cmor_fx_var['short_name']: + get_output_file(cmor_fx_var, + config_user['preproc_dir']) + for cmor_fx_var in cmor_fxvars + } + + # finally construct the fx files dictionary settings[step]['fx_files'] = fx_files_dict logger.info(msg, step, pformat(fx_files_dict)) - if settings.get(step, {}).get('fx_files') and \ - var.get("fx_var_preprocess"): - avail_fx_files_dict = {} - var['fx_files'] = settings.get(step, {}).get('fx_files') - fx_variables = [ - _get_correct_fx_file(variable, fxvar, config_user)[1] - for fxvar in var['fx_files'] - ] - if fx_variables: - avail_fx_files_dict = { - cmor_fx_var['short_name']: - get_output_file(cmor_fx_var, config_user['preproc_dir']) - for cmor_fx_var in fx_variables - } - settings[step]['fx_files'] = { - non_fx: [] - for non_fx in var['fx_files'] - if non_fx not in avail_fx_files_dict - } - settings[step]['fx_files'].update(avail_fx_files_dict) - logger.info(msg, step, pformat(avail_fx_files_dict)) def _read_attributes(filename): From f84f6e76a85890927edec95c6f42a4826e2af34c Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Wed, 22 Jan 2020 13:41:40 +0000 Subject: [PATCH 19/73] more power to the test --- tests/integration/test_recipe.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index e73afe44d4..7a58de0273 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -1865,6 +1865,15 @@ def test_fx_vars_volcello_preproc_cmip6(tmp_path, patched_datafinder, assert 'diagnostic_name' + TASKSEP + 'fx_area-volume_stats_volcello' in [ t.name for t in task.ancestors ] + for product in task.products: + assert 'volume_statistics' in product.settings + assert product.attributes['short_name'] == 'tos' + for ancestor_product in task.ancestors[0].products: + assert ancestor_product.attributes['short_name'] == 'areacello' + assert 'volume_statistics' not in ancestor_product.settings + for ancestor_product in task.ancestors[1].products: + assert ancestor_product.attributes['short_name'] == 'volcello' + assert 'volume_statistics' not in ancestor_product.settings assert len(task.products) == 1 product = task.products.pop() assert product.attributes['short_name'] == 'tos' From e520166a41359305ac6c5a356c71dd11a4f7ed31 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Wed, 22 Jan 2020 15:54:42 +0000 Subject: [PATCH 20/73] allow for user defined fxvars dicts --- esmvalcore/_recipe.py | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 00539b2984..f3816ef7ff 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -386,7 +386,15 @@ def _add_fxvar_keys(fx_var_dict, variable): def _get_correct_fx_file(variable, fx_varname, config_user): """Get fx files (searching all possible mips).""" - # TODO: allow user to specify certain mip if desired + # first see if we have strings or dicts for fx_varname + user_defined = False + if isinstance(fx_varname, dict): + user_defined = True + user_fx_mip = fx_varname.get('mip') + user_fx_experiment = fx_varname.get('exp') + fx_varname = fx_varname.get('short_name') + + # assemble info from master variable var = dict(variable) var_project = variable['project'] cmor_table = CMOR_TABLES[var_project] @@ -398,7 +406,14 @@ def _get_correct_fx_file(variable, fx_varname, config_user): [key for key in cmor_table.tables if 'fx' in key and key != 'fx']) fx_mips.append(variable['mip']) + # force only the mip declared by user + if user_defined and user_fx_mip: + fx_mips = [ + user_fx_mip, + ] + # Search all mips for available variables + # priority goes to user specified mip if available searched_mips = [] for fx_mip in fx_mips: fx_variable = cmor_table.get_variable(fx_mip, fx_varname) @@ -408,6 +423,8 @@ def _get_correct_fx_file(variable, fx_varname, config_user): 'short_name': fx_varname, 'mip': fx_mip }, var) + if user_defined and user_fx_experiment: + fx_var['exp'] = user_fx_experiment logger.debug("For fx variable '%s', found table '%s'", fx_varname, fx_mip) fx_files = _get_input_files(fx_var, config_user) @@ -502,9 +519,15 @@ def _update_fx_settings(settings, variable, config_user): if settings.get(step, {}).get('fx_files'): var['fx_files'] = settings.get(step, {}).get('fx_files') for fxvar in var['fx_files']: + fxvar_name = fxvar + if isinstance(fxvar, dict): + fxvar_name = fxvar.get("short_name") + if not fxvar_name: + raise RecipeError( + "Key short_name missing from {}".format(fxvar)) fx_file, cmor_fx_var = _get_correct_fx_file( variable, fxvar, config_user) - fx_files_dict[fxvar] = fx_file + fx_files_dict[fxvar_name] = fx_file if cmor_fx_var: cmor_fxvars.append(cmor_fx_var) @@ -530,6 +553,11 @@ def _update_fx_settings(settings, variable, config_user): fx for fx in var['fx_files'] if fx not in fxvars_with_data ] + if isinstance(fxvar, dict): + missing_fx_vars = [ + fx.get('short_name') for fx in var['fx_files'] + if fx.get('short_name') not in fxvars_with_data + ] if missing_fx_vars: raise RecipeError( f"No data files found for {missing_fx_vars} " From 967423fcd205b94af0f892ea1305f7ff4b831e22 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Wed, 22 Jan 2020 15:55:03 +0000 Subject: [PATCH 21/73] added needed tests for fxvars dicts --- tests/integration/test_recipe.py | 120 +++++++++++++++++++++++++++++++ 1 file changed, 120 insertions(+) diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index 7a58de0273..54cfecb17b 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -1819,6 +1819,54 @@ def test_fx_vars_volcello_in_ofx_cmip6(tmp_path, patched_datafinder, assert '_Omon_' not in fx_files['volcello'] +def test_fx_dicts_volcello_in_ofx_cmip6(tmp_path, patched_datafinder, + config_user): + content = dedent(""" + preprocessors: + preproc: + volume_statistics: + operator: mean + fx_files: [{'short_name': 'volcello', 'mip': 'Oyr', + 'exp': 'piControl'}] + + diagnostics: + diagnostic_name: + variables: + tos: + preprocessor: preproc + project: CMIP6 + mip: Omon + exp: historical + start_year: 2000 + end_year: 2005 + ensemble: r1i1p1f1 + grid: gn + additional_datasets: + - {dataset: CanESM5} + scripts: null + """) + recipe = get_recipe(tmp_path, content, config_user) + + # Check generated tasks + assert len(recipe.tasks) == 1 + task = recipe.tasks.pop() + assert task.name == 'diagnostic_name' + TASKSEP + 'tos' + assert len(task.products) == 1 + product = task.products.pop() + + # Check volume_statistics + assert 'volume_statistics' in product.settings + settings = product.settings['volume_statistics'] + assert len(settings) == 2 + assert settings['operator'] == 'mean' + fx_files = settings['fx_files'] + assert isinstance(fx_files, dict) + assert len(fx_files) == 1 + assert '_Oyr_' in fx_files['volcello'] + assert '_piControl_' in fx_files['volcello'] + assert '_Omon_' not in fx_files['volcello'] + + def test_fx_vars_volcello_preproc_cmip6(tmp_path, patched_datafinder, config_user): content = dedent(""" @@ -1889,6 +1937,78 @@ def test_fx_vars_volcello_preproc_cmip6(tmp_path, patched_datafinder, fx_files['areacello'] +def test_fx_dicts_volcello_preproc_cmip6(tmp_path, patched_datafinder, + config_user): + content = dedent(""" + preprocessors: + preproc: + custom_order: true + extract_volume: + z_min: 0 + z_max: 100 + annual_statistics: + operator: mean + volume_statistics: + operator: mean + fx_files: [{'short_name': 'areacello', 'mip': 'Ofx', + 'exp': 'piControl'}, + {'short_name': 'volcello', 'mip': 'Omon', + 'exp': 'historical'}] + + diagnostics: + diagnostic_name: + variables: + tos: + preprocessor: preproc + fx_var_preprocess: True + project: CMIP6 + mip: Omon + exp: historical + start_year: 2000 + end_year: 2005 + ensemble: r1i1p1f1 + grid: gn + additional_datasets: + - {dataset: CanESM5} + scripts: null + """) + recipe = get_recipe(tmp_path, content, config_user) + + # Check generated tasks + assert len(recipe.tasks) == 1 + task = recipe.tasks.pop() + assert task.name == 'diagnostic_name' + TASKSEP + 'tos' + assert len(task.ancestors) == 2 + assert len(task.ancestors[0].ancestors) == 0 + assert 'diagnostic_name' + TASKSEP + 'fx_area-volume_stats_areacello' in [ + t.name for t in task.ancestors + ] + assert 'diagnostic_name' + TASKSEP + 'fx_area-volume_stats_volcello' in [ + t.name for t in task.ancestors + ] + for product in task.products: + assert 'volume_statistics' in product.settings + assert product.attributes['short_name'] == 'tos' + for ancestor_product in task.ancestors[0].products: + assert ancestor_product.attributes['short_name'] == 'areacello' + assert ancestor_product.attributes['mip'] == 'Ofx' + assert ancestor_product.attributes['exp'] == 'piControl' + assert 'volume_statistics' not in ancestor_product.settings + for ancestor_product in task.ancestors[1].products: + assert ancestor_product.attributes['short_name'] == 'volcello' + assert ancestor_product.attributes['mip'] == 'Omon' + assert ancestor_product.attributes['exp'] == 'historical' + assert 'volume_statistics' not in ancestor_product.settings + for ancestor_product in task.ancestors[1].products: + assert ancestor_product.attributes['short_name'] == 'volcello' + assert 'volume_statistics' not in ancestor_product.settings + assert len(task.products) == 1 + product = task.products.pop() + assert product.attributes['short_name'] == 'tos' + assert product.files + assert 'volume_statistics' in product.settings + + def test_fx_vars_volcello_in_omon_cmip6(tmp_path, patched_failing_datafinder, config_user): content = dedent(""" From e8711a256295fe35f6b84d48683b371cca3ea4c6 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Wed, 22 Jan 2020 16:42:56 +0000 Subject: [PATCH 22/73] added doc on new features for fx files --- doc/esmvalcore/preprocessor.rst | 86 +++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) diff --git a/doc/esmvalcore/preprocessor.rst b/doc/esmvalcore/preprocessor.rst index 70e5683045..5e784d8904 100644 --- a/doc/esmvalcore/preprocessor.rst +++ b/doc/esmvalcore/preprocessor.rst @@ -414,6 +414,92 @@ variable items e.g.: .. _masking of missing values: + +Using mask files for area/volume/zonal statistics +------------------------------------------------- + +Preprocessors like `volume_statistics` may take the optional key `fx_files` to allow +the computation of statistics with fx data; this data is retrieved in the same manner +as regular data files and can be ingested in the statistical computation as-is or it +can be preprocessed: + +Key features: + +- it is possible to run a set of preprocessor steps on the `fx_files: [vars or dicts of vars]` specified in the `area_statistics`, `volume_statistics` or `zonal_statistics`by setting the `fx_var_preprocess: True` in the `variable`; take for instance this example: + +.. code-block:: yaml + + preprocessors: + prep: + custom_order: true + extract_volume: + z_min: 0 + z_max: 100 + annual_statistics: + operator: mean + volume_statistics: + operator: mean + fx_files: [volcello, ] + + diagnostics: + diag: + variables: + thetao700_Omon: + short_name: thetao + preprocessor: prep + fx_var_preprocess: True + mip: Omon + exp: historical + ensemble: r1i1p1f1 + grid: gn + start_year: 2010 + end_year: 2014 + additional_datasets: + - {dataset: GFDL-CM4} + - {dataset: HadGEM3-GC31-LL} + - {dataset: UKESM1-0-LL} + + +this setup will run the custom-ordered preprocessor steps in `preprocessors/prep` before (and not including) `volume_statistics` for the fx variable `volcello`; + +- it is also possible to request the same type of fx variables but as dictionaries, providing more information towards the retrieval of the correct data; take this snippet for example: + + +.. code-block:: yaml + + preprocessors: + prep: + custom_order: true + extract_volume: + z_min: 0 + z_max: 100 + annual_statistics: + operator: mean + volume_statistics: + operator: mean + fx_files: [{short_name: volcello, mip: Omon, exp: historical}] + + diagnostics: + diag: + variables: + thetao700_Omon: + short_name: thetao + preprocessor: prep + fx_var_preprocess: True + mip: Omon + exp: historical + ensemble: r1i1p1f1 + grid: gn + start_year: 2010 + end_year: 2014 + additional_datasets: + - {dataset: GFDL-CM4} + - {dataset: HadGEM3-GC31-LL} + - {dataset: UKESM1-0-LL} + + +in this case, by providing the extra CMOR keys, we allow the code to find the `volcello` with mip `Omon` and experiment `historical`. This is particularly useful when the actual fx data is not found in the master variable's (`thetao` in this case) data experiment; this is also useful to force a certain time-dependent data (mip: Omon) for preprocessors that need time operations + Missing values masks -------------------- From a3b33ddb400c4359772fa9478b8e4790793dcae8 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Thu, 23 Jan 2020 13:17:17 +0000 Subject: [PATCH 23/73] remove time preprocessors for fx mips --- esmvalcore/_recipe.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index f3816ef7ff..11e5c4a082 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -22,7 +22,7 @@ from .cmor.table import CMOR_TABLES from .preprocessor import (DEFAULT_ORDER, FINAL_STEPS, INITIAL_STEPS, MULTI_MODEL_FUNCTIONS, PreprocessingTask, - PreprocessorFile) + PreprocessorFile, TIME_PREPROCESSORS) from .preprocessor._derive import get_required from .preprocessor._download import synda_search from .preprocessor._io import DATASET_KEYS, concatenate_callback @@ -929,6 +929,16 @@ def append(group_prefix, var): return derive_input +def _remove_time_preproc(fxprofile): + "Remove all time preprocessors from the fx profile.""" + fxprofile = deepcopy(fxprofile) + for key in fxprofile: + if key in TIME_PREPROCESSORS: + fxprofile[key] = False + + return fxprofile + + def _get_preprocessor_task(variables, profiles, config_user, task_name): """Create preprocessor task(s) for a set of datasets.""" # First set up the preprocessor profile @@ -977,13 +987,17 @@ def _get_preprocessor_task(variables, profiles, config_user, task_name): # Create tasks to prepare the input data for the fx var order = _extract_preprocessor_order(fx_profile) - before, _ = _split_settings(fx_profile, step, order) for var in variables: fx_variables = [ _get_correct_fx_file(var, fx_var, config_user)[1] for fx_var in fx_vars ] for fx_variable in fx_variables: + before, _ = _split_settings(fx_profile, step, order) + # remove time preprocessors for any fx/Ofx/Efx/etc + # that dont have time coords + if 'fx' in fx_variable['mip']: + before = _remove_time_preproc(before) fx_name = task_name.split( TASKSEP)[0] + TASKSEP + 'fx_area-volume_stats_' + \ fx_variable['variable_group'] From aaa638fe7a97cc5984a8873b392520b638b1404d Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Thu, 23 Jan 2020 13:17:38 +0000 Subject: [PATCH 24/73] added time preprocessors list --- esmvalcore/preprocessor/__init__.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/esmvalcore/preprocessor/__init__.py b/esmvalcore/preprocessor/__init__.py index 2438dfb876..10e4406f5f 100644 --- a/esmvalcore/preprocessor/__init__.py +++ b/esmvalcore/preprocessor/__init__.py @@ -104,6 +104,20 @@ 'cleanup', ] +TIME_PREPROCESSORS = [ + 'extract_time', + 'extract_season', + 'extract_month', + 'daily_statistics', + 'monthly_statistics', + 'seasonal_statistics', + 'annual_statistics', + 'decadal_statistics', + 'climate_statistics', + 'anomalies', + 'regrid_time', +] + DEFAULT_ORDER = tuple(__all__) # The order of initial and final steps cannot be configured From c80d027895faa2a18ee3807b61914302995d702b Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Thu, 23 Jan 2020 13:17:58 +0000 Subject: [PATCH 25/73] fixed test and asdded check on time preproc removal --- tests/integration/test_recipe.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index 54cfecb17b..3ee06ed7e4 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -1993,14 +1993,13 @@ def test_fx_dicts_volcello_preproc_cmip6(tmp_path, patched_datafinder, assert ancestor_product.attributes['short_name'] == 'areacello' assert ancestor_product.attributes['mip'] == 'Ofx' assert ancestor_product.attributes['exp'] == 'piControl' + assert 'annual_statistics' not in ancestor_product.settings assert 'volume_statistics' not in ancestor_product.settings for ancestor_product in task.ancestors[1].products: assert ancestor_product.attributes['short_name'] == 'volcello' assert ancestor_product.attributes['mip'] == 'Omon' assert ancestor_product.attributes['exp'] == 'historical' - assert 'volume_statistics' not in ancestor_product.settings - for ancestor_product in task.ancestors[1].products: - assert ancestor_product.attributes['short_name'] == 'volcello' + assert 'annual_statistics' in ancestor_product.settings assert 'volume_statistics' not in ancestor_product.settings assert len(task.products) == 1 product = task.products.pop() From dbcd03e82243692f1b8143dc09f82b01adcb5dd8 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Thu, 23 Jan 2020 13:30:19 +0000 Subject: [PATCH 26/73] changed declaration of fx files --- esmvalcore/_recipe.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 11e5c4a082..5abff439ae 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -415,6 +415,7 @@ def _get_correct_fx_file(variable, fx_varname, config_user): # Search all mips for available variables # priority goes to user specified mip if available searched_mips = [] + fx_files = [] for fx_mip in fx_mips: fx_variable = cmor_table.get_variable(fx_mip, fx_varname) if fx_variable is not None: @@ -435,9 +436,6 @@ def _get_correct_fx_file(variable, fx_varname, config_user): logger.debug("Found fx variables '%s':\n%s", fx_varname, pformat(fx_files)) break - else: - # No files found - fx_files = [] # If fx variable was not found in any table, raise exception if not searched_mips: From fe5a1eac45be97d7d14f9416cb7591e263ab4a46 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Thu, 23 Jan 2020 13:57:12 +0000 Subject: [PATCH 27/73] add raiser if no available fx vars --- esmvalcore/_recipe.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 5abff439ae..99bb92c797 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -990,6 +990,12 @@ def _get_preprocessor_task(variables, profiles, config_user, task_name): _get_correct_fx_file(var, fx_var, config_user)[1] for fx_var in fx_vars ] + # list may be (intentionally) empty - catch it here + if not fx_variables: + raise RecipeError( + f"Preprocessor task for {fx_var} of {var}" + f"can not be performed since there is " + f"no data available. ") for fx_variable in fx_variables: before, _ = _split_settings(fx_profile, step, order) # remove time preprocessors for any fx/Ofx/Efx/etc From e0ab9cb2aac3f604fe35bfdab877f0de445aa3ee Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Thu, 23 Jan 2020 14:15:21 +0000 Subject: [PATCH 28/73] actually use the recipe error correctly --- esmvalcore/_recipe.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 99bb92c797..bd5731c502 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -990,13 +990,13 @@ def _get_preprocessor_task(variables, profiles, config_user, task_name): _get_correct_fx_file(var, fx_var, config_user)[1] for fx_var in fx_vars ] - # list may be (intentionally) empty - catch it here - if not fx_variables: - raise RecipeError( - f"Preprocessor task for {fx_var} of {var}" - f"can not be performed since there is " - f"no data available. ") for fx_variable in fx_variables: + # list may be (intentionally) empty - catch it here + if not fx_variable: + raise RecipeError( + f"One or more of {fx_vars} are missing data. " + f"Task can not be performed since there is " + f"no data available. ") before, _ = _split_settings(fx_profile, step, order) # remove time preprocessors for any fx/Ofx/Efx/etc # that dont have time coords From c6e4ca3ec9c0e6d65befb4f0ece31dab25241b76 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Thu, 23 Jan 2020 14:26:23 +0000 Subject: [PATCH 29/73] adding Lee's suggestion Co-Authored-By: Lee de Mora --- esmvalcore/_recipe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index bd5731c502..2765550d91 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -994,7 +994,7 @@ def _get_preprocessor_task(variables, profiles, config_user, task_name): # list may be (intentionally) empty - catch it here if not fx_variable: raise RecipeError( - f"One or more of {fx_vars} are missing data. " + f"One or more of {step} fx data for of {var['short_name']} are missing. " f"Task can not be performed since there is " f"no data available. ") before, _ = _split_settings(fx_profile, step, order) From 07508020d43e91b7ae8ffac39698028400384c2a Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Thu, 23 Jan 2020 14:27:16 +0000 Subject: [PATCH 30/73] adding Lee's suggestion Co-Authored-By: Lee de Mora --- esmvalcore/_recipe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 2765550d91..18dd980c76 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -996,7 +996,7 @@ def _get_preprocessor_task(variables, profiles, config_user, task_name): raise RecipeError( f"One or more of {step} fx data for of {var['short_name']} are missing. " f"Task can not be performed since there is " - f"no data available. ") + f"no fx data found.") before, _ = _split_settings(fx_profile, step, order) # remove time preprocessors for any fx/Ofx/Efx/etc # that dont have time coords From a4ff5fc54de81bf41f6f8ecab207a36d2d278d3a Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Thu, 23 Jan 2020 14:33:52 +0000 Subject: [PATCH 31/73] correct line too long --- esmvalcore/_recipe.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 18dd980c76..5c57ff6286 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -994,7 +994,8 @@ def _get_preprocessor_task(variables, profiles, config_user, task_name): # list may be (intentionally) empty - catch it here if not fx_variable: raise RecipeError( - f"One or more of {step} fx data for of {var['short_name']} are missing. " + f"One or more of {step} fx data " + f"for {var['short_name']} are missing. " f"Task can not be performed since there is " f"no fx data found.") before, _ = _split_settings(fx_profile, step, order) From 15e1a675bd0f73685ba754e83fd257ea08297969 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Thu, 23 Jan 2020 14:43:31 +0000 Subject: [PATCH 32/73] added Manuel suggestion --- esmvalcore/_recipe.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 5c57ff6286..299e68c400 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -386,13 +386,14 @@ def _add_fxvar_keys(fx_var_dict, variable): def _get_correct_fx_file(variable, fx_varname, config_user): """Get fx files (searching all possible mips).""" - # first see if we have strings or dicts for fx_varname - user_defined = False - if isinstance(fx_varname, dict): - user_defined = True - user_fx_mip = fx_varname.get('mip') - user_fx_experiment = fx_varname.get('exp') - fx_varname = fx_varname.get('short_name') + # make it a dict + if not isinstance(fx_varname, dict): + fx_varname = {'short_name': fx_varname} + + # get some parameters + user_fx_mip = fx_varname.get('mip') + user_fx_experiment = fx_varname.get('exp') + fx_varname = fx_varname.get('short_name') # assemble info from master variable var = dict(variable) @@ -407,7 +408,7 @@ def _get_correct_fx_file(variable, fx_varname, config_user): fx_mips.append(variable['mip']) # force only the mip declared by user - if user_defined and user_fx_mip: + if user_fx_mip: fx_mips = [ user_fx_mip, ] @@ -424,7 +425,7 @@ def _get_correct_fx_file(variable, fx_varname, config_user): 'short_name': fx_varname, 'mip': fx_mip }, var) - if user_defined and user_fx_experiment: + if user_fx_experiment: fx_var['exp'] = user_fx_experiment logger.debug("For fx variable '%s', found table '%s'", fx_varname, fx_mip) From cba0cbb1b1a705d6d313dffac6c715b2824cc5bc Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Thu, 23 Jan 2020 15:15:57 +0000 Subject: [PATCH 33/73] removed redundant checks --- esmvalcore/_recipe.py | 46 +++++++++---------------------------------- 1 file changed, 9 insertions(+), 37 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 299e68c400..1419b2a4fc 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -443,6 +443,9 @@ def _get_correct_fx_file(variable, fx_varname, config_user): raise RecipeError( f"Requested fx variable '{fx_varname}' not available in " f"any 'fx'-related CMOR table ({fx_mips}) for '{var_project}'") + # flag a warning + if not fx_files: + logger.warning("Missing data for fx variable {}".format(fx_varname)) # allow for empty lists corrected for by NE masks if fx_files: @@ -532,43 +535,12 @@ def _update_fx_settings(settings, variable, config_user): # special case for preprocessing fx variables if var.get("fx_var_preprocess"): - - # a brand new dictionary in this case - fx_files_dict = {} - - # we need to switch to (virtual) output file - # but need to perform data checks first - if not cmor_fxvars: - raise RecipeError( - f"No data files found for {var['fx_files']} " - f"for variable {var['short_name']} " - f"but are needed for preprocessing.") - else: - fxvars_with_data = [ - cmor_fx_var['short_name'] - for cmor_fx_var in cmor_fxvars - ] - missing_fx_vars = [ - fx for fx in var['fx_files'] - if fx not in fxvars_with_data - ] - if isinstance(fxvar, dict): - missing_fx_vars = [ - fx.get('short_name') for fx in var['fx_files'] - if fx.get('short_name') not in fxvars_with_data - ] - if missing_fx_vars: - raise RecipeError( - f"No data files found for {missing_fx_vars} " - f"for variable {var['short_name']} " - f"but are needed for preprocessing.") - else: - fx_files_dict = { - cmor_fx_var['short_name']: - get_output_file(cmor_fx_var, - config_user['preproc_dir']) - for cmor_fx_var in cmor_fxvars - } + fx_files_dict = { + cmor_fx_var['short_name']: + get_output_file(cmor_fx_var, + config_user['preproc_dir']) + for cmor_fx_var in cmor_fxvars + } # finally construct the fx files dictionary settings[step]['fx_files'] = fx_files_dict From a733e0dfded0c4c5e43f639d5bde1e98f048b786 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Thu, 23 Jan 2020 17:00:25 +0000 Subject: [PATCH 34/73] simplified approach and include activity --- esmvalcore/_recipe.py | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 1419b2a4fc..fdfcd55e9c 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -364,23 +364,20 @@ def _get_default_settings(variable, config_user, derive=False): def _add_fxvar_keys(fx_var_dict, variable): """Add keys specific to fx variable to use get_input_filelist.""" fx_variable = dict(variable) + fx_variable.update(fx_var_dict) # set variable names fx_variable['variable_group'] = fx_var_dict['short_name'] - fx_variable['short_name'] = fx_var_dict['short_name'] # specificities of project if fx_variable['project'] == 'CMIP5': - fx_variable['mip'] = 'fx' fx_variable['ensemble'] = 'r0i0p0' - elif fx_variable['project'] == 'CMIP6': - fx_variable['grid'] = variable['grid'] - if 'mip' in fx_var_dict: - fx_variable['mip'] = fx_var_dict['mip'] elif fx_variable['project'] in ['OBS', 'OBS6', 'obs4mips']: fx_variable['mip'] = 'fx' + # add missing cmor info _add_cmor_info(fx_variable, override=True) + return fx_variable @@ -390,11 +387,6 @@ def _get_correct_fx_file(variable, fx_varname, config_user): if not isinstance(fx_varname, dict): fx_varname = {'short_name': fx_varname} - # get some parameters - user_fx_mip = fx_varname.get('mip') - user_fx_experiment = fx_varname.get('exp') - fx_varname = fx_varname.get('short_name') - # assemble info from master variable var = dict(variable) var_project = variable['project'] @@ -408,6 +400,7 @@ def _get_correct_fx_file(variable, fx_varname, config_user): fx_mips.append(variable['mip']) # force only the mip declared by user + user_fx_mip = fx_varname.get('mip') if user_fx_mip: fx_mips = [ user_fx_mip, @@ -418,15 +411,11 @@ def _get_correct_fx_file(variable, fx_varname, config_user): searched_mips = [] fx_files = [] for fx_mip in fx_mips: - fx_variable = cmor_table.get_variable(fx_mip, fx_varname) + fx_variable = cmor_table.get_variable(fx_mip, fx_varname['short_name']) if fx_variable is not None: searched_mips.append(fx_mip) - fx_var = _add_fxvar_keys({ - 'short_name': fx_varname, - 'mip': fx_mip - }, var) - if user_fx_experiment: - fx_var['exp'] = user_fx_experiment + fx_varname['mip'] = fx_mip + fx_var = _add_fxvar_keys(fx_varname, var) logger.debug("For fx variable '%s', found table '%s'", fx_varname, fx_mip) fx_files = _get_input_files(fx_var, config_user) From 44aa6ae3450d48f7da776b60af9063ddfa7f901a Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Thu, 23 Jan 2020 17:00:38 +0000 Subject: [PATCH 35/73] removed redundant test --- tests/integration/test_recipe.py | 30 ------------------------------ 1 file changed, 30 deletions(-) diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index 3ee06ed7e4..983617a735 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -2205,33 +2205,3 @@ def test_invalid_fx_var_cmip6(tmp_path, patched_datafinder, config_user): with pytest.raises(RecipeError) as rec_err_exp: get_recipe(tmp_path, content, config_user) assert msg in rec_err_exp - - -def test_fx_var_invalid_project(tmp_path, patched_datafinder, config_user): - content = dedent(""" - preprocessors: - preproc: - area_statistics: - operator: mean - fx_files: ['areacella'] - - diagnostics: - diagnostic_name: - variables: - tas: - preprocessor: preproc - project: EMAC - mip: Amon - exp: historical - start_year: 2000 - end_year: 2005 - ensemble: r1i1p1f1 - grid: gn - additional_datasets: - - {dataset: CanESM5} - scripts: null - """) - msg = 'Project EMAC not supported with fx variables' - with pytest.raises(RecipeError) as rec_err_exp: - get_recipe(tmp_path, content, config_user) - assert msg in rec_err_exp From 2a603cbc978d3456161ee21f4017850eb204af6a Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 24 Jan 2020 13:42:31 +0000 Subject: [PATCH 36/73] remove duplication in task ancestry and run task only once for unique ancestors; used case fx tasks with different activities but identical fx mips --- esmvalcore/_recipe.py | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index fdfcd55e9c..43de9a4861 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -526,8 +526,7 @@ def _update_fx_settings(settings, variable, config_user): if var.get("fx_var_preprocess"): fx_files_dict = { cmor_fx_var['short_name']: - get_output_file(cmor_fx_var, - config_user['preproc_dir']) + get_output_file(cmor_fx_var, config_user['preproc_dir']) for cmor_fx_var in cmor_fxvars } @@ -890,7 +889,7 @@ def append(group_prefix, var): def _remove_time_preproc(fxprofile): - "Remove all time preprocessors from the fx profile.""" + "Remove all time preprocessors from the fx profile." "" fxprofile = deepcopy(fxprofile) for key in fxprofile: if key in TIME_PREPROCESSORS: @@ -990,6 +989,25 @@ def _get_preprocessor_task(variables, profiles, config_user, task_name): return task +def _check_duplication(task): + """Check and remove duplicate ancestry tasks.""" + ancestors_filename_dict = OrderedDict() + filenames = [] + if isinstance(task, PreprocessingTask): + for ancestor_task in task.ancestors: + for anc_product in ancestor_task.products: + ancestors_filename_dict[anc_product.filename] = ancestor_task + filenames.append(anc_product.filename) + set_ancestors = list( + set([ancestors_filename_dict[filename] for filename in filenames])) + task.ancestors = [ + ancestor for ancestor in task.ancestors + if ancestor in set_ancestors + ] + + return task + + class Recipe: """Recipe object.""" @@ -1362,6 +1380,11 @@ def initialize_tasks(self): config_user=self._cfg, task_name=task_name, ) + # remove possible duplicate ancestor tasks that + # preprocess the same + # fx var file needed by different var["activity"] but with + # the same output fx var file + _check_duplication(task) for task0 in task.flatten(): task0.priority = priority tasks.add(task) From 4c0f8be66100542a2d39e165ad99d93d22a240a8 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 24 Jan 2020 14:00:48 +0000 Subject: [PATCH 37/73] use the preproc time preprocs list and robustify the testing as a consequence --- esmvalcore/_recipe_checks.py | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/esmvalcore/_recipe_checks.py b/esmvalcore/_recipe_checks.py index 68a3064875..1b9526eca4 100644 --- a/esmvalcore/_recipe_checks.py +++ b/esmvalcore/_recipe_checks.py @@ -7,7 +7,7 @@ from ._data_finder import get_start_end_year from ._task import get_flattened_tasks, which -from .preprocessor import PreprocessingTask +from .preprocessor import PreprocessingTask, TIME_PREPROCESSORS logger = logging.getLogger(__name__) @@ -110,8 +110,7 @@ def data_availability(input_files, var): if missing_years: raise RecipeError( "No input data available for years {} in files {}".format( - ", ".join(str(year) for year in missing_years), - input_files)) + ", ".join(str(year) for year in missing_years), input_files)) def tasks_valid(tasks): @@ -128,16 +127,10 @@ def tasks_valid(tasks): def check_for_temporal_preprocs(profile): """Check for temporal operations on fx variables.""" - temporal_preprocs = [ - 'extract_season', - 'extract_month', - 'annual_mean', - 'seasonal_mean', - 'time_average', - 'regrid_time', - ] temp_preprocs = [ - preproc for preproc in profile if preproc in temporal_preprocs] + preproc for preproc in profile + if profile[preproc] and preproc in TIME_PREPROCESSORS + ] if temp_preprocs: raise RecipeError( "Time coordinate preprocessor step {} not permitted on fx vars \ From a22a8fa93fce79872c92925a51d35b519c31e7c7 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 24 Jan 2020 15:09:54 +0000 Subject: [PATCH 38/73] renamed tests and added extra case for activity switch --- tests/integration/test_recipe.py | 74 ++++++++++++++++++++++++++++++-- 1 file changed, 70 insertions(+), 4 deletions(-) diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index 983617a735..70f8b2953b 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -1867,8 +1867,8 @@ def test_fx_dicts_volcello_in_ofx_cmip6(tmp_path, patched_datafinder, assert '_Omon_' not in fx_files['volcello'] -def test_fx_vars_volcello_preproc_cmip6(tmp_path, patched_datafinder, - config_user): +def test_fx_vars_list_preproc_cmip6(tmp_path, patched_datafinder, + config_user): content = dedent(""" preprocessors: preproc: @@ -1937,8 +1937,8 @@ def test_fx_vars_volcello_preproc_cmip6(tmp_path, patched_datafinder, fx_files['areacello'] -def test_fx_dicts_volcello_preproc_cmip6(tmp_path, patched_datafinder, - config_user): +def test_fx_vars_dicts_preproc_cmip6(tmp_path, patched_datafinder, + config_user): content = dedent(""" preprocessors: preproc: @@ -2008,6 +2008,72 @@ def test_fx_dicts_volcello_preproc_cmip6(tmp_path, patched_datafinder, assert 'volume_statistics' in product.settings +def test_fx_vars_dicts_activity_preproc_cmip6(tmp_path, patched_datafinder, + config_user): + content = dedent(""" + preprocessors: + preproc: + custom_order: true + extract_volume: + z_min: 0 + z_max: 100 + annual_statistics: + operator: mean + volume_statistics: + operator: mean + fx_files: [{short_name: areacello, + mip: Ofx, exp: piControl, + activity: CMIP}] + + diagnostics: + diagnostic_name: + variables: + tos: + preprocessor: preproc + fx_var_preprocess: True + project: CMIP6 + mip: Omon + start_year: 2000 + end_year: 2005 + ensemble: r1i1p1f2 + grid: gn + additional_datasets: + - {dataset: UKESM1-0-LL, exp: ssp585} + - {dataset: UKESM1-0-LL, exp: ssp119} + scripts: null + """) + recipe = get_recipe(tmp_path, content, config_user) + + # Check generated tasks + assert len(recipe.tasks) == 1 + task = recipe.tasks.pop() + assert task.name == 'diagnostic_name' + TASKSEP + 'tos' + assert len(task.ancestors) == 1 + assert len(task.ancestors[0].ancestors) == 0 + assert 'diagnostic_name' + TASKSEP + 'fx_area-volume_stats_areacello' in [ + t.name for t in task.ancestors + ] + product_files = [ + anc_product.filename for anc_product + in task.ancestors[0].products + ] + anc_products = task.ancestors[0].products + assert len(anc_products) == 1 + assert len(product_files) == 1 + assert os.path.basename(product_files[0]) == \ + 'CMIP6_UKESM1-0-LL_Ofx_piControl_r1i1p1f2_areacello.nc' + for product in task.products: + assert 'volume_statistics' in product.settings + assert product.attributes['short_name'] == 'tos' + ancestor_product = list(anc_products)[0] + assert ancestor_product.attributes['short_name'] == 'areacello' + assert ancestor_product.attributes['mip'] == 'Ofx' + assert ancestor_product.attributes['exp'] == 'piControl' + assert ancestor_product.attributes['activity'] == 'CMIP' + assert 'annual_statistics' not in ancestor_product.settings + assert 'volume_statistics' not in ancestor_product.settings + + def test_fx_vars_volcello_in_omon_cmip6(tmp_path, patched_failing_datafinder, config_user): content = dedent(""" From 16f8fd6eb2747a84d41991a5676cd2d7679a24fa Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 24 Jan 2020 15:23:37 +0000 Subject: [PATCH 39/73] shuffled section at Monsegnor Mattia indication --- doc/esmvalcore/preprocessor.rst | 110 +++++++++++++++++--------------- 1 file changed, 57 insertions(+), 53 deletions(-) diff --git a/doc/esmvalcore/preprocessor.rst b/doc/esmvalcore/preprocessor.rst index 5e784d8904..6dd1f5e322 100644 --- a/doc/esmvalcore/preprocessor.rst +++ b/doc/esmvalcore/preprocessor.rst @@ -14,6 +14,7 @@ following the default order in which they are applied: * :ref:`Land/Sea/Ice masking` * :ref:`Horizontal regridding` * :ref:`Masking of missing values` +* :ref:`Preprocessing fx variables for spatial statistics` * :ref:`Multi-model statistics` * :ref:`Time operations` * :ref:`Area operations` @@ -414,6 +415,62 @@ variable items e.g.: .. _masking of missing values: +Missing values masks +-------------------- + +Missing (masked) values can be a nuisance especially when dealing with +multimodel ensembles and having to compute multimodel statistics; different +numbers of missing data from dataset to dataset may introduce biases and +artifically assign more weight to the datasets that have less missing +data. This is handled in ESMValTool via the missing values masks: two types of +such masks are available, one for the multimodel case and another for the +single model case. + +The multimodel missing values mask (``mask_fillvalues``) is a preprocessor step +that usually comes after all the single-model steps (regridding, area selection +etc) have been performed; in a nutshell, it combines missing values masks from +individual models into a multimodel missing values mask; the individual model +masks are built according to common criteria: the user chooses a time window in +which missing data points are counted, and if the number of missing data points +relative to the number of total data points in a window is less than a chosen +fractional theshold, the window is discarded i.e. all the points in the window +are masked (set to missing). + +.. code-block:: yaml + + preprocessors: + missing_values_preprocessor: + mask_fillvalues: + threshold_fraction: 0.95 + min_value: 19.0 + time_window: 10.0 + +In the example above, the fractional threshold for missing data vs. total data +is set to 95% and the time window is set to 10.0 (units of the time coordinate +units). Optionally, a minimum value threshold can be applied, in this case it +is set to 19.0 (in units of the variable units). + +See also :func:`esmvalcore.preprocessor.mask_fillvalues`. + +.. note:: + + It is possible to use ``mask_fillvalues`` to create a combined multimodel + mask (all the masks from all the analyzed models combined into a single + mask); for that purpose setting the ``threshold_fraction`` to 0 will not + discard any time windows, essentially keeping the original model masks and + combining them into a single mask; here is an example: + + .. code-block:: yaml + + preprocessors: + missing_values_preprocessor: + mask_fillvalues: + threshold_fraction: 0.0 # keep all missing values + min_value: -1e20 # small enough not to alter the data + # time_window: 10.0 # this will not matter anymore + + +.. _preprocessing fx variables for spatial statistics: Using mask files for area/volume/zonal statistics ------------------------------------------------- @@ -500,59 +557,6 @@ this setup will run the custom-ordered preprocessor steps in `preprocessors/prep in this case, by providing the extra CMOR keys, we allow the code to find the `volcello` with mip `Omon` and experiment `historical`. This is particularly useful when the actual fx data is not found in the master variable's (`thetao` in this case) data experiment; this is also useful to force a certain time-dependent data (mip: Omon) for preprocessors that need time operations -Missing values masks --------------------- - -Missing (masked) values can be a nuisance especially when dealing with -multimodel ensembles and having to compute multimodel statistics; different -numbers of missing data from dataset to dataset may introduce biases and -artifically assign more weight to the datasets that have less missing -data. This is handled in ESMValTool via the missing values masks: two types of -such masks are available, one for the multimodel case and another for the -single model case. - -The multimodel missing values mask (``mask_fillvalues``) is a preprocessor step -that usually comes after all the single-model steps (regridding, area selection -etc) have been performed; in a nutshell, it combines missing values masks from -individual models into a multimodel missing values mask; the individual model -masks are built according to common criteria: the user chooses a time window in -which missing data points are counted, and if the number of missing data points -relative to the number of total data points in a window is less than a chosen -fractional theshold, the window is discarded i.e. all the points in the window -are masked (set to missing). - -.. code-block:: yaml - - preprocessors: - missing_values_preprocessor: - mask_fillvalues: - threshold_fraction: 0.95 - min_value: 19.0 - time_window: 10.0 - -In the example above, the fractional threshold for missing data vs. total data -is set to 95% and the time window is set to 10.0 (units of the time coordinate -units). Optionally, a minimum value threshold can be applied, in this case it -is set to 19.0 (in units of the variable units). - -See also :func:`esmvalcore.preprocessor.mask_fillvalues`. - -.. note:: - - It is possible to use ``mask_fillvalues`` to create a combined multimodel - mask (all the masks from all the analyzed models combined into a single - mask); for that purpose setting the ``threshold_fraction`` to 0 will not - discard any time windows, essentially keeping the original model masks and - combining them into a single mask; here is an example: - - .. code-block:: yaml - - preprocessors: - missing_values_preprocessor: - mask_fillvalues: - threshold_fraction: 0.0 # keep all missing values - min_value: -1e20 # small enough not to alter the data - # time_window: 10.0 # this will not matter anymore Minimum, maximum and interval masking ------------------------------------- From bd1a0aca3bdff251b39ca9227224fbddb21262ab Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 24 Jan 2020 15:46:49 +0000 Subject: [PATCH 40/73] added note on running preproc steps on various time and non time fx vars --- doc/esmvalcore/preprocessor.rst | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/doc/esmvalcore/preprocessor.rst b/doc/esmvalcore/preprocessor.rst index 6dd1f5e322..1c163e5a9e 100644 --- a/doc/esmvalcore/preprocessor.rst +++ b/doc/esmvalcore/preprocessor.rst @@ -557,6 +557,15 @@ this setup will run the custom-ordered preprocessor steps in `preprocessors/prep in this case, by providing the extra CMOR keys, we allow the code to find the `volcello` with mip `Omon` and experiment `historical`. This is particularly useful when the actual fx data is not found in the master variable's (`thetao` in this case) data experiment; this is also useful to force a certain time-dependent data (mip: Omon) for preprocessors that need time operations +.. note:: + + In the case of fx variables that don't have a time coordinate (e.g. Ofx or Efx), + the preprocessor chain is run but the time preprocessor steps are excluded and + not run to avoid code failure. If you intent to run and use the result of a certain + time-preprocessing step, make sure you request an fx variable that has a time coordinate + e.g. instead of using mip Ofx, use Omon. If data is not available for Omon then + that preprocessor step can not be run and the tool will raise a data not found error. + Minimum, maximum and interval masking ------------------------------------- From 68fb173d7da886a1c193fc0757fefe416ff6068d Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 24 Jan 2020 15:56:26 +0000 Subject: [PATCH 41/73] added to API --- esmvalcore/preprocessor/_area.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/esmvalcore/preprocessor/_area.py b/esmvalcore/preprocessor/_area.py index 48f83c9212..a5621b4a98 100644 --- a/esmvalcore/preprocessor/_area.py +++ b/esmvalcore/preprocessor/_area.py @@ -221,6 +221,10 @@ def area_statistics(cube, operator, fx_files=None): | `max` | Maximum value | +------------+--------------------------------------------------+ + If fx_files is provided and the variable object has fx_var_preprocess + set to True, the variable's preprocessor chain will be applied to each of + the requested fx variables, up to and not including this step. + Parameters ---------- cube: iris.cube.Cube @@ -228,8 +232,11 @@ def area_statistics(cube, operator, fx_files=None): operator: str The operation, options: mean, median, min, max, std_dev, sum, variance - fx_files: dict - dictionary of field:filename for the fx_files + fx_files: list + list of field:short_name for the fx variables requested or + list of field:dict for the fx variables requested, including + but not limited to: short_name, mip, experiment etc (at least + short_name required) Returns ------- From 1a6b5518c64f38a978e9bb7c936d57a17b06d1bb Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 24 Jan 2020 15:56:38 +0000 Subject: [PATCH 42/73] added to API --- esmvalcore/preprocessor/_volume.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/esmvalcore/preprocessor/_volume.py b/esmvalcore/preprocessor/_volume.py index 4859b73639..4a0a2e966b 100644 --- a/esmvalcore/preprocessor/_volume.py +++ b/esmvalcore/preprocessor/_volume.py @@ -176,7 +176,9 @@ def volume_statistics( The volume average is weighted acoording to the cell volume. Cell volume is calculated from iris's cartography tool multiplied by the cell - thickness. + thickness. If fx_files is provided and the variable object has fx_var_preprocess + set to True, the variable's preprocessor chain will be applied to each of + the requested fx variables, up to and not including this step. Parameters ---------- @@ -184,8 +186,11 @@ def volume_statistics( Input cube. operator: str The operation to apply to the cube, options are: 'mean'. - fx_files: dict - dictionary of field:filename for the fx_files + fx_files: list + list of field:short_name for the fx variables requested or + list of field:dict for the fx variables requested, including + but not limited to: short_name, mip, experiment etc (at least + short_name required) Returns ------- From 1a2bb90154f15c2c3abe10aca18be5ad9dae05aa Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 24 Jan 2020 16:38:28 +0000 Subject: [PATCH 43/73] fixed linter --- esmvalcore/preprocessor/_volume.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/esmvalcore/preprocessor/_volume.py b/esmvalcore/preprocessor/_volume.py index 4a0a2e966b..bb3e215d4b 100644 --- a/esmvalcore/preprocessor/_volume.py +++ b/esmvalcore/preprocessor/_volume.py @@ -176,7 +176,8 @@ def volume_statistics( The volume average is weighted acoording to the cell volume. Cell volume is calculated from iris's cartography tool multiplied by the cell - thickness. If fx_files is provided and the variable object has fx_var_preprocess + thickness. If fx_files is provided and the variable object has + fx_var_preprocess set to True, the variable's preprocessor chain will be applied to each of the requested fx variables, up to and not including this step. From ef70bb3922e795dd101f80cfc9d43633b5858ffc Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Tue, 28 Jan 2020 12:02:26 +0000 Subject: [PATCH 44/73] removed check on OBS mips --- esmvalcore/_recipe.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 43de9a4861..675623d5c7 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -369,11 +369,9 @@ def _add_fxvar_keys(fx_var_dict, variable): # set variable names fx_variable['variable_group'] = fx_var_dict['short_name'] - # specificities of project + # add special ensemble for CMIP5 only if fx_variable['project'] == 'CMIP5': fx_variable['ensemble'] = 'r0i0p0' - elif fx_variable['project'] in ['OBS', 'OBS6', 'obs4mips']: - fx_variable['mip'] = 'fx' # add missing cmor info _add_cmor_info(fx_variable, override=True) From 5460f44d9b86d96b89482c213eeac61bfe008dd1 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Tue, 28 Jan 2020 12:45:02 +0000 Subject: [PATCH 45/73] made the preproccing of fx vars in preproc step default and always on --- esmvalcore/_recipe.py | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 675623d5c7..98ac264d2b 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -12,7 +12,8 @@ from . import __version__ from . import _recipe_checks as check -from ._config import TAGS, get_activity, get_institutes, replace_tags +from ._config import (TAGS, get_activity, get_institutes, get_project_config, + replace_tags) from ._data_finder import (get_input_filelist, get_output_file, get_statistic_output_file) from ._provenance import TrackedFile, get_recipe_provenance @@ -388,6 +389,7 @@ def _get_correct_fx_file(variable, fx_varname, config_user): # assemble info from master variable var = dict(variable) var_project = variable['project'] + get_project_config(var_project) cmor_table = CMOR_TABLES[var_project] valid_fx_vars = [] @@ -503,7 +505,6 @@ def _update_fx_settings(settings, variable, config_user): for step in ('area_statistics', 'volume_statistics', 'zonal_statistics'): var = dict(variable) - cmor_fxvars = [] fx_files_dict = {} if settings.get(step, {}).get('fx_files'): var['fx_files'] = settings.get(step, {}).get('fx_files') @@ -514,19 +515,12 @@ def _update_fx_settings(settings, variable, config_user): if not fxvar_name: raise RecipeError( "Key short_name missing from {}".format(fxvar)) - fx_file, cmor_fx_var = _get_correct_fx_file( + _, cmor_fx_var = _get_correct_fx_file( variable, fxvar, config_user) - fx_files_dict[fxvar_name] = fx_file if cmor_fx_var: - cmor_fxvars.append(cmor_fx_var) - - # special case for preprocessing fx variables - if var.get("fx_var_preprocess"): - fx_files_dict = { - cmor_fx_var['short_name']: - get_output_file(cmor_fx_var, config_user['preproc_dir']) - for cmor_fx_var in cmor_fxvars - } + fx_files_dict[fxvar_name] = get_output_file( + cmor_fx_var, + config_user['preproc_dir']) # finally construct the fx files dictionary settings[step]['fx_files'] = fx_files_dict @@ -747,7 +741,8 @@ def _get_preprocessor_products(variables, profile, order, ancestor_products, ) _update_regrid_time(variable, settings) ancestors = grouped_ancestors.get(variable['filename']) - if not ancestors or variable.get("fx_var_preprocess"): + fx_files_in_settings = variable.get('fx_files', None) + if not ancestors or fx_files_in_settings: ancestors = _get_ancestors(variable, config_user) if config_user.get('skip-nonexistent') and not ancestors: logger.info("Skipping: no data found for %s", variable) @@ -936,8 +931,7 @@ def _get_preprocessor_task(variables, profiles, config_user, task_name): # special case: fx variable pre-processing for step in ('area_statistics', 'volume_statistics', 'zonal_statistics'): - if profile.get(step, {}).get('fx_files') and \ - variable.get("fx_var_preprocess"): + if profile.get(step, {}).get('fx_files'): # conserve profile fx_profile = deepcopy(profile) fx_vars = profile.get(step, {}).get('fx_files') From aec6cad3db3a39a38cb085821770ce2689ed13c4 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Tue, 28 Jan 2020 12:45:27 +0000 Subject: [PATCH 46/73] removed redundant fx)var_preprocess flag --- tests/integration/test_recipe.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index 70f8b2953b..486fa8ace2 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -1887,7 +1887,6 @@ def test_fx_vars_list_preproc_cmip6(tmp_path, patched_datafinder, variables: tos: preprocessor: preproc - fx_var_preprocess: True project: CMIP6 mip: Omon exp: historical @@ -1960,7 +1959,6 @@ def test_fx_vars_dicts_preproc_cmip6(tmp_path, patched_datafinder, variables: tos: preprocessor: preproc - fx_var_preprocess: True project: CMIP6 mip: Omon exp: historical @@ -2030,7 +2028,6 @@ def test_fx_vars_dicts_activity_preproc_cmip6(tmp_path, patched_datafinder, variables: tos: preprocessor: preproc - fx_var_preprocess: True project: CMIP6 mip: Omon start_year: 2000 From e070f7115f22de35520e49f158928c12c946fe09 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Tue, 28 Jan 2020 12:51:10 +0000 Subject: [PATCH 47/73] changed doc to remove pointers to defunct fx_var_preprocess flag --- doc/esmvalcore/preprocessor.rst | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/doc/esmvalcore/preprocessor.rst b/doc/esmvalcore/preprocessor.rst index 1c163e5a9e..50797761a0 100644 --- a/doc/esmvalcore/preprocessor.rst +++ b/doc/esmvalcore/preprocessor.rst @@ -482,7 +482,7 @@ can be preprocessed: Key features: -- it is possible to run a set of preprocessor steps on the `fx_files: [vars or dicts of vars]` specified in the `area_statistics`, `volume_statistics` or `zonal_statistics`by setting the `fx_var_preprocess: True` in the `variable`; take for instance this example: +- preprocessor steps will be run on the `fx_files: [vars or dicts of vars]` specified in the `area_statistics`, `volume_statistics` or `zonal_statistics` take for instance this example: .. code-block:: yaml @@ -504,7 +504,6 @@ Key features: thetao700_Omon: short_name: thetao preprocessor: prep - fx_var_preprocess: True mip: Omon exp: historical ensemble: r1i1p1f1 @@ -542,7 +541,6 @@ this setup will run the custom-ordered preprocessor steps in `preprocessors/prep thetao700_Omon: short_name: thetao preprocessor: prep - fx_var_preprocess: True mip: Omon exp: historical ensemble: r1i1p1f1 From c535aabd74efdab7da8c2611832f4a22e0f01e1c Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Tue, 28 Jan 2020 14:11:14 +0000 Subject: [PATCH 48/73] actual correct setter for fx files preproc --- esmvalcore/_recipe.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 98ac264d2b..f4a3192ed2 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -741,7 +741,9 @@ def _get_preprocessor_products(variables, profile, order, ancestor_products, ) _update_regrid_time(variable, settings) ancestors = grouped_ancestors.get(variable['filename']) - fx_files_in_settings = variable.get('fx_files', None) + fx_files_in_settings = [ + setting.get('fx_files', None) for setting in settings.values() + ] if not ancestors or fx_files_in_settings: ancestors = _get_ancestors(variable, config_user) if config_user.get('skip-nonexistent') and not ancestors: From f152ed48bbd0c0ac54bc021302591d1418333db7 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Tue, 28 Jan 2020 14:49:24 +0000 Subject: [PATCH 49/73] actually fixed now --- esmvalcore/_recipe.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index f4a3192ed2..c8b1f55d31 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -744,6 +744,9 @@ def _get_preprocessor_products(variables, profile, order, ancestor_products, fx_files_in_settings = [ setting.get('fx_files', None) for setting in settings.values() ] + fx_files_in_settings = [ + x for x in fx_files_in_settings if x is not None + ] if not ancestors or fx_files_in_settings: ancestors = _get_ancestors(variable, config_user) if config_user.get('skip-nonexistent') and not ancestors: From af106794b83522482e815f7e6b9bb8ad00f2f0a4 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Wed, 29 Jan 2020 10:27:03 +0100 Subject: [PATCH 50/73] Fixed prospector issues --- esmvalcore/_recipe.py | 57 +++++++++++++++++++++++-------------------- 1 file changed, 30 insertions(+), 27 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index c8b1f55d31..6af1304aa7 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -143,13 +143,13 @@ def _special_name_to_dataset(variable, special_name): if special_name in ('reference_dataset', 'alternative_dataset'): if special_name not in variable: raise RecipeError( - "Preprocessor {} uses {}, but {} is not defined for " - "variable {} of diagnostic {}".format( - variable['preprocessor'], - special_name, - special_name, - variable['short_name'], - variable['diagnostic'], + "Preprocessor {preproc} uses {name}, but {name} is not " + "defined for variable {short_name} of diagnostic " + "{diagnostic}".format( + preproc=variable['preprocessor'], + name=special_name, + short_name=variable['short_name'], + diagnostic=variable['diagnostic'], )) special_name = variable[special_name] @@ -380,11 +380,14 @@ def _add_fxvar_keys(fx_var_dict, variable): return fx_variable -def _get_correct_fx_file(variable, fx_varname, config_user): +def _get_correct_fx_file(variable, fx_variable, config_user): """Get fx files (searching all possible mips).""" # make it a dict - if not isinstance(fx_varname, dict): - fx_varname = {'short_name': fx_varname} + if not isinstance(fx_variable, dict): + fx_varname = fx_variable + fx_variable = {'short_name': fx_variable} + else: + fx_varname = fx_variable['short_name'] # assemble info from master variable var = dict(variable) @@ -400,7 +403,7 @@ def _get_correct_fx_file(variable, fx_varname, config_user): fx_mips.append(variable['mip']) # force only the mip declared by user - user_fx_mip = fx_varname.get('mip') + user_fx_mip = fx_variable.get('mip') if user_fx_mip: fx_mips = [ user_fx_mip, @@ -411,18 +414,18 @@ def _get_correct_fx_file(variable, fx_varname, config_user): searched_mips = [] fx_files = [] for fx_mip in fx_mips: - fx_variable = cmor_table.get_variable(fx_mip, fx_varname['short_name']) - if fx_variable is not None: + fx_cmor_variable = cmor_table.get_variable(fx_mip, fx_varname) + if fx_cmor_variable is not None: searched_mips.append(fx_mip) - fx_varname['mip'] = fx_mip - fx_var = _add_fxvar_keys(fx_varname, var) + fx_variable['mip'] = fx_mip + fx_variable = _add_fxvar_keys(fx_variable, var) logger.debug("For fx variable '%s', found table '%s'", fx_varname, fx_mip) - fx_files = _get_input_files(fx_var, config_user) + fx_files = _get_input_files(fx_variable, config_user) # If files found, return them if fx_files: - valid_fx_vars.append(fx_var) + valid_fx_vars.append(fx_variable) logger.debug("Found fx variables '%s':\n%s", fx_varname, pformat(fx_files)) break @@ -432,9 +435,10 @@ def _get_correct_fx_file(variable, fx_varname, config_user): raise RecipeError( f"Requested fx variable '{fx_varname}' not available in " f"any 'fx'-related CMOR table ({fx_mips}) for '{var_project}'") + # flag a warning if not fx_files: - logger.warning("Missing data for fx variable {}".format(fx_varname)) + logger.warning("Missing data for fx variable '%s'", fx_varname) # allow for empty lists corrected for by NE masks if fx_files: @@ -887,7 +891,7 @@ def append(group_prefix, var): def _remove_time_preproc(fxprofile): - "Remove all time preprocessors from the fx profile." "" + """Remove all time preprocessors from the fx profile.""" fxprofile = deepcopy(fxprofile) for key in fxprofile: if key in TIME_PREPROCESSORS: @@ -996,7 +1000,7 @@ def _check_duplication(task): ancestors_filename_dict[anc_product.filename] = ancestor_task filenames.append(anc_product.filename) set_ancestors = list( - set([ancestors_filename_dict[filename] for filename in filenames])) + {ancestors_filename_dict[filename] for filename in filenames}) task.ancestors = [ ancestor for ancestor in task.ancestors if ancestor in set_ancestors @@ -1090,10 +1094,10 @@ def _initialize_datasets(raw_datasets): @staticmethod def _expand_ensemble(variables): - """ - Expand ensemble members to multiple datasets + """Expand ensemble members to multiple datasets. + + Expansion only support ensembles defined as strings, not lists. - Expansion only support ensembles defined as strings, not lists """ expanded = [] regex = re.compile(r'\(\d+:\d+\)') @@ -1187,8 +1191,7 @@ def _initialize_preprocessor_output(self, diagnostic_name, raw_variables, return preprocessor_output def _set_alias(self, preprocessor_output): - """ - Add unique alias for datasets. + """Add unique alias for datasets. Generates a unique alias for each dataset that will be shared by all variables. Tries to make it as small as possible to make it useful for @@ -1205,7 +1208,7 @@ def _set_alias(self, preprocessor_output): Function will not modify alias if it is manually added to the recipe but it will use the dataset info to compute the others - Examples: + Examples -------- - {project: CMIP5, model: EC-Earth, ensemble: r1i1p1} - {project: CMIP6, model: EC-Earth, ensemble: r1i1p1f1} @@ -1229,7 +1232,7 @@ def _set_alias(self, preprocessor_output): - {project: CMIP5, model: EC-Earth, experiment: historical} will generate alias 'EC-Earth' - Parameters: + Parameters ---------- preprocessor_output : dict preprocessor output dictionary From 13cf2b1b08595080b7040a6427ec277c11560792 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Wed, 29 Jan 2020 13:25:05 +0100 Subject: [PATCH 51/73] Small optimizations in fx file retrieval --- esmvalcore/_recipe.py | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 6af1304aa7..88d7471bf3 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -403,11 +403,8 @@ def _get_correct_fx_file(variable, fx_variable, config_user): fx_mips.append(variable['mip']) # force only the mip declared by user - user_fx_mip = fx_variable.get('mip') - if user_fx_mip: - fx_mips = [ - user_fx_mip, - ] + if 'mip' in fx_variable: + fx_mips = [fx_variable['mip']] # Search all mips for available variables # priority goes to user specified mip if available @@ -416,16 +413,17 @@ def _get_correct_fx_file(variable, fx_variable, config_user): for fx_mip in fx_mips: fx_cmor_variable = cmor_table.get_variable(fx_mip, fx_varname) if fx_cmor_variable is not None: + fx_var_dict = dict(fx_variable) searched_mips.append(fx_mip) - fx_variable['mip'] = fx_mip - fx_variable = _add_fxvar_keys(fx_variable, var) + fx_var_dict['mip'] = fx_mip + fx_var_dict = _add_fxvar_keys(fx_var_dict, var) logger.debug("For fx variable '%s', found table '%s'", fx_varname, fx_mip) - fx_files = _get_input_files(fx_variable, config_user) + fx_files = _get_input_files(fx_var_dict, config_user) # If files found, return them if fx_files: - valid_fx_vars.append(fx_variable) + valid_fx_vars.append(fx_var_dict) logger.debug("Found fx variables '%s':\n%s", fx_varname, pformat(fx_files)) break @@ -746,10 +744,8 @@ def _get_preprocessor_products(variables, profile, order, ancestor_products, _update_regrid_time(variable, settings) ancestors = grouped_ancestors.get(variable['filename']) fx_files_in_settings = [ - setting.get('fx_files', None) for setting in settings.values() - ] - fx_files_in_settings = [ - x for x in fx_files_in_settings if x is not None + setting.get('fx_files') for setting in settings.values() if + setting.get('fx_files') is not None ] if not ancestors or fx_files_in_settings: ancestors = _get_ancestors(variable, config_user) From 2b46632fc0d1403cc7398f7d126fd71b9d598fe7 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Wed, 29 Jan 2020 14:44:23 +0100 Subject: [PATCH 52/73] Expanded unit tests --- esmvalcore/_recipe.py | 2 +- tests/integration/test_recipe.py | 12 +++++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 88d7471bf3..a2c4464cc5 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -959,7 +959,7 @@ def _get_preprocessor_task(variables, profiles, config_user, task_name): before, _ = _split_settings(fx_profile, step, order) # remove time preprocessors for any fx/Ofx/Efx/etc # that dont have time coords - if 'fx' in fx_variable['mip']: + if fx_variable['frequency'] == 'fx': before = _remove_time_preproc(before) fx_name = task_name.split( TASKSEP)[0] + TASKSEP + 'fx_area-volume_stats_' + \ diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index 486fa8ace2..c20c7c6b13 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -1872,12 +1872,16 @@ def test_fx_vars_list_preproc_cmip6(tmp_path, patched_datafinder, content = dedent(""" preprocessors: preproc: - custom_order: true + regrid: + target_grid: 1x1 + scheme: linear extract_volume: z_min: 0 z_max: 100 annual_statistics: operator: mean + convert_units: + units: K volume_statistics: operator: mean fx_files: ['areacello', 'volcello'] @@ -1918,9 +1922,15 @@ def test_fx_vars_list_preproc_cmip6(tmp_path, patched_datafinder, for ancestor_product in task.ancestors[0].products: assert ancestor_product.attributes['short_name'] == 'areacello' assert 'volume_statistics' not in ancestor_product.settings + assert 'convert_units' not in ancestor_product.settings + assert 'regrid' in ancestor_product.settings + assert 'extract_volume' in ancestor_product.settings for ancestor_product in task.ancestors[1].products: assert ancestor_product.attributes['short_name'] == 'volcello' assert 'volume_statistics' not in ancestor_product.settings + assert 'convert_units' not in ancestor_product.settings + assert 'regrid' in ancestor_product.settings + assert 'extract_volume' in ancestor_product.settings assert len(task.products) == 1 product = task.products.pop() assert product.attributes['short_name'] == 'tos' From c4877dd99c53672a944b7411ccfc6199917fb14f Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Wed, 29 Jan 2020 15:06:44 +0100 Subject: [PATCH 53/73] Fixed docstrings --- esmvalcore/preprocessor/_area.py | 5 ++--- esmvalcore/preprocessor/_volume.py | 7 +++---- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/esmvalcore/preprocessor/_area.py b/esmvalcore/preprocessor/_area.py index a5621b4a98..ed39f22d09 100644 --- a/esmvalcore/preprocessor/_area.py +++ b/esmvalcore/preprocessor/_area.py @@ -221,9 +221,8 @@ def area_statistics(cube, operator, fx_files=None): | `max` | Maximum value | +------------+--------------------------------------------------+ - If fx_files is provided and the variable object has fx_var_preprocess - set to True, the variable's preprocessor chain will be applied to each of - the requested fx variables, up to and not including this step. + If fx_files is provided, the variable's preprocessor chain will be applied + to each of the requested fx variables, up to and not including this step. Parameters ---------- diff --git a/esmvalcore/preprocessor/_volume.py b/esmvalcore/preprocessor/_volume.py index bb3e215d4b..7c1ed532da 100644 --- a/esmvalcore/preprocessor/_volume.py +++ b/esmvalcore/preprocessor/_volume.py @@ -176,10 +176,9 @@ def volume_statistics( The volume average is weighted acoording to the cell volume. Cell volume is calculated from iris's cartography tool multiplied by the cell - thickness. If fx_files is provided and the variable object has - fx_var_preprocess - set to True, the variable's preprocessor chain will be applied to each of - the requested fx variables, up to and not including this step. + thickness. If fx_files is provided, the variable's preprocessor chain will + be applied to each of the requested fx variables, up to and not including + this step. Parameters ---------- From 10017548ae0bb6733f8a1e166ce5aba78766b75d Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Wed, 29 Jan 2020 15:51:26 +0100 Subject: [PATCH 54/73] Added two more tests --- tests/integration/test_recipe.py | 94 ++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index c20c7c6b13..21c42236e3 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -1867,6 +1867,58 @@ def test_fx_dicts_volcello_in_ofx_cmip6(tmp_path, patched_datafinder, assert '_Omon_' not in fx_files['volcello'] +def test_fx_vars_list_no_preproc_cmip6(tmp_path, patched_datafinder, + config_user): + content = dedent(""" + preprocessors: + preproc: + regrid: + target_grid: 1x1 + scheme: linear + extract_volume: + z_min: 0 + z_max: 100 + annual_statistics: + operator: mean + convert_units: + units: K + area_statistics: + operator: mean + + diagnostics: + diagnostic_name: + variables: + tos: + preprocessor: preproc + project: CMIP6 + mip: Omon + exp: historical + start_year: 2000 + end_year: 2005 + ensemble: r1i1p1f1 + grid: gn + additional_datasets: + - {dataset: CanESM5} + scripts: null + """) + recipe = get_recipe(tmp_path, content, config_user) + + # Check generated tasks + assert len(recipe.tasks) == 1 + task = recipe.tasks.pop() + assert task.name == 'diagnostic_name' + TASKSEP + 'tos' + assert len(task.ancestors) == 0 + assert len(task.products) == 1 + product = task.products.pop() + assert product.attributes['short_name'] == 'tos' + assert product.files + assert 'area_statistics' in product.settings + settings = product.settings['area_statistics'] + assert len(settings) == 1 + assert settings['operator'] == 'mean' + assert 'fx_files' not in settings + + def test_fx_vars_list_preproc_cmip6(tmp_path, patched_datafinder, config_user): content = dedent(""" @@ -1946,6 +1998,48 @@ def test_fx_vars_list_preproc_cmip6(tmp_path, patched_datafinder, fx_files['areacello'] +def test_fx_vars_list_preproc_cmip6_fail(tmp_path, patched_failing_datafinder, + config_user): + content = dedent(""" + preprocessors: + preproc: + regrid: + target_grid: 1x1 + scheme: linear + extract_volume: + z_min: 0 + z_max: 100 + annual_statistics: + operator: mean + convert_units: + units: K + volume_statistics: + operator: mean + fx_files: ['areacello', 'volcello'] + + diagnostics: + diagnostic_name: + variables: + tos: + preprocessor: preproc + project: CMIP6 + mip: Omon + exp: historical + start_year: 2000 + end_year: 2005 + ensemble: r1i1p1f1 + grid: gn + additional_datasets: + - {dataset: CanESM5} + scripts: null + """) + with pytest.raises(RecipeError) as rec_err_exp: + get_recipe(tmp_path, content, config_user) + msg = ('One or more of volume_statistics fx data for tos are missing. ' + 'Task can not be performed since there is no fx data found.') + assert rec_err == msg + + def test_fx_vars_dicts_preproc_cmip6(tmp_path, patched_datafinder, config_user): content = dedent(""" From 99c2aaee3a9497c3ad7a1cc4e5dce77b8ca6ff2e Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Thu, 30 Jan 2020 11:33:34 +0000 Subject: [PATCH 55/73] fixed test --- tests/integration/test_recipe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index 21c42236e3..360a0ff019 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -2033,7 +2033,7 @@ def test_fx_vars_list_preproc_cmip6_fail(tmp_path, patched_failing_datafinder, - {dataset: CanESM5} scripts: null """) - with pytest.raises(RecipeError) as rec_err_exp: + with pytest.raises(RecipeError) as rec_err: get_recipe(tmp_path, content, config_user) msg = ('One or more of volume_statistics fx data for tos are missing. ' 'Task can not be performed since there is no fx data found.') From 147b151cf344a355c44fb1ce7d06bc35b48ec75f Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Thu, 30 Jan 2020 11:45:19 +0000 Subject: [PATCH 56/73] slight docstring mod --- esmvalcore/preprocessor/_volume.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/esmvalcore/preprocessor/_volume.py b/esmvalcore/preprocessor/_volume.py index 7c1ed532da..0f0bb5ba66 100644 --- a/esmvalcore/preprocessor/_volume.py +++ b/esmvalcore/preprocessor/_volume.py @@ -187,10 +187,10 @@ def volume_statistics( operator: str The operation to apply to the cube, options are: 'mean'. fx_files: list - list of field:short_name for the fx variables requested or + list of field:str short_name for the fx variables requested or list of field:dict for the fx variables requested, including - but not limited to: short_name, mip, experiment etc (at least - short_name required) + but not limited to: keys: short_name, mip, experiment etc (at least + short_name required if dict) Returns ------- From b4ccec4de0facabd8774fbedd1a60c96584ad672 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Thu, 30 Jan 2020 11:45:34 +0000 Subject: [PATCH 57/73] slight docstring mod --- esmvalcore/preprocessor/_area.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/esmvalcore/preprocessor/_area.py b/esmvalcore/preprocessor/_area.py index ed39f22d09..9d26b2ac3f 100644 --- a/esmvalcore/preprocessor/_area.py +++ b/esmvalcore/preprocessor/_area.py @@ -232,10 +232,10 @@ def area_statistics(cube, operator, fx_files=None): The operation, options: mean, median, min, max, std_dev, sum, variance fx_files: list - list of field:short_name for the fx variables requested or + list of field:str short_name for the fx variables requested or list of field:dict for the fx variables requested, including - but not limited to: short_name, mip, experiment etc (at least - short_name required) + but not limited to: keys: short_name, mip, experiment etc (at least + short_name required if dict) Returns ------- From 87931bae93fdeecbc4407f07a7c6645ea998326b Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Thu, 30 Jan 2020 17:57:03 +0000 Subject: [PATCH 58/73] fixed the last duplicate preproc file error reported by Lee, test will follow shortly --- esmvalcore/_recipe.py | 54 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index a2c4464cc5..927c08e397 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -1359,6 +1359,7 @@ def _resolve_diagnostic_ancestors(self, tasks): def initialize_tasks(self): """Define tasks in recipe.""" logger.info("Creating tasks from recipe") + prelim_tasks = set() tasks = set() priority = 0 @@ -1383,9 +1384,60 @@ def initialize_tasks(self): _check_duplication(task) for task0 in task.flatten(): task0.priority = priority - tasks.add(task) + prelim_tasks.add(task) priority += 1 + # remove tasks with the same product output keeping only one + anc_filenames = [] + ancestor_tasks = [] + anc_tasks = {} + for task in prelim_tasks: + if isinstance(task, PreprocessingTask): + if task.ancestors: + for ancestor_task in task.ancestors: + for anc_product in ancestor_task.products: + anc_filenames.append(anc_product.filename) + ancestor_tasks.append(ancestor_task) + anc_tasks[task] = (anc_filenames, ancestor_tasks) + + if anc_tasks: + all_files = [anc_tasks[task][0] for task in prelim_tasks + if anc_tasks[task][0]] + duplicates = [] + while len(all_files) > 0: + elem = all_files.pop() + if elem in all_files: + duplicates.append(elem) + doubles_ancestors = [ + anc_tasks[task][1][0] for task in prelim_tasks + if anc_tasks[task][1] and anc_tasks[task][0] + in duplicates + ] + for task in prelim_tasks: + print("ANCS", [t.name for t in task.ancestors]) + if not isinstance(task, PreprocessingTask): + tasks.add(task) + priority += 1 + else: + if not anc_tasks[task][1]: + tasks.add(task) + priority += 1 + else: + for ancestor_task in task.ancestors: + if ancestor_task not in doubles_ancestors: + tasks.add(task) + priority += 1 + else: + added_ancestors = [] + if ancestor_task.name in added_ancestors: + added_ancestors.append(ancestor_task.name) + tasks.add(task) + priority += 1 + else: + task.ancestors.remove(ancestor_task) + tasks.add(task) + priority += 1 + # Create diagnostic tasks for script_name, script_cfg in diagnostic['scripts'].items(): task_name = diagnostic_name + TASKSEP + script_name From f827ce5a19f870a520bda9a2c13b2c622502fe63 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Thu, 30 Jan 2020 17:59:50 +0000 Subject: [PATCH 59/73] removed print statement --- esmvalcore/_recipe.py | 1 - 1 file changed, 1 deletion(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 927c08e397..0a2aa9540b 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -1414,7 +1414,6 @@ def initialize_tasks(self): in duplicates ] for task in prelim_tasks: - print("ANCS", [t.name for t in task.ancestors]) if not isinstance(task, PreprocessingTask): tasks.add(task) priority += 1 From b8aa4d9cd11d18be84a13bb9ac131abd60d301f7 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 31 Jan 2020 12:05:46 +0000 Subject: [PATCH 60/73] overhauled method for identical ancestry removal --- esmvalcore/_recipe.py | 73 +++++++++++++++++-------------------------- 1 file changed, 28 insertions(+), 45 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 0a2aa9540b..9b2e2dc2ae 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -1387,55 +1387,38 @@ def initialize_tasks(self): prelim_tasks.add(task) priority += 1 - # remove tasks with the same product output keeping only one - anc_filenames = [] - ancestor_tasks = [] - anc_tasks = {} + # remove ancestor tasks that do the same thing + # eg: fx files that are needed but can be produced + # only once + all_names = [] for task in prelim_tasks: - if isinstance(task, PreprocessingTask): - if task.ancestors: - for ancestor_task in task.ancestors: - for anc_product in ancestor_task.products: - anc_filenames.append(anc_product.filename) - ancestor_tasks.append(ancestor_task) - anc_tasks[task] = (anc_filenames, ancestor_tasks) - - if anc_tasks: - all_files = [anc_tasks[task][0] for task in prelim_tasks - if anc_tasks[task][0]] - duplicates = [] - while len(all_files) > 0: - elem = all_files.pop() - if elem in all_files: - duplicates.append(elem) - doubles_ancestors = [ - anc_tasks[task][1][0] for task in prelim_tasks - if anc_tasks[task][1] and anc_tasks[task][0] - in duplicates - ] - for task in prelim_tasks: - if not isinstance(task, PreprocessingTask): + if task.ancestors: + for ancestor_task in task.ancestors: + all_names.append(ancestor_task.name) + else: tasks.add(task) priority += 1 - else: - if not anc_tasks[task][1]: - tasks.add(task) - priority += 1 - else: - for ancestor_task in task.ancestors: - if ancestor_task not in doubles_ancestors: - tasks.add(task) - priority += 1 + + if all_names: + duplicates = [] + while len(all_names) > 0: + elem = all_names.pop() + if elem in all_names: + duplicates.append(elem) + + removed_ancestors = [] + for task in prelim_tasks: + for ancestor_task in task.ancestors: + if ancestor_task.name not in duplicates: + tasks.add(task) + priority += 1 + else: + if ancestor_task.name not in removed_ancestors: + removed_ancestors.append(ancestor_task.name) else: - added_ancestors = [] - if ancestor_task.name in added_ancestors: - added_ancestors.append(ancestor_task.name) - tasks.add(task) - priority += 1 - else: - task.ancestors.remove(ancestor_task) - tasks.add(task) - priority += 1 + task.ancestors.remove(ancestor_task) + tasks.add(task) + priority += 1 # Create diagnostic tasks for script_name, script_cfg in diagnostic['scripts'].items(): From 8f9759735e6bbd0e51f8ba92f0f49c91e72c09d9 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 31 Jan 2020 12:06:58 +0000 Subject: [PATCH 61/73] started work on the extra test --- tests/integration/test_recipe.py | 64 ++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index 360a0ff019..5807ca1c1b 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -2372,3 +2372,67 @@ def test_invalid_fx_var_cmip6(tmp_path, patched_datafinder, config_user): with pytest.raises(RecipeError) as rec_err_exp: get_recipe(tmp_path, content, config_user) assert msg in rec_err_exp + + +def test_fx_vars_duplicate_files(tmp_path, patched_datafinder, + config_user): + content = dedent(""" + preprocessors: + preproc: + custom_order: True + annual_statistics: + operator: mean + extract_region: + start_longitude: -80. + end_longitude: 30. + start_latitude: -80. + end_latitude: 80. + area_statistics: + operator: mean + fx_files: [{short_name: areacello, mip: Ofx, + exp: piControl, activity: CMIP}] + regrid_time: + frequency: yr + + diagnostics: + diag_437: + variables: + tos: + preprocessor: preproc + project: CMIP6 + mip: Omon + start_year: 2000 + end_year: 2005 + ensemble: r1i1p1f2 + grid: gn + additional_datasets: + - {dataset: UKESM1-0-LL, exp: ssp585} + - {dataset: UKESM1-0-LL, exp: ssp119} + scripts: null + diag_439: + variables: + sst: + short_name: tos + preprocessor: preproc + project: CMIP6 + mip: Omon + start_year: 2000 + end_year: 2005 + ensemble: r1i1p1f2 + grid: gn + additional_datasets: + - {dataset: UKESM1-0-LL, exp: ssp585} + sss: + short_name: tos + preprocessor: preproc + project: CMIP6 + mip: Omon + start_year: 2000 + end_year: 2005 + ensemble: r1i1p1f2 + grid: gn + additional_datasets: + - {dataset: UKESM1-0-LL, exp: ssp585} + scripts: null + """) + recipe = get_recipe(tmp_path, content, config_user) From 3ed2dc705f08e817dd59c565c9da71b7ebe27ebe Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 31 Jan 2020 12:27:00 +0000 Subject: [PATCH 62/73] bumped priority of clean task --- esmvalcore/_recipe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 9b2e2dc2ae..28f80b6b52 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -1411,7 +1411,7 @@ def initialize_tasks(self): for ancestor_task in task.ancestors: if ancestor_task.name not in duplicates: tasks.add(task) - priority += 1 + priority += 2 else: if ancestor_task.name not in removed_ancestors: removed_ancestors.append(ancestor_task.name) From da7bbbba3a42cc203477a616f11e4152e7dcdc23 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 31 Jan 2020 14:43:02 +0000 Subject: [PATCH 63/73] completed test --- tests/integration/test_recipe.py | 35 ++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index 5807ca1c1b..47c30481da 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -2436,3 +2436,38 @@ def test_fx_vars_duplicate_files(tmp_path, patched_datafinder, scripts: null """) recipe = get_recipe(tmp_path, content, config_user) + + # Check generated tasks + assert len(recipe.tasks) == 3 + all_task_names = [ + 'diag_439/sst', 'diag_437/tos', 'diag_439/sss' + ] + task_name = 'diag_437' + TASKSEP + 'tos' + task = [ + elem for elem in recipe.tasks if elem.name == task_name + ][0] + assert task.name in all_task_names + assert len(task.ancestors) == 1 + assert len(task.ancestors[0].ancestors) == 0 + anc_name = 'diag_437' + TASKSEP + 'fx_area-volume_stats_areacello' + assert anc_name in [ + t.name for t in task.ancestors + ] + for product in task.products: + assert 'regrid_time' in product.settings + assert product.attributes['short_name'] == 'tos' + for anc_product in task.ancestors[0].products: + assert anc_product.attributes['short_name'] == 'areacello' + + task_name = 'diag_439' + TASKSEP + 'sss' + task = [ + elem for elem in recipe.tasks if elem.name == task_name + ][0] + assert task.name in all_task_names + assert len(task.ancestors) == 1 + task_name = 'diag_439' + TASKSEP + 'sst' + task = [ + elem for elem in recipe.tasks if elem.name == task_name + ][0] + assert task.name in all_task_names + assert len(task.ancestors) == 0 From 8a854a69c984fa73de5d2bb3f5e817acdd67c5b3 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 31 Jan 2020 15:45:18 +0000 Subject: [PATCH 64/73] fixed bug related to single product duplicity --- esmvalcore/_recipe.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 28f80b6b52..1bf375f3a7 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -1393,13 +1393,21 @@ def initialize_tasks(self): all_names = [] for task in prelim_tasks: if task.ancestors: - for ancestor_task in task.ancestors: - all_names.append(ancestor_task.name) + product_set = [] + for product in task.products: + for ancestor_task in task.ancestors: + product_set.append(ancestor_task.name) + for unique_task in list(set(product_set)): + all_names.append(unique_task) else: tasks.add(task) priority += 1 - if all_names: + # look for cross-tasks duplication + if not len(all_names) > 1: + tasks.add(task) + priority += 1 + else: duplicates = [] while len(all_names) > 0: elem = all_names.pop() From 431e02d2224b77ddacb13ef409b286d3ef3d72bf Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 31 Jan 2020 16:08:01 +0000 Subject: [PATCH 65/73] added inline comments --- esmvalcore/_recipe.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 1bf375f3a7..e03601cb48 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -1387,9 +1387,11 @@ def initialize_tasks(self): prelim_tasks.add(task) priority += 1 - # remove ancestor tasks that do the same thing - # eg: fx files that are needed but can be produced - # only once + # remove ancestor tasks that perform the same operation + # and write to the same Preprocessor file + # eg: fx files that are needed multiple times but can be produced + # only once; this is an inter-tasks check. + # The equivanet intra-task check is done by _check_duplication all_names = [] for task in prelim_tasks: if task.ancestors: @@ -1403,7 +1405,8 @@ def initialize_tasks(self): tasks.add(task) priority += 1 - # look for cross-tasks duplication + # look for inter-tasks duplication by name + # this should be safe now once we've set-filtered by product if not len(all_names) > 1: tasks.add(task) priority += 1 @@ -1418,6 +1421,7 @@ def initialize_tasks(self): for task in prelim_tasks: for ancestor_task in task.ancestors: if ancestor_task.name not in duplicates: + # list:duplicates can be empty, just add the task tasks.add(task) priority += 2 else: From ae128e24fda2d030159c442c531941eb06dc1935 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 31 Jan 2020 16:08:21 +0000 Subject: [PATCH 66/73] tast now works at any time --- tests/integration/test_recipe.py | 34 +++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index 47c30481da..99d0937a30 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -2459,15 +2459,25 @@ def test_fx_vars_duplicate_files(tmp_path, patched_datafinder, for anc_product in task.ancestors[0].products: assert anc_product.attributes['short_name'] == 'areacello' - task_name = 'diag_439' + TASKSEP + 'sss' - task = [ - elem for elem in recipe.tasks if elem.name == task_name - ][0] - assert task.name in all_task_names - assert len(task.ancestors) == 1 - task_name = 'diag_439' + TASKSEP + 'sst' - task = [ - elem for elem in recipe.tasks if elem.name == task_name - ][0] - assert task.name in all_task_names - assert len(task.ancestors) == 0 + # identify tasks + task_name_a = 'diag_439' + TASKSEP + 'sss' + task_a = [ + elem for elem in recipe.tasks if elem.name == task_name_a + ] + assert task_a + task_a = task_a[0] + task_name_b = 'diag_439' + TASKSEP + 'sst' + task_b = [ + elem for elem in recipe.tasks if elem.name == task_name_b + ] + assert task_b + task_b = task_b[0] + + # test removal of identical ancestors across tasks + # both task_a and task_b have the same ancestor; one + # will be removed; process is random so + # either task_a has no ancestors or task_b has no ancestors + if len(task_a.ancestors) == 1: + assert len(task_b.ancestors) == 0 + elif len(task_b.ancestors) == 1: + assert len(task_a.ancestors) == 0 From 39a34b52f3d10207a40caaf87d801a4a99e01f7f Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Thu, 6 Feb 2020 15:48:22 +0000 Subject: [PATCH 67/73] fixed case when there are no tasks at all --- esmvalcore/_recipe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 7433eeb34f..df0cb8279c 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -1413,7 +1413,7 @@ def initialize_tasks(self): # look for inter-tasks duplication by name # this should be safe now once we've set-filtered by product - if not len(all_names) > 1: + if not len(all_names) > 1 and prelim_tasks: tasks.add(task) priority += 1 else: From 8c907985d69c5765c608d4c07a94e42cfff458b2 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 17 Feb 2020 12:49:38 +0000 Subject: [PATCH 68/73] fixed crass bug noticed by Lee --- esmvalcore/_recipe.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index df0cb8279c..7e97849cbf 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -1414,8 +1414,7 @@ def initialize_tasks(self): # look for inter-tasks duplication by name # this should be safe now once we've set-filtered by product if not len(all_names) > 1 and prelim_tasks: - tasks.add(task) - priority += 1 + tasks = prelim_tasks else: duplicates = [] while len(all_names) > 0: From 18cb3f670a4d3c0ce41d65694c466443750a906e Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Tue, 18 Feb 2020 17:04:59 +0000 Subject: [PATCH 69/73] priority ordering of tasks based on importance wrt ancestry --- esmvalcore/_recipe.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 2bcf233f5c..0f6ccb7ff2 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -1408,17 +1408,23 @@ def initialize_tasks(self): removed_ancestors = [] for task in prelim_tasks: for ancestor_task in task.ancestors: + # add tasks and assign priorities as rule: + # no duplicates: highest +3 + # first duplicate (not removed): high +2 + # all others with deleted ancestors: low +1 if ancestor_task.name not in duplicates: # list:duplicates can be empty, just add the task tasks.add(task) - priority += 2 + priority += 3 else: if ancestor_task.name not in removed_ancestors: removed_ancestors.append(ancestor_task.name) + tasks.add(task) + priority += 2 else: task.ancestors.remove(ancestor_task) - tasks.add(task) - priority += 1 + tasks.add(task) + priority += 1 # Create diagnostic tasks for script_name, script_cfg in diagnostic['scripts'].items(): From b80e47ef6422930b6b02a1c97a559e19c57d6b82 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Tue, 18 Feb 2020 18:06:49 +0000 Subject: [PATCH 70/73] rejiggled priorities since the sorting is done not in reverse order --- esmvalcore/_recipe.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 0f6ccb7ff2..bcea9483f3 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -1408,10 +1408,7 @@ def initialize_tasks(self): removed_ancestors = [] for task in prelim_tasks: for ancestor_task in task.ancestors: - # add tasks and assign priorities as rule: - # no duplicates: highest +3 - # first duplicate (not removed): high +2 - # all others with deleted ancestors: low +1 + # add tasks and assign priorities if ancestor_task.name not in duplicates: # list:duplicates can be empty, just add the task tasks.add(task) @@ -1419,12 +1416,11 @@ def initialize_tasks(self): else: if ancestor_task.name not in removed_ancestors: removed_ancestors.append(ancestor_task.name) - tasks.add(task) - priority += 2 + priority -= 2 else: task.ancestors.remove(ancestor_task) - tasks.add(task) priority += 1 + tasks.add(task) # Create diagnostic tasks for script_name, script_cfg in diagnostic['scripts'].items(): From b129407994858720bdc3792833ed2f8ad815539b Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Wed, 19 Feb 2020 14:18:28 +0000 Subject: [PATCH 71/73] fixed major fx file generation bug --- esmvalcore/_recipe.py | 102 ++++++++++++++++-------------------------- 1 file changed, 39 insertions(+), 63 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index bcea9483f3..19dd7301e1 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -508,8 +508,9 @@ def _update_fx_settings(settings, variable, config_user): variable, fxvar, config_user) if cmor_fx_var: fx_files_dict[fxvar_name] = get_output_file( - cmor_fx_var, - config_user['preproc_dir']) + var, + config_user['preproc_dir'], + fx_var_alias=cmor_fx_var) # finally construct the fx files dictionary settings[step]['fx_files'] = fx_files_dict @@ -694,13 +695,21 @@ def get_matching(attributes): def _get_preprocessor_products(variables, profile, order, ancestor_products, - config_user): + config_user, fx_case=False): """Get preprocessor product definitions for a set of datasets.""" products = set() - for variable in variables: - variable['filename'] = get_output_file(variable, - config_user['preproc_dir']) + if not fx_case: + for variable in variables: + variable['filename'] = get_output_file(variable, + config_user['preproc_dir']) + else: + parent_variable = variables[1] + variables = [variables[0]] + for variable in variables: + variable['filename'] = get_output_file(parent_variable, + config_user['preproc_dir'], + fx_var_alias=variable) if ancestor_products: grouped_ancestors = _match_products(ancestor_products, variables) @@ -762,7 +771,8 @@ def _get_single_preprocessor_task(variables, profile, config_user, name, - ancestor_tasks=None): + ancestor_tasks=None, + fx_case=False): """Create preprocessor tasks for a set of datasets w/ special case fx.""" if ancestor_tasks is None: ancestor_tasks = [] @@ -773,13 +783,23 @@ def _get_single_preprocessor_task(variables, check.check_for_temporal_preprocs(profile) ancestor_products = None - products = _get_preprocessor_products( - variables=variables, - profile=profile, - order=order, - ancestor_products=ancestor_products, - config_user=config_user, - ) + if not fx_case: + products = _get_preprocessor_products( + variables=variables, + profile=profile, + order=order, + ancestor_products=ancestor_products, + config_user=config_user, + ) + else: + products = _get_preprocessor_products( + variables=variables, + profile=profile, + order=order, + ancestor_products=ancestor_products, + config_user=config_user, + fx_case=True + ) if not products: raise RecipeError( @@ -927,6 +947,7 @@ def _get_preprocessor_task(variables, profiles, config_user, task_name): derive_tasks.append(task) # special case: fx variable pre-processing + variable_pairs = [] for step in ('area_statistics', 'volume_statistics', 'zonal_statistics'): if profile.get(step, {}).get('fx_files'): # conserve profile @@ -957,12 +978,14 @@ def _get_preprocessor_task(variables, profiles, config_user, task_name): TASKSEP)[0] + TASKSEP + 'fx_area-volume_stats_' + \ fx_variable['variable_group'] task = _get_single_preprocessor_task( - [fx_variable], + [fx_variable, var], before, config_user, name=fx_name, + fx_case=True ) fx_preproc_tasks.append(task) + variable_pairs.append([var, fx_variable]) derive_tasks.extend(fx_preproc_tasks) @@ -1348,7 +1371,6 @@ def _resolve_diagnostic_ancestors(self, tasks): def initialize_tasks(self): """Define tasks in recipe.""" logger.info("Creating tasks from recipe") - prelim_tasks = set() tasks = set() priority = 0 @@ -1373,55 +1395,9 @@ def initialize_tasks(self): _check_duplication(task) for task0 in task.flatten(): task0.priority = priority - prelim_tasks.add(task) + tasks.add(task) priority += 1 - # remove ancestor tasks that perform the same operation - # and write to the same Preprocessor file - # eg: fx files that are needed multiple times but can be produced - # only once; this is an inter-tasks check. - # The equivanet intra-task check is done by _check_duplication - all_names = [] - for task in prelim_tasks: - if task.ancestors: - product_set = [] - for product in task.products: - for ancestor_task in task.ancestors: - product_set.append(ancestor_task.name) - for unique_task in list(set(product_set)): - all_names.append(unique_task) - else: - tasks.add(task) - priority += 1 - - # look for inter-tasks duplication by name - # this should be safe now once we've set-filtered by product - if not len(all_names) > 1 and prelim_tasks: - tasks = prelim_tasks - else: - duplicates = [] - while len(all_names) > 0: - elem = all_names.pop() - if elem in all_names: - duplicates.append(elem) - - removed_ancestors = [] - for task in prelim_tasks: - for ancestor_task in task.ancestors: - # add tasks and assign priorities - if ancestor_task.name not in duplicates: - # list:duplicates can be empty, just add the task - tasks.add(task) - priority += 3 - else: - if ancestor_task.name not in removed_ancestors: - removed_ancestors.append(ancestor_task.name) - priority -= 2 - else: - task.ancestors.remove(ancestor_task) - priority += 1 - tasks.add(task) - # Create diagnostic tasks for script_name, script_cfg in diagnostic['scripts'].items(): task_name = diagnostic_name + TASKSEP + script_name From d3548d6bef2d0568af9989619b040b97d05314c8 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Wed, 19 Feb 2020 14:18:42 +0000 Subject: [PATCH 72/73] special case for fx favrs --- esmvalcore/_data_finder.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/esmvalcore/_data_finder.py b/esmvalcore/_data_finder.py index 7a27dee4f6..db6436120b 100644 --- a/esmvalcore/_data_finder.py +++ b/esmvalcore/_data_finder.py @@ -240,7 +240,7 @@ def get_input_filelist(variable, rootpath, drs): return (files, dirnames, filenames) -def get_output_file(variable, preproc_dir): +def get_output_file(variable, preproc_dir, fx_var_alias=None): """Return the full path to the output (preprocessed) file.""" cfg = get_project_config(variable['project']) @@ -249,12 +249,20 @@ def get_output_file(variable, preproc_dir): variable = dict(variable) variable['exp'] = '-'.join(variable['exp']) - outfile = os.path.join( - preproc_dir, - variable['diagnostic'], - variable['variable_group'], - _replace_tags(cfg['output_file'], variable)[0], - ) + if not fx_var_alias: + outfile = os.path.join( + preproc_dir, + variable['diagnostic'], + variable['variable_group'], + _replace_tags(cfg['output_file'], variable)[0], + ) + else: + outfile = os.path.join( + preproc_dir, + variable['diagnostic'], + variable['variable_group'], + _replace_tags(cfg['output_file'], fx_var_alias)[0], + ) if variable['frequency'] != 'fx': outfile += '_{start_year}-{end_year}'.format(**variable) outfile += '.nc' From 0da1ea447ed84a074cfa0786e93d5d2570d30876 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Wed, 19 Feb 2020 14:19:00 +0000 Subject: [PATCH 73/73] fixed test in light of changes --- tests/integration/test_recipe.py | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index f4624780d1..4952502ba6 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -2070,7 +2070,7 @@ def test_fx_vars_list_preproc_cmip6(tmp_path, patched_datafinder, fx_files = settings['fx_files'] assert isinstance(fx_files, dict) assert 'preproc' in fx_files['areacello'] - assert 'CMIP6_CanESM5_Ofx_historical_r1i1p1f1_areacello.nc' in \ + assert 'CMIP6_CanESM5_Ofx_historical_r1i1p1f1_areacello_2000-2005.nc' in \ fx_files['areacello'] @@ -2238,7 +2238,7 @@ def test_fx_vars_dicts_activity_preproc_cmip6(tmp_path, patched_datafinder, assert len(anc_products) == 1 assert len(product_files) == 1 assert os.path.basename(product_files[0]) == \ - 'CMIP6_UKESM1-0-LL_Ofx_piControl_r1i1p1f2_areacello.nc' + 'CMIP6_UKESM1-0-LL_Ofx_piControl_r1i1p1f2_areacello_2000-2005.nc' for product in task.products: assert 'volume_statistics' in product.settings assert product.attributes['short_name'] == 'tos' @@ -2551,12 +2551,5 @@ def test_fx_vars_duplicate_files(tmp_path, patched_datafinder, ] assert task_b task_b = task_b[0] - - # test removal of identical ancestors across tasks - # both task_a and task_b have the same ancestor; one - # will be removed; process is random so - # either task_a has no ancestors or task_b has no ancestors - if len(task_a.ancestors) == 1: - assert len(task_b.ancestors) == 0 - elif len(task_b.ancestors) == 1: - assert len(task_a.ancestors) == 0 + assert len(task_a.ancestors) == 1 + assert len(task_b.ancestors) == 1