From c3cceb8fd1229928de3dd16d6738b70d3bfd8fa8 Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Tue, 3 Nov 2020 15:18:25 +0100 Subject: [PATCH 01/18] Update .gitignore --- .gitignore | 87 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 86 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 6e78646..e64e4c7 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,89 @@ -# ignore these files +# Distribution / packaging +.Python +build/ +c +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +env/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +*.orig +*.tmp +MANIFEST + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Created by editors +*~ +\#* +\.\#* +*.swp + +# Created by PyCharm +.idea/ + +# eclipse/pydev +.project +.pydevproject +.settings + +#Create by VSCode +.vscode + +#pytest +.cache +.pytest_cache + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ + +# Jupyter Notebook +.ipynb_checkpoints + +*.tmp +*.orig +/c +/tests/data/** +test-reports/ +/test_bash.sh +/python_test_out.txt + +# Build folder +doc/sphinx/build # esgf-pyclient cache *.sqlite From 4f634167696ea235db0f4038dfff70a422658c28 Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Tue, 3 Nov 2020 15:18:49 +0100 Subject: [PATCH 02/18] Add basic setup.py --- setup.py | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 setup.py diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..a97fa7e --- /dev/null +++ b/setup.py @@ -0,0 +1,53 @@ +import os + +from setuptools import setup + +with open('README.md') as readme_file: + readme = readme_file.read() + +setup( + name='ESMValTool sample data', + version='0.0.1', + description="ESMValTool sample data", + long_description=readme + '\n\n', + author="", + author_email='', + url='https://github.com/ESMValGroup/ESMValTool_sample_data', + packages=[ + 'esmvaltool_sample_data', + ], + include_package_data=True, + license="", + zip_safe=False, + keywords='ESMValTool', + classifiers=[ + 'Development Status :: 2 - Pre-Alpha', + 'Intended Audience :: Developers', + 'License :: OSI Approved :: Apache Software License', + 'Natural Language :: English', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + ], + test_suite='tests', + install_requires=[ + 'cube-helper', + 'scitools-iris>=2.2', + ], + # tests_require=[ + # 'pytest', + # 'pytest-cov', + # 'pycodestyle', + # ], + extras_require={ + 'develop': [ + 'codespell', + 'docformatter', + 'isort', + 'pre-commit', + 'prospector[with_pyroma]!=1.1.6.3,!=1.1.6.4', + 'yamllint', + 'yapf', + ], + }, +) From 136e1b1c48205432729b98262ee54060193717d7 Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Tue, 3 Nov 2020 15:56:18 +0100 Subject: [PATCH 03/18] Add basic functionality to load datasets --- esmvaltool_sample_data/__init__.py | 1 + esmvaltool_sample_data/loader.py | 53 ++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) create mode 100644 esmvaltool_sample_data/__init__.py create mode 100644 esmvaltool_sample_data/loader.py diff --git a/esmvaltool_sample_data/__init__.py b/esmvaltool_sample_data/__init__.py new file mode 100644 index 0000000..a9ab7e8 --- /dev/null +++ b/esmvaltool_sample_data/__init__.py @@ -0,0 +1 @@ +from .loader import load_map_data, load_profile_data, load_timeseries_data diff --git a/esmvaltool_sample_data/loader.py b/esmvaltool_sample_data/loader.py new file mode 100644 index 0000000..4b236db --- /dev/null +++ b/esmvaltool_sample_data/loader.py @@ -0,0 +1,53 @@ +from pathlib import Path + +import cube_helper + +base_dir = Path(__file__).parent + + +def load_timeseries_data(): + """ + ta / Amon / historical / r1i1p1f1, any grid, 1850 - onwards, all dimensions reduced to a few steps except for the time dimension + some other variable / ocean, probably a different frequency, similar number of timesteps, other dimensions reduced + """ + + timeseries_dir = base_dir / 'data' / 'timeseries' + + data_dirs = [ + 'CMIP6.CMIP.CAMS.CAMS-CSM1-0.historical.r1i1p1f1.Amon.ta.gn.v20190708', + 'CMIP6.CMIP.CCCR-IITM.IITM-ESM.historical.r1i1p1f1.Amon.ta.gn.v20191226', + 'CMIP6.CMIP.CSIRO-ARCCSS.ACCESS-CM2.historical.r1i1p1f1.Amon.ta.gn.v20191108', + 'CMIP6.CMIP.E3SM-Project.E3SM-1-1.historical.r1i1p1f1.Amon.ta.gr.v20191211', + 'CMIP6.CMIP.FIO-QLNM.FIO-ESM-2-0.historical.r1i1p1f1.Amon.ta.gn.v20191204', + 'CMIP6.CMIP.HAMMOZ-Consortium.MPI-ESM-1-2-HAM.historical.r1i1p1f1.Amon.ta.gn.v20190627', + 'CMIP6.CMIP.INM.INM-CM4-8.historical.r1i1p1f1.Amon.ta.gr1.v20190605', + 'CMIP6.CMIP.INM.INM-CM5-0.historical.r1i1p1f1.Amon.ta.gr1.v20190610', + 'CMIP6.CMIP.IPSL.IPSL-CM6A-LR.historical.r1i1p1f1.Amon.ta.gr.v20180803', + 'CMIP6.CMIP.MPI-M.MPI-ESM1-2-HR.historical.r1i1p1f1.Amon.ta.gn.v20190710', + 'CMIP6.CMIP.MPI-M.MPI-ESM1-2-LR.historical.r1i1p1f1.Amon.ta.gn.v20190710', + 'CMIP6.CMIP.NOAA-GFDL.GFDL-CM4.historical.r1i1p1f1.Amon.ta.gr1.v20180701', + 'CMIP6.CMIP.NOAA-GFDL.GFDL-ESM4.historical.r1i1p1f1.Amon.ta.gr1.v20190726', + ] + + input_dirs = [timeseries_dir / data_dir for data_dir in data_dirs] + + cubelists = [] + + for input_dir in input_dirs: + print(input_dir) + cubelist = cube_helper.load(str(input_dir), filetype='.nc') + cubelists.append(cubelist) + + return cubelists + + +def load_map_data(): + """a 4D atmospheric variable, all dimensions reduced to a few steps except + the horizontal dimension(s) same for an ocean variable.""" + raise NotImplementedError + + +def load_profile_data(): + """a 4D atmospheric variable, all dimensions reduced to a few steps except + the horizontal dimension(s) same for an ocean variable.""" + raise NotImplementedError From f3c9a30fabcd277eac77cbe45e6e06e673396cba Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Wed, 4 Nov 2020 10:01:09 +0100 Subject: [PATCH 04/18] Set cube helper logging level to ERROR to hide warnings i.e. `tracking_id, history and creation_date attributes inconsistent` --- esmvaltool_sample_data/loader.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/esmvaltool_sample_data/loader.py b/esmvaltool_sample_data/loader.py index 4b236db..c3597e1 100644 --- a/esmvaltool_sample_data/loader.py +++ b/esmvaltool_sample_data/loader.py @@ -2,6 +2,8 @@ import cube_helper +cube_helper.logger.muffle_logger() + base_dir = Path(__file__).parent @@ -51,3 +53,9 @@ def load_profile_data(): """a 4D atmospheric variable, all dimensions reduced to a few steps except the horizontal dimension(s) same for an ocean variable.""" raise NotImplementedError + + +if __name__ == '__main__': + cube_helper.logger.reset_logger() + ts = load_timeseries_data() + breakpoint() From 294833ea2f061bf265b071cbba44e47dacceb812 Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Wed, 4 Nov 2020 12:18:40 +0100 Subject: [PATCH 05/18] Comment out problematic dataset --- esmvaltool_sample_data/loader.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/esmvaltool_sample_data/loader.py b/esmvaltool_sample_data/loader.py index c3597e1..2eddfd4 100644 --- a/esmvaltool_sample_data/loader.py +++ b/esmvaltool_sample_data/loader.py @@ -20,7 +20,6 @@ def load_timeseries_data(): 'CMIP6.CMIP.CCCR-IITM.IITM-ESM.historical.r1i1p1f1.Amon.ta.gn.v20191226', 'CMIP6.CMIP.CSIRO-ARCCSS.ACCESS-CM2.historical.r1i1p1f1.Amon.ta.gn.v20191108', 'CMIP6.CMIP.E3SM-Project.E3SM-1-1.historical.r1i1p1f1.Amon.ta.gr.v20191211', - 'CMIP6.CMIP.FIO-QLNM.FIO-ESM-2-0.historical.r1i1p1f1.Amon.ta.gn.v20191204', 'CMIP6.CMIP.HAMMOZ-Consortium.MPI-ESM-1-2-HAM.historical.r1i1p1f1.Amon.ta.gn.v20190627', 'CMIP6.CMIP.INM.INM-CM4-8.historical.r1i1p1f1.Amon.ta.gr1.v20190605', 'CMIP6.CMIP.INM.INM-CM5-0.historical.r1i1p1f1.Amon.ta.gr1.v20190610', @@ -29,6 +28,10 @@ def load_timeseries_data(): 'CMIP6.CMIP.MPI-M.MPI-ESM1-2-LR.historical.r1i1p1f1.Amon.ta.gn.v20190710', 'CMIP6.CMIP.NOAA-GFDL.GFDL-CM4.historical.r1i1p1f1.Amon.ta.gr1.v20180701', 'CMIP6.CMIP.NOAA-GFDL.GFDL-ESM4.historical.r1i1p1f1.Amon.ta.gr1.v20190726', + + # BUG: next dataset is problematic + # raises ValueError: Cube 'air_temperature' must contain a single 1D y coordinate. + 'CMIP6.CMIP.FIO-QLNM.FIO-ESM-2-0.historical.r1i1p1f1.Amon.ta.gn.v20191204', ] input_dirs = [timeseries_dir / data_dir for data_dir in data_dirs] From 869dc5b8c2fc35b4f27742c2948ff3aa0e833aa1 Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Thu, 5 Nov 2020 10:56:33 +0100 Subject: [PATCH 06/18] Speed up data loading and lose the cube-helper dependency --- esmvaltool_sample_data/loader.py | 55 +++++++++++++++++++++++--------- setup.py | 3 +- 2 files changed, 41 insertions(+), 17 deletions(-) diff --git a/esmvaltool_sample_data/loader.py b/esmvaltool_sample_data/loader.py index 2eddfd4..7fa257d 100644 --- a/esmvaltool_sample_data/loader.py +++ b/esmvaltool_sample_data/loader.py @@ -1,16 +1,46 @@ from pathlib import Path -import cube_helper - -cube_helper.logger.muffle_logger() +import cf_units +import iris base_dir = Path(__file__).parent +def strip_attributes(cube): + """Remove attributes that cause issues with merging and concatenation.""" + for attr in ['creation_date', 'tracking_id', 'history']: + if attr in cube.attributes: + cube.attributes.pop(attr) + + +def simplify_time(cube): + coord = cube.coord('time') + coord.convert_units( + cf_units.Unit('days since 1850-1-1 00:00:00', + calendar=coord.units.calendar)) + + +def load_cubes_from_input_dirs(input_dirs): + """Loads all *.nc files from each input dir into a cube.""" + for input_dir in input_dirs: + files = input_dir.glob('*.nc') + cubes = iris.load(str(file) for file in files) + for cube in cubes: + strip_attributes(cube) + simplify_time(cube) + + cubes = cubes.concatenate() + cube = cubes[0] + + yield cube + + def load_timeseries_data(): """ - ta / Amon / historical / r1i1p1f1, any grid, 1850 - onwards, all dimensions reduced to a few steps except for the time dimension - some other variable / ocean, probably a different frequency, similar number of timesteps, other dimensions reduced + Data: ta / Amon / historical / r1i1p1f1, any grid, 1850 - onwards. + All dimensions reduced to a few steps except for the time dimension + Some other variable / ocean, probably a different frequency, + similar number of timesteps, other dimensions reduced. """ timeseries_dir = base_dir / 'data' / 'timeseries' @@ -30,20 +60,16 @@ def load_timeseries_data(): 'CMIP6.CMIP.NOAA-GFDL.GFDL-ESM4.historical.r1i1p1f1.Amon.ta.gr1.v20190726', # BUG: next dataset is problematic - # raises ValueError: Cube 'air_temperature' must contain a single 1D y coordinate. - 'CMIP6.CMIP.FIO-QLNM.FIO-ESM-2-0.historical.r1i1p1f1.Amon.ta.gn.v20191204', + # raises ValueError: Cube 'air_temperature' must contain + # a single 1D y coordinate. + # 'CMIP6.CMIP.FIO-QLNM.FIO-ESM-2-0.historical.r1i1p1f1.Amon.ta.gn.v20191204', ] input_dirs = [timeseries_dir / data_dir for data_dir in data_dirs] - cubelists = [] - - for input_dir in input_dirs: - print(input_dir) - cubelist = cube_helper.load(str(input_dir), filetype='.nc') - cubelists.append(cubelist) + cubes = load_cubes_from_input_dirs(input_dirs) - return cubelists + return list(cubes) def load_map_data(): @@ -59,6 +85,5 @@ def load_profile_data(): if __name__ == '__main__': - cube_helper.logger.reset_logger() ts = load_timeseries_data() breakpoint() diff --git a/setup.py b/setup.py index a97fa7e..e736f5f 100644 --- a/setup.py +++ b/setup.py @@ -31,7 +31,6 @@ ], test_suite='tests', install_requires=[ - 'cube-helper', 'scitools-iris>=2.2', ], # tests_require=[ @@ -40,7 +39,7 @@ # 'pycodestyle', # ], extras_require={ - 'develop': [ + 'develop': [ 'codespell', 'docformatter', 'isort', From 24bb39a94c23e784053056f9a0f672af8d38cf98 Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Thu, 5 Nov 2020 17:09:21 +0100 Subject: [PATCH 07/18] Rename data -> cubes --- esmvaltool_sample_data/__init__.py | 2 +- esmvaltool_sample_data/loader.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/esmvaltool_sample_data/__init__.py b/esmvaltool_sample_data/__init__.py index a9ab7e8..cc8e762 100644 --- a/esmvaltool_sample_data/__init__.py +++ b/esmvaltool_sample_data/__init__.py @@ -1 +1 @@ -from .loader import load_map_data, load_profile_data, load_timeseries_data +from .loader import load_map_cubes, load_profile_cubes, load_timeseries_cubes diff --git a/esmvaltool_sample_data/loader.py b/esmvaltool_sample_data/loader.py index 7fa257d..66db39c 100644 --- a/esmvaltool_sample_data/loader.py +++ b/esmvaltool_sample_data/loader.py @@ -35,7 +35,7 @@ def load_cubes_from_input_dirs(input_dirs): yield cube -def load_timeseries_data(): +def load_timeseries_cubes(): """ Data: ta / Amon / historical / r1i1p1f1, any grid, 1850 - onwards. All dimensions reduced to a few steps except for the time dimension @@ -72,18 +72,18 @@ def load_timeseries_data(): return list(cubes) -def load_map_data(): +def load_map_cubes(): """a 4D atmospheric variable, all dimensions reduced to a few steps except the horizontal dimension(s) same for an ocean variable.""" raise NotImplementedError -def load_profile_data(): +def load_profile_cubes(): """a 4D atmospheric variable, all dimensions reduced to a few steps except the horizontal dimension(s) same for an ocean variable.""" raise NotImplementedError if __name__ == '__main__': - ts = load_timeseries_data() + ts = load_timeseries_cubes() breakpoint() From 29885ec4b183607c50e4c60ecbb5e57534f0e62a Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Fri, 6 Nov 2020 11:11:47 +0100 Subject: [PATCH 08/18] Add package data --- MANIFEST.in | 1 + setup.py | 10 +++++----- 2 files changed, 6 insertions(+), 5 deletions(-) create mode 100644 MANIFEST.in diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..9197b5f --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1 @@ +recursive-include esmvaltool_sample_data/data/ *.nc diff --git a/setup.py b/setup.py index e736f5f..8ae9475 100644 --- a/setup.py +++ b/setup.py @@ -1,10 +1,12 @@ -import os - from setuptools import setup with open('README.md') as readme_file: readme = readme_file.read() +PACKAGES = [ + 'esmvaltool_sample_data', +] + setup( name='ESMValTool sample data', version='0.0.1', @@ -13,9 +15,7 @@ author="", author_email='', url='https://github.com/ESMValGroup/ESMValTool_sample_data', - packages=[ - 'esmvaltool_sample_data', - ], + packages=PACKAGES, include_package_data=True, license="", zip_safe=False, From 26f3370a42ca8136ae06e866773de468141676e0 Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Mon, 9 Nov 2020 12:09:35 +0000 Subject: [PATCH 09/18] Update esmvaltool_sample_data/loader.py Co-authored-by: Bouwe Andela --- esmvaltool_sample_data/loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esmvaltool_sample_data/loader.py b/esmvaltool_sample_data/loader.py index 66db39c..ade4100 100644 --- a/esmvaltool_sample_data/loader.py +++ b/esmvaltool_sample_data/loader.py @@ -80,7 +80,7 @@ def load_map_cubes(): def load_profile_cubes(): """a 4D atmospheric variable, all dimensions reduced to a few steps except - the horizontal dimension(s) same for an ocean variable.""" + the vertical dimension(s) same for an ocean variable.""" raise NotImplementedError From 1316756cc15879b2670ede8ae8590cb4d3ee632e Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Tue, 10 Nov 2020 11:43:54 +0100 Subject: [PATCH 10/18] Add developer imports --- setup.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/setup.py b/setup.py index 8ae9475..2e95a67 100644 --- a/setup.py +++ b/setup.py @@ -42,7 +42,9 @@ 'develop': [ 'codespell', 'docformatter', + 'esgf-pyclient', 'isort', + 'myproxyclient', 'pre-commit', 'prospector[with_pyroma]!=1.1.6.3,!=1.1.6.4', 'yamllint', From e46ae61f1f8e2d893cf1f50b1ef15a9339aa98fc Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Tue, 10 Nov 2020 11:44:43 +0100 Subject: [PATCH 11/18] Address review comments --- esmvaltool_sample_data/loader.py | 42 +++++++++++++------------------- 1 file changed, 17 insertions(+), 25 deletions(-) diff --git a/esmvaltool_sample_data/loader.py b/esmvaltool_sample_data/loader.py index ade4100..0300997 100644 --- a/esmvaltool_sample_data/loader.py +++ b/esmvaltool_sample_data/loader.py @@ -5,6 +5,14 @@ base_dir = Path(__file__).parent +problematic = [ + # iris.exceptions.ConcatenateError: failed to concatenate into a single cube. + 'esmvaltool_sample_data/data/timeseries/CMIP6/CMIP/NCC/NorCPM1/historical/r1i1p1f1/Amon/ta/gn/v20190914', + # UserWarning: Gracefully filling 'lat' dimension coordinate masked points + 'esmvaltool_sample_data/data/timeseries/CMIP6/CMIP/NCAR/CESM2-FV2/historical/r1i1p1f1/Amon/ta/gn/v20191120', + 'esmvaltool_sample_data/data/timeseries/CMIP6/CMIP/NCAR/CESM2-WACCM-FV2/historical/r1i1p1f1/Amon/ta/gn/v20191120', +] + def strip_attributes(cube): """Remove attributes that cause issues with merging and concatenation.""" @@ -23,21 +31,24 @@ def simplify_time(cube): def load_cubes_from_input_dirs(input_dirs): """Loads all *.nc files from each input dir into a cube.""" for input_dir in input_dirs: + if str(input_dir) in problematic: + # print('Skipping', input_dir) + continue + # print(input_dir) files = input_dir.glob('*.nc') cubes = iris.load(str(file) for file in files) for cube in cubes: strip_attributes(cube) simplify_time(cube) - cubes = cubes.concatenate() - cube = cubes[0] + cube = cubes.concatenate_cube() yield cube -def load_timeseries_cubes(): +def load_timeseries_cubes(mip_table='Amon'): """ - Data: ta / Amon / historical / r1i1p1f1, any grid, 1850 - onwards. + Data: ta / Amon / historical / r1i1p1f1, any grid, 1950 - onwards. All dimensions reduced to a few steps except for the time dimension Some other variable / ocean, probably a different frequency, similar number of timesteps, other dimensions reduced. @@ -45,27 +56,8 @@ def load_timeseries_cubes(): timeseries_dir = base_dir / 'data' / 'timeseries' - data_dirs = [ - 'CMIP6.CMIP.CAMS.CAMS-CSM1-0.historical.r1i1p1f1.Amon.ta.gn.v20190708', - 'CMIP6.CMIP.CCCR-IITM.IITM-ESM.historical.r1i1p1f1.Amon.ta.gn.v20191226', - 'CMIP6.CMIP.CSIRO-ARCCSS.ACCESS-CM2.historical.r1i1p1f1.Amon.ta.gn.v20191108', - 'CMIP6.CMIP.E3SM-Project.E3SM-1-1.historical.r1i1p1f1.Amon.ta.gr.v20191211', - 'CMIP6.CMIP.HAMMOZ-Consortium.MPI-ESM-1-2-HAM.historical.r1i1p1f1.Amon.ta.gn.v20190627', - 'CMIP6.CMIP.INM.INM-CM4-8.historical.r1i1p1f1.Amon.ta.gr1.v20190605', - 'CMIP6.CMIP.INM.INM-CM5-0.historical.r1i1p1f1.Amon.ta.gr1.v20190610', - 'CMIP6.CMIP.IPSL.IPSL-CM6A-LR.historical.r1i1p1f1.Amon.ta.gr.v20180803', - 'CMIP6.CMIP.MPI-M.MPI-ESM1-2-HR.historical.r1i1p1f1.Amon.ta.gn.v20190710', - 'CMIP6.CMIP.MPI-M.MPI-ESM1-2-LR.historical.r1i1p1f1.Amon.ta.gn.v20190710', - 'CMIP6.CMIP.NOAA-GFDL.GFDL-CM4.historical.r1i1p1f1.Amon.ta.gr1.v20180701', - 'CMIP6.CMIP.NOAA-GFDL.GFDL-ESM4.historical.r1i1p1f1.Amon.ta.gr1.v20190726', - - # BUG: next dataset is problematic - # raises ValueError: Cube 'air_temperature' must contain - # a single 1D y coordinate. - # 'CMIP6.CMIP.FIO-QLNM.FIO-ESM-2-0.historical.r1i1p1f1.Amon.ta.gn.v20191204', - ] - - input_dirs = [timeseries_dir / data_dir for data_dir in data_dirs] + paths = timeseries_dir.glob(f'**/{mip_table}/**/*.nc') + input_dirs = list(set(path.parent for path in paths)) cubes = load_cubes_from_input_dirs(input_dirs) From 042dd66f12700d190004a873311110ad756a0b57 Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Tue, 10 Nov 2020 11:50:17 +0100 Subject: [PATCH 12/18] Update doc strings and add annotations --- esmvaltool_sample_data/loader.py | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/esmvaltool_sample_data/loader.py b/esmvaltool_sample_data/loader.py index 0300997..8baf682 100644 --- a/esmvaltool_sample_data/loader.py +++ b/esmvaltool_sample_data/loader.py @@ -14,22 +14,24 @@ ] -def strip_attributes(cube): - """Remove attributes that cause issues with merging and concatenation.""" +def strip_attributes(cube: 'iris.Cube') -> None: + """Remove attributes in-place that cause issues with merging and + concatenation.""" for attr in ['creation_date', 'tracking_id', 'history']: if attr in cube.attributes: cube.attributes.pop(attr) -def simplify_time(cube): +def simplify_time(cube: 'iris.Cube') -> None: + """Simplifies the time coordinate in-place.""" coord = cube.coord('time') coord.convert_units( cf_units.Unit('days since 1850-1-1 00:00:00', calendar=coord.units.calendar)) -def load_cubes_from_input_dirs(input_dirs): - """Loads all *.nc files from each input dir into a cube.""" +def load_cubes_from_input_dirs(input_dirs: list) -> 'iris.Cube': + """Generator that loads all *.nc files from each input dir into a cube.""" for input_dir in input_dirs: if str(input_dir) in problematic: # print('Skipping', input_dir) @@ -46,12 +48,20 @@ def load_cubes_from_input_dirs(input_dirs): yield cube -def load_timeseries_cubes(mip_table='Amon'): - """ - Data: ta / Amon / historical / r1i1p1f1, any grid, 1950 - onwards. - All dimensions reduced to a few steps except for the time dimension - Some other variable / ocean, probably a different frequency, - similar number of timesteps, other dimensions reduced. +def load_timeseries_cubes(mip_table: str = 'Amon') -> list: + """Returns a list of iris cubes with timeseries data. + + The data are: ta / Amon / historical / r1i1p1f1, any grid, 1950 - onwards. + All dimensions were reduced to a few steps except for the time dimension. + + Parameters + ---------- + mip_table: str + select monthly (`Amon`) or daily (`day`) data. + + Returns + ------- + list of iris.cube """ timeseries_dir = base_dir / 'data' / 'timeseries' From 179416de65d7ce8ec83fb6cfab12542f3dd2826d Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Tue, 10 Nov 2020 13:34:36 +0100 Subject: [PATCH 13/18] Select subset of data --- esmvaltool_sample_data/loader.py | 41 ++++++++++++++++++++++++++------ 1 file changed, 34 insertions(+), 7 deletions(-) diff --git a/esmvaltool_sample_data/loader.py b/esmvaltool_sample_data/loader.py index 8baf682..df24f94 100644 --- a/esmvaltool_sample_data/loader.py +++ b/esmvaltool_sample_data/loader.py @@ -7,10 +7,20 @@ problematic = [ # iris.exceptions.ConcatenateError: failed to concatenate into a single cube. - 'esmvaltool_sample_data/data/timeseries/CMIP6/CMIP/NCC/NorCPM1/historical/r1i1p1f1/Amon/ta/gn/v20190914', + 'data/timeseries/CMIP6/CMIP/NCC/NorCPM1/historical/r1i1p1f1/Amon/ta/gn/v20190914', # UserWarning: Gracefully filling 'lat' dimension coordinate masked points - 'esmvaltool_sample_data/data/timeseries/CMIP6/CMIP/NCAR/CESM2-FV2/historical/r1i1p1f1/Amon/ta/gn/v20191120', - 'esmvaltool_sample_data/data/timeseries/CMIP6/CMIP/NCAR/CESM2-WACCM-FV2/historical/r1i1p1f1/Amon/ta/gn/v20191120', + 'data/timeseries/CMIP6/CMIP/NCAR/CESM2-FV2/historical/r1i1p1f1/Amon/ta/gn/v20191120', + 'data/timeseries/CMIP6/CMIP/NCAR/CESM2-WACCM-FV2/historical/r1i1p1f1/Amon/ta/gn/v20191120', +] + +whitelist = [ + # (780, 2, 2, 2) 365_day + 'data/timeseries/CMIP6/CMIP/CAS/FGOALS-f3-L/historical/r1i1p1f1/Amon/ta/gr/v20190927', + 'data/timeseries/CMIP6/CMIP/E3SM-Project/E3SM-1-0/historical/r1i1p1f1/Amon/ta/gr/v20191220', + 'data/timeseries/CMIP6/CMIP/E3SM-Project/E3SM-1-1-ECA/historical/r1i1p1f1/Amon/ta/gr/v20200624', + 'data/timeseries/CMIP6/CMIP/E3SM-Project/E3SM-1-1/historical/r1i1p1f1/Amon/ta/gr/v20191211', + 'data/timeseries/CMIP6/CMIP/NOAA-GFDL/GFDL-CM4/historical/r1i1p1f1/Amon/ta/gr1/v20180701', + 'data/timeseries/CMIP6/CMIP/NOAA-GFDL/GFDL-ESM4/historical/r1i1p1f1/Amon/ta/gr1/v20190726', ] @@ -33,10 +43,6 @@ def simplify_time(cube: 'iris.Cube') -> None: def load_cubes_from_input_dirs(input_dirs: list) -> 'iris.Cube': """Generator that loads all *.nc files from each input dir into a cube.""" for input_dir in input_dirs: - if str(input_dir) in problematic: - # print('Skipping', input_dir) - continue - # print(input_dir) files = input_dir.glob('*.nc') cubes = iris.load(str(file) for file in files) for cube in cubes: @@ -45,9 +51,27 @@ def load_cubes_from_input_dirs(input_dirs: list) -> 'iris.Cube': cube = cubes.concatenate_cube() + # print(cube.shape, cube.coord('time').units.calendar, input_dir) + yield cube +def filter_problematic(dirs): + base = Path(__file__).parent + for drc in dirs: + relative_dir = drc.relative_to(base) + if str(relative_dir) not in problematic: + yield drc + + +def get_subset(dirs): + base = Path(__file__).parent + for drc in dirs: + relative_dir = drc.relative_to(base) + if str(relative_dir) in whitelist: + yield drc + + def load_timeseries_cubes(mip_table: str = 'Amon') -> list: """Returns a list of iris cubes with timeseries data. @@ -69,6 +93,9 @@ def load_timeseries_cubes(mip_table: str = 'Amon') -> list: paths = timeseries_dir.glob(f'**/{mip_table}/**/*.nc') input_dirs = list(set(path.parent for path in paths)) + input_dirs = filter_problematic(input_dirs) + input_dirs = get_subset(input_dirs) + cubes = load_cubes_from_input_dirs(input_dirs) return list(cubes) From 6de61547723f18fef3850d9863eecdae6d645491 Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Tue, 10 Nov 2020 16:00:44 +0100 Subject: [PATCH 14/18] Add whitelists for specific subsets of data --- esmvaltool_sample_data/__init__.py | 126 ++++++++++++++++++++++++++++- esmvaltool_sample_data/loader.py | 118 --------------------------- 2 files changed, 125 insertions(+), 119 deletions(-) delete mode 100644 esmvaltool_sample_data/loader.py diff --git a/esmvaltool_sample_data/__init__.py b/esmvaltool_sample_data/__init__.py index cc8e762..c359921 100644 --- a/esmvaltool_sample_data/__init__.py +++ b/esmvaltool_sample_data/__init__.py @@ -1 +1,125 @@ -from .loader import load_map_cubes, load_profile_cubes, load_timeseries_cubes +from pathlib import Path + +import cf_units +import iris + +base_dir = Path(__file__).parent + +problematic = [ + # iris.exceptions.ConcatenateError: failed to concatenate into a single cube. + 'data/timeseries/CMIP6/CMIP/NCC/NorCPM1/historical/r1i1p1f1/Amon/ta/gn/v20190914', + # UserWarning: Gracefully filling 'lat' dimension coordinate masked points + 'data/timeseries/CMIP6/CMIP/NCAR/CESM2-FV2/historical/r1i1p1f1/Amon/ta/gn/v20191120', + 'data/timeseries/CMIP6/CMIP/NCAR/CESM2-WACCM-FV2/historical/r1i1p1f1/Amon/ta/gn/v20191120', + 'data/timeseries/CMIP6/CMIP/CSIRO-ARCCSS/ACCESS-CM2/historical/r1i1p1f1/day/ta/gn/v20191108', +] + +whitelist = { + 'Amon': ( + # (780, 2, 2, 2) 365_day + 'data/timeseries/CMIP6/CMIP/CAS/FGOALS-f3-L/historical/r1i1p1f1/Amon/ta/gr/v20190927', + 'data/timeseries/CMIP6/CMIP/E3SM-Project/E3SM-1-0/historical/r1i1p1f1/Amon/ta/gr/v20191220', + 'data/timeseries/CMIP6/CMIP/E3SM-Project/E3SM-1-1-ECA/historical/r1i1p1f1/Amon/ta/gr/v20200624', + 'data/timeseries/CMIP6/CMIP/E3SM-Project/E3SM-1-1/historical/r1i1p1f1/Amon/ta/gr/v20191211', + 'data/timeseries/CMIP6/CMIP/NOAA-GFDL/GFDL-CM4/historical/r1i1p1f1/Amon/ta/gr1/v20180701', + 'data/timeseries/CMIP6/CMIP/NOAA-GFDL/GFDL-ESM4/historical/r1i1p1f1/Amon/ta/gr1/v20190726', + ), + 'day': ( + # (3650, 2, 3, 2) 365_day + 'data/timeseries/CMIP6/CMIP/AS-RCEC/TaiESM1/historical/r1i1p1f1/day/ta/gn/v20200626', + 'data/timeseries/CMIP6/CMIP/NCAR/CESM2-WACCM/historical/r1i1p1f1/day/ta/gn/v20190227', + 'data/timeseries/CMIP6/CMIP/NCAR/CESM2/historical/r1i1p1f1/day/ta/gn/v20190308', + 'data/timeseries/CMIP6/CMIP/NCC/NorESM2-MM/historical/r1i1p1f1/day/ta/gn/v20191108', + 'data/timeseries/CMIP6/CMIP/NASA-GISS/GISS-E2-1-G/historical/r1i1p1f1/day/ta/gn/v20181015', + ) +} + + +def strip_attributes(cube: 'iris.Cube') -> None: + """Remove attributes in-place that cause issues with merging and + concatenation.""" + for attr in ['creation_date', 'tracking_id', 'history']: + if attr in cube.attributes: + cube.attributes.pop(attr) + + +def simplify_time(cube: 'iris.Cube') -> None: + """Simplifies the time coordinate in-place.""" + coord = cube.coord('time') + coord.convert_units( + cf_units.Unit('days since 1850-1-1 00:00:00', + calendar=coord.units.calendar)) + + +def load_cubes_from_input_dirs(input_dirs: list) -> 'iris.Cube': + """Generator that loads all *.nc files from each input dir into a cube.""" + for input_dir in input_dirs: + files = input_dir.glob('*.nc') + cubes = iris.load(str(file) for file in files) + for cube in cubes: + strip_attributes(cube) + simplify_time(cube) + + cube = cubes.concatenate_cube() + + # print(cube.shape, cube.coord('time').units.calendar, input_dir) + + yield cube + + +def get_subset(dirs, subset): + base = Path(__file__).parent + for drc in dirs: + relative_dir = drc.relative_to(base) + if str(relative_dir) in problematic: + continue + if str(relative_dir) in subset: + yield drc + + +def load_timeseries_cubes(mip_table: str = 'Amon') -> list: + """Returns a list of iris cubes with timeseries data. + + The data are: ta / Amon / historical / r1i1p1f1, any grid, 1950 - onwards. + All dimensions were reduced to a few steps except for the time dimension. + + Parameters + ---------- + mip_table: str + select monthly (`Amon`) or daily (`day`) data. + + Returns + ------- + list of iris.cube + """ + + timeseries_dir = base_dir / 'data' / 'timeseries' + + paths = timeseries_dir.glob(f'**/{mip_table}/**/*.nc') + input_dirs = list(set(path.parent for path in paths)) + + subset = whitelist[mip_table] + + input_dirs = get_subset(input_dirs, subset=subset) + + cubes = load_cubes_from_input_dirs(input_dirs) + + return list(cubes) + + +def load_map_cubes(): + """a 4D atmospheric variable, all dimensions reduced to a few steps except + the horizontal dimension(s) same for an ocean variable.""" + raise NotImplementedError + + +def load_profile_cubes(): + """a 4D atmospheric variable, all dimensions reduced to a few steps except + the vertical dimension(s) same for an ocean variable.""" + raise NotImplementedError + + +if __name__ == '__main__': + ts_day = load_timeseries_cubes('day') + ts_amon = load_timeseries_cubes('Amon') + breakpoint() diff --git a/esmvaltool_sample_data/loader.py b/esmvaltool_sample_data/loader.py deleted file mode 100644 index df24f94..0000000 --- a/esmvaltool_sample_data/loader.py +++ /dev/null @@ -1,118 +0,0 @@ -from pathlib import Path - -import cf_units -import iris - -base_dir = Path(__file__).parent - -problematic = [ - # iris.exceptions.ConcatenateError: failed to concatenate into a single cube. - 'data/timeseries/CMIP6/CMIP/NCC/NorCPM1/historical/r1i1p1f1/Amon/ta/gn/v20190914', - # UserWarning: Gracefully filling 'lat' dimension coordinate masked points - 'data/timeseries/CMIP6/CMIP/NCAR/CESM2-FV2/historical/r1i1p1f1/Amon/ta/gn/v20191120', - 'data/timeseries/CMIP6/CMIP/NCAR/CESM2-WACCM-FV2/historical/r1i1p1f1/Amon/ta/gn/v20191120', -] - -whitelist = [ - # (780, 2, 2, 2) 365_day - 'data/timeseries/CMIP6/CMIP/CAS/FGOALS-f3-L/historical/r1i1p1f1/Amon/ta/gr/v20190927', - 'data/timeseries/CMIP6/CMIP/E3SM-Project/E3SM-1-0/historical/r1i1p1f1/Amon/ta/gr/v20191220', - 'data/timeseries/CMIP6/CMIP/E3SM-Project/E3SM-1-1-ECA/historical/r1i1p1f1/Amon/ta/gr/v20200624', - 'data/timeseries/CMIP6/CMIP/E3SM-Project/E3SM-1-1/historical/r1i1p1f1/Amon/ta/gr/v20191211', - 'data/timeseries/CMIP6/CMIP/NOAA-GFDL/GFDL-CM4/historical/r1i1p1f1/Amon/ta/gr1/v20180701', - 'data/timeseries/CMIP6/CMIP/NOAA-GFDL/GFDL-ESM4/historical/r1i1p1f1/Amon/ta/gr1/v20190726', -] - - -def strip_attributes(cube: 'iris.Cube') -> None: - """Remove attributes in-place that cause issues with merging and - concatenation.""" - for attr in ['creation_date', 'tracking_id', 'history']: - if attr in cube.attributes: - cube.attributes.pop(attr) - - -def simplify_time(cube: 'iris.Cube') -> None: - """Simplifies the time coordinate in-place.""" - coord = cube.coord('time') - coord.convert_units( - cf_units.Unit('days since 1850-1-1 00:00:00', - calendar=coord.units.calendar)) - - -def load_cubes_from_input_dirs(input_dirs: list) -> 'iris.Cube': - """Generator that loads all *.nc files from each input dir into a cube.""" - for input_dir in input_dirs: - files = input_dir.glob('*.nc') - cubes = iris.load(str(file) for file in files) - for cube in cubes: - strip_attributes(cube) - simplify_time(cube) - - cube = cubes.concatenate_cube() - - # print(cube.shape, cube.coord('time').units.calendar, input_dir) - - yield cube - - -def filter_problematic(dirs): - base = Path(__file__).parent - for drc in dirs: - relative_dir = drc.relative_to(base) - if str(relative_dir) not in problematic: - yield drc - - -def get_subset(dirs): - base = Path(__file__).parent - for drc in dirs: - relative_dir = drc.relative_to(base) - if str(relative_dir) in whitelist: - yield drc - - -def load_timeseries_cubes(mip_table: str = 'Amon') -> list: - """Returns a list of iris cubes with timeseries data. - - The data are: ta / Amon / historical / r1i1p1f1, any grid, 1950 - onwards. - All dimensions were reduced to a few steps except for the time dimension. - - Parameters - ---------- - mip_table: str - select monthly (`Amon`) or daily (`day`) data. - - Returns - ------- - list of iris.cube - """ - - timeseries_dir = base_dir / 'data' / 'timeseries' - - paths = timeseries_dir.glob(f'**/{mip_table}/**/*.nc') - input_dirs = list(set(path.parent for path in paths)) - - input_dirs = filter_problematic(input_dirs) - input_dirs = get_subset(input_dirs) - - cubes = load_cubes_from_input_dirs(input_dirs) - - return list(cubes) - - -def load_map_cubes(): - """a 4D atmospheric variable, all dimensions reduced to a few steps except - the horizontal dimension(s) same for an ocean variable.""" - raise NotImplementedError - - -def load_profile_cubes(): - """a 4D atmospheric variable, all dimensions reduced to a few steps except - the vertical dimension(s) same for an ocean variable.""" - raise NotImplementedError - - -if __name__ == '__main__': - ts = load_timeseries_cubes() - breakpoint() From 710989a13d16e5ce0d02064a315a972380c757ac Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Tue, 17 Nov 2020 15:43:42 +0100 Subject: [PATCH 15/18] Use ignore list to filter problematic datasets --- esmvaltool_sample_data/__init__.py | 66 ++++++++++++----------------- esmvaltool_sample_data/datasets.yml | 2 + 2 files changed, 30 insertions(+), 38 deletions(-) diff --git a/esmvaltool_sample_data/__init__.py b/esmvaltool_sample_data/__init__.py index c359921..c57ee57 100644 --- a/esmvaltool_sample_data/__init__.py +++ b/esmvaltool_sample_data/__init__.py @@ -2,37 +2,20 @@ import cf_units import iris +import yaml base_dir = Path(__file__).parent -problematic = [ - # iris.exceptions.ConcatenateError: failed to concatenate into a single cube. - 'data/timeseries/CMIP6/CMIP/NCC/NorCPM1/historical/r1i1p1f1/Amon/ta/gn/v20190914', - # UserWarning: Gracefully filling 'lat' dimension coordinate masked points - 'data/timeseries/CMIP6/CMIP/NCAR/CESM2-FV2/historical/r1i1p1f1/Amon/ta/gn/v20191120', - 'data/timeseries/CMIP6/CMIP/NCAR/CESM2-WACCM-FV2/historical/r1i1p1f1/Amon/ta/gn/v20191120', - 'data/timeseries/CMIP6/CMIP/CSIRO-ARCCSS/ACCESS-CM2/historical/r1i1p1f1/day/ta/gn/v20191108', -] +VERBOSE = False + +with open(base_dir / 'datasets.yml', 'r') as f: + config = yaml.safe_load(f) -whitelist = { - 'Amon': ( - # (780, 2, 2, 2) 365_day - 'data/timeseries/CMIP6/CMIP/CAS/FGOALS-f3-L/historical/r1i1p1f1/Amon/ta/gr/v20190927', - 'data/timeseries/CMIP6/CMIP/E3SM-Project/E3SM-1-0/historical/r1i1p1f1/Amon/ta/gr/v20191220', - 'data/timeseries/CMIP6/CMIP/E3SM-Project/E3SM-1-1-ECA/historical/r1i1p1f1/Amon/ta/gr/v20200624', - 'data/timeseries/CMIP6/CMIP/E3SM-Project/E3SM-1-1/historical/r1i1p1f1/Amon/ta/gr/v20191211', - 'data/timeseries/CMIP6/CMIP/NOAA-GFDL/GFDL-CM4/historical/r1i1p1f1/Amon/ta/gr1/v20180701', - 'data/timeseries/CMIP6/CMIP/NOAA-GFDL/GFDL-ESM4/historical/r1i1p1f1/Amon/ta/gr1/v20190726', - ), - 'day': ( - # (3650, 2, 3, 2) 365_day - 'data/timeseries/CMIP6/CMIP/AS-RCEC/TaiESM1/historical/r1i1p1f1/day/ta/gn/v20200626', - 'data/timeseries/CMIP6/CMIP/NCAR/CESM2-WACCM/historical/r1i1p1f1/day/ta/gn/v20190227', - 'data/timeseries/CMIP6/CMIP/NCAR/CESM2/historical/r1i1p1f1/day/ta/gn/v20190308', - 'data/timeseries/CMIP6/CMIP/NCC/NorESM2-MM/historical/r1i1p1f1/day/ta/gn/v20191108', - 'data/timeseries/CMIP6/CMIP/NASA-GISS/GISS-E2-1-G/historical/r1i1p1f1/day/ta/gn/v20181015', - ) -} +ignore_list = [fn.replace('.', '/') for fn in config['ignore']] + +ignore_list += [ + # Add paths to problematic data sets here or to `datasets.yml` +] def strip_attributes(cube: 'iris.Cube') -> None: @@ -53,7 +36,10 @@ def simplify_time(cube: 'iris.Cube') -> None: def load_cubes_from_input_dirs(input_dirs: list) -> 'iris.Cube': """Generator that loads all *.nc files from each input dir into a cube.""" - for input_dir in input_dirs: + for i, input_dir in enumerate(sorted(input_dirs)): + if VERBOSE: + print(f'Loading #{i:02d}:', input_dir) + files = input_dir.glob('*.nc') cubes = iris.load(str(file) for file in files) for cube in cubes: @@ -62,19 +48,19 @@ def load_cubes_from_input_dirs(input_dirs: list) -> 'iris.Cube': cube = cubes.concatenate_cube() - # print(cube.shape, cube.coord('time').units.calendar, input_dir) + if VERBOSE: + print(' ', cube.shape, cube.coord('time').units.calendar) yield cube -def get_subset(dirs, subset): - base = Path(__file__).parent +def filter_ignored_datasets(dirs, root): for drc in dirs: - relative_dir = drc.relative_to(base) - if str(relative_dir) in problematic: - continue - if str(relative_dir) in subset: + test_drc = str(drc.relative_to(root)) + if test_drc not in ignore_list: yield drc + elif VERBOSE: + print('Ignored:', test_drc) def load_timeseries_cubes(mip_table: str = 'Amon') -> list: @@ -98,9 +84,7 @@ def load_timeseries_cubes(mip_table: str = 'Amon') -> list: paths = timeseries_dir.glob(f'**/{mip_table}/**/*.nc') input_dirs = list(set(path.parent for path in paths)) - subset = whitelist[mip_table] - - input_dirs = get_subset(input_dirs, subset=subset) + input_dirs = list(filter_ignored_datasets(input_dirs, timeseries_dir)) cubes = load_cubes_from_input_dirs(input_dirs) @@ -120,6 +104,12 @@ def load_profile_cubes(): if __name__ == '__main__': + VERBOSE = True + + print('Loading daily data') ts_day = load_timeseries_cubes('day') + print() + print('Loading monthly data') ts_amon = load_timeseries_cubes('Amon') + breakpoint() diff --git a/esmvaltool_sample_data/datasets.yml b/esmvaltool_sample_data/datasets.yml index b3806a4..a3ca9c2 100644 --- a/esmvaltool_sample_data/datasets.yml +++ b/esmvaltool_sample_data/datasets.yml @@ -27,3 +27,5 @@ ignore: - CMIP6.CMIP.EC-Earth-Consortium.EC-Earth3-Veg-LR.historical.r1i1p1f1.Amon.ta.gr.v20200217 # something wrong with lon coord - CMIP6.CMIP.UA.MCM-UA-1-0.historical.r1i1p1f1.Amon.ta.gn.v20190731 + # iris.exceptions.ConcatenateError: failed to concatenate into a single cube. + - CMIP6.CMIP.NCC.NorCPM1.historical.r1i1p1f1.Amon.ta.gn.v20190914 From d478a8dbe5d4a9f5fbfc5ddcf6428e462d0b7d0a Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Tue, 17 Nov 2020 16:01:29 +0100 Subject: [PATCH 16/18] Ignore dataset that fails to regrid --- esmvaltool_sample_data/__init__.py | 17 ++++++++++++----- esmvaltool_sample_data/datasets.yml | 2 ++ 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/esmvaltool_sample_data/__init__.py b/esmvaltool_sample_data/__init__.py index c57ee57..d726b71 100644 --- a/esmvaltool_sample_data/__init__.py +++ b/esmvaltool_sample_data/__init__.py @@ -106,10 +106,17 @@ def load_profile_cubes(): if __name__ == '__main__': VERBOSE = True - print('Loading daily data') - ts_day = load_timeseries_cubes('day') - print() - print('Loading monthly data') - ts_amon = load_timeseries_cubes('Amon') + for mip_table in ( + 'Amon', + 'day', + ): + print() + print(f'Loading `{mip_table}`') + ts = load_timeseries_cubes(mip_table) + + first_cube = ts[0] + for i, cube in enumerate(ts): + print(i) + cube.regrid(grid=first_cube, scheme=iris.analysis.Linear()) breakpoint() diff --git a/esmvaltool_sample_data/datasets.yml b/esmvaltool_sample_data/datasets.yml index a3ca9c2..bc182b6 100644 --- a/esmvaltool_sample_data/datasets.yml +++ b/esmvaltool_sample_data/datasets.yml @@ -29,3 +29,5 @@ ignore: - CMIP6.CMIP.UA.MCM-UA-1-0.historical.r1i1p1f1.Amon.ta.gn.v20190731 # iris.exceptions.ConcatenateError: failed to concatenate into a single cube. - CMIP6.CMIP.NCC.NorCPM1.historical.r1i1p1f1.Amon.ta.gn.v20190914 + # Regridding -> ValueError: Cube 'air_temperature' must contain a single 1D y coordinate. + - CMIP6.CMIP.FIO-QLNM.FIO-ESM-2-0.historical.r1i1p1f1.Amon.ta.gn.v20191204 From f2d4e97693e7df0a40c0eb9e543b9f1a85cbca36 Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Tue, 17 Nov 2020 16:59:28 +0100 Subject: [PATCH 17/18] Remove unused functions --- esmvaltool_sample_data/__init__.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/esmvaltool_sample_data/__init__.py b/esmvaltool_sample_data/__init__.py index d726b71..2ebcd74 100644 --- a/esmvaltool_sample_data/__init__.py +++ b/esmvaltool_sample_data/__init__.py @@ -91,18 +91,6 @@ def load_timeseries_cubes(mip_table: str = 'Amon') -> list: return list(cubes) -def load_map_cubes(): - """a 4D atmospheric variable, all dimensions reduced to a few steps except - the horizontal dimension(s) same for an ocean variable.""" - raise NotImplementedError - - -def load_profile_cubes(): - """a 4D atmospheric variable, all dimensions reduced to a few steps except - the vertical dimension(s) same for an ocean variable.""" - raise NotImplementedError - - if __name__ == '__main__': VERBOSE = True From 256a9a9082477d4e837ac4ebdcd5ef0d918c279e Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Tue, 17 Nov 2020 17:00:34 +0100 Subject: [PATCH 18/18] Remove code used for testing --- esmvaltool_sample_data/__init__.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/esmvaltool_sample_data/__init__.py b/esmvaltool_sample_data/__init__.py index 2ebcd74..6544d48 100644 --- a/esmvaltool_sample_data/__init__.py +++ b/esmvaltool_sample_data/__init__.py @@ -13,10 +13,6 @@ ignore_list = [fn.replace('.', '/') for fn in config['ignore']] -ignore_list += [ - # Add paths to problematic data sets here or to `datasets.yml` -] - def strip_attributes(cube: 'iris.Cube') -> None: """Remove attributes in-place that cause issues with merging and @@ -107,4 +103,4 @@ def load_timeseries_cubes(mip_table: str = 'Amon') -> list: print(i) cube.regrid(grid=first_cube, scheme=iris.analysis.Linear()) - breakpoint() + # breakpoint()