From c97a57d02b84a6ee812b71ce1bcdd5ba4267fe0f Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Tue, 8 Nov 2016 15:52:50 +0000 Subject: [PATCH] Revised documentation, whatsnew and deprecations. --- ...te_2016-Nov-09_experimental_fieldsfile.txt | 3 + ...ture_2016-Nov-09_structured_um_loading.txt | 50 ++++++++ lib/iris/experimental/fieldsfile.py | 20 +++- lib/iris/fileformats/um/__init__.py | 3 +- lib/iris/fileformats/um/_fast_load.py | 113 +++++++++++------- .../integration/fast_load/test_fast_load.py | 2 +- 6 files changed, 147 insertions(+), 44 deletions(-) create mode 100644 docs/iris/src/whatsnew/contributions_1.11/deprecate_2016-Nov-09_experimental_fieldsfile.txt create mode 100644 docs/iris/src/whatsnew/contributions_1.11/newfeature_2016-Nov-09_structured_um_loading.txt diff --git a/docs/iris/src/whatsnew/contributions_1.11/deprecate_2016-Nov-09_experimental_fieldsfile.txt b/docs/iris/src/whatsnew/contributions_1.11/deprecate_2016-Nov-09_experimental_fieldsfile.txt new file mode 100644 index 0000000000..78ef1db74d --- /dev/null +++ b/docs/iris/src/whatsnew/contributions_1.11/deprecate_2016-Nov-09_experimental_fieldsfile.txt @@ -0,0 +1,3 @@ +* The :mod:`iris.experimental.fieldsfile` has been deprecated, in favour of the + new fast-loading mechanism provided by + :meth:`iris.fileformats.um.structured_um_loading`. diff --git a/docs/iris/src/whatsnew/contributions_1.11/newfeature_2016-Nov-09_structured_um_loading.txt b/docs/iris/src/whatsnew/contributions_1.11/newfeature_2016-Nov-09_structured_um_loading.txt new file mode 100644 index 0000000000..dee9899326 --- /dev/null +++ b/docs/iris/src/whatsnew/contributions_1.11/newfeature_2016-Nov-09_structured_um_loading.txt @@ -0,0 +1,50 @@ +* Support for "fast" loading of UM files has been added. + This can dramatically accelerate loading speeds of fieldsfiles and PP files, + if they meet certain common structural requirements, typical for these types + of datafile. 
+ + See : :meth:`iris.fileformats.um.structured_um_loading`. + + .. note:: + + This updates and replaces the experimental code in + :mod:`iris.experimental.fieldsfile`, which is now deprecated in favour of + the new, supported facility. + + [[ + **NOTE TO RELEASE COMPILER** + This needs highlighting, and a better explanation than the plain reference + documentation. We probably want a "featured item" section in the whatsnew, + along the following lines ... + ]] + Fast UM file loading: + --------------------- + Support has been added for accelerated loading of UM files (PP and + Fieldsfile), when these have a suitable regular 'structured' form. + Normal Fieldsfiles are generally suitable, and many PP files also. + + A context manager is used to enable fast UM loading in all the regular iris + load functions, such as :meth:`iris.load` and :meth:`iris.load_cube`, + when loading data from UM file types. + + For full details, see : :meth:`iris.fileformats.um.structured_um_loading`. + + Not all input files will be suitable for structured loading: Each file + must have a regular repeating order of time and vertical levels, as described + in the detailed documentation. + This is generally the case for most ordinary (model output) fieldsfiles, but + not necessarily for PP files. + It is the user's responsibility to use structured loading only with suitable + inputs. Otherwise, odd behaviour and even incorrect loading can result, as + input files are not checked as fully as in a normal load. + + Even where applicable, structured loading is not an *identical* replacement + for normal loading, even though the load call is generally the same and the + returned results may be the same in many cases. + + * results are often somewhat different, especially regarding the order of + dimensions and the choice of dimension coordinates. + + * although both constraints and user callbacks are supported, callback + routines will generally need to be re-written. 
+ diff --git a/lib/iris/experimental/fieldsfile.py b/lib/iris/experimental/fieldsfile.py index 9489277d1f..a8c7a195ea 100644 --- a/lib/iris/experimental/fieldsfile.py +++ b/lib/iris/experimental/fieldsfile.py @@ -17,13 +17,24 @@ """ High-speed loading of structured FieldsFiles. -""" +.. deprecated:: 1.10 + + This module has now been *deprecated*. + Please use :func:`iris.fileformats.um.structured_um_loading` instead. +""" from __future__ import (absolute_import, division, print_function) from six.moves import (filter, input, map, range, zip) # noqa import os +from iris._deprecation import warn_deprecated + +# Issue a deprecation message when the module is loaded. +warn_deprecated("The module 'iris.experimental.fieldsfile' is deprecated. " + "Please use iris.fileformats.um.structured_um_loading " + "as a replacement.") + from iris.coords import DimCoord from iris.cube import CubeList from iris.exceptions import TranslationError @@ -144,7 +155,14 @@ def load(filenames, callback=None): avoided, as not all irregularities are detected, which can cause erroneous results. + """ + warn_deprecated( + "The module 'iris.experimental.fieldsfile' is deprecated. " + "Please use the 'iris.fileformats.um.structured_um_loading' facility " + "as a replacement." + "\nA call to 'iris.experimental.fieldsfile.load' can be replaced with " + "'iris.load_raw', within a 'structured_um_loading' context.") loader = Loader(_collations_from_filename, {}, _convert_collation, None) return CubeList(load_cubes(filenames, callback, loader, None)) diff --git a/lib/iris/fileformats/um/__init__.py b/lib/iris/fileformats/um/__init__.py index 2b84969597..3625f79a62 100644 --- a/lib/iris/fileformats/um/__init__.py +++ b/lib/iris/fileformats/um/__init__.py @@ -25,5 +25,6 @@ # Publish the FF-replacement features here, and include documentation. 
from ._ff_replacement import um_to_pp, load_cubes, load_cubes_32bit_ieee from ._fast_load import structured_um_loading +from ._fast_load_structured_fields import FieldCollation __all__ = ['um_to_pp', 'load_cubes', 'load_cubes_32bit_ieee', - 'structured_um_loading'] + 'structured_um_loading', 'FieldCollation'] diff --git a/lib/iris/fileformats/um/_fast_load.py b/lib/iris/fileformats/um/_fast_load.py index 745e305c94..be0d8a2b35 100644 --- a/lib/iris/fileformats/um/_fast_load.py +++ b/lib/iris/fileformats/um/_fast_load.py @@ -242,27 +242,6 @@ def key_func(item): dim_coords_and_dims, aux_coords_and_dims) -# Control to enable/disable the "_combine_structured_cubes" call. -_STRUCTURED_LOAD_IS_RAW = False - - -@contextmanager -def _raw_structured_loading(): - """ - A private context manager called specifically by :func:`iris.load_raw`, to - stop the loader from concatenating its result cubes in that case. - - """ - import iris.fileformats.pp as pp - global _STRUCTURED_LOAD_IS_RAW - try: - old_raw_flag = _STRUCTURED_LOAD_IS_RAW - _STRUCTURED_LOAD_IS_RAW = True - yield - finally: - _STRUCTURED_LOAD_IS_RAW = old_raw_flag - - def _combine_structured_cubes(cubes): # Combine structured cubes from different sourcefiles, in the style of # merge/concatenate. @@ -319,8 +298,12 @@ def structured_um_loading(): """ Load cubes from structured UM Fieldsfile and PP files. - This is a context manager that enables an alternative loading mechanism for - 'structured' UM files, providing much faster load times. + "Structured" loading is a streamlined, fast load operation, to be used + **only** on fieldsfiles or PP files whose fields repeat regularly over + the same vertical levels and times (see full details below). + + This method is a context manager which enables an alternative loading + mechanism for 'structured' UM files, providing much faster load times. 
Within the scope of the context manager, this affects all standard Iris load functions (:func:`~iris.load`, :func:`~iris.load_cube`, :func:`~iris.load_cubes` and :func:`~iris.load_raw`), when loading from UM @@ -338,32 +321,53 @@ def structured_um_loading(): - Notes on applicability: + The results from this are normally equivalent to those generated by + :func:`iris.load`, but the operation is substantially faster for input + which is structured. - This is a streamlined load operation, to be used *only* on fieldsfiles or - PP files whose fields repeat regularly over the same vertical levels - and times. + For calls other than :meth:`~iris.load_raw`, the resulting cubes are + concatenated over all the input files, so there is normally just one + output cube per phenomenon. - The results aim to be equivalent to those generated by :func:`iris.load`, - but the operation is substantially faster for input that is structured. + However, actual loaded results are somewhat different from non-structured + loads in many cases, and in a variety of ways. Most commonly, dimension + ordering and the choice of dimension coordinates are often different. - The structured input files must conform to the following requirements: + Use of load callbacks: - * the file must contain fields for all possible combinations of the - vertical levels and time points found in the file. + When a user callback function is used with structured-loading, it is + called in a somewhat different way than in a 'normal' load : + The callback is called once for each basic *structured* cube loaded, + which is normally the whole of one phenomenon from a single input file. + In particular, the callback's "field" argument is a + :class:`~iris.fileformats.um.FieldCollation`, from which "field.fields" + gives a *list* of PPFields from which that cube was built. + The code required is therefore different from a 'normal' callback. - * the fields must occur in a regular repeating order within the file. 
+ Notes on applicability: + + For results to be **correct and reliable**, the input files must + conform to the following requirements : + + * the file must contain fields for all possible combinations of the + vertical levels and time points found in the file. - (For example: a sequence of fields for NV vertical levels, repeated - for NP different forecast periods, repeated for NT different - forecast times). + * the fields must occur in a regular repeating order within the file, + within the fields of each phenomenon. - * all other metadata must be identical across all fields of the same - phenomenon. + For example: a sequence of fields for NV vertical levels, repeated + for NP different forecast periods, repeated for NT different + forecast times. - Each group of fields with the same values of LBUSER4, LBUSER7 and - LBPROC is identified as a separate phenomenon: These groups are - processed independently and returned as separate result cubes. + * all other metadata must be identical across all fields of the same + phenomenon. + + Each group of fields with the same values of LBUSER4, LBUSER7 and + LBPROC is identified as a separate phenomenon: These groups are + processed independently and returned as separate result cubes. + The need for a regular sequence of fields applies separately to the + fields of each phenomenon, such that different phenomena may have + different field structures, and can be interleaved in any way at all. .. note:: @@ -384,10 +388,37 @@ def structured_um_loading(): .. warning:: + Restrictions and limitations: + Any non-regular metadata variation in the input should be strictly avoided, as not all irregularities are detected, which can cause erroneous results. + Various field header words which can in some cases vary are assumed to + have a constant value throughout a given phenomenon. This is **not** + checked, and can lead to erroneous results if it is not the case. 
+ Header elements of potential concern include LBTIM, LBCODE, LBVC, + LBRSVD4 (ensemble number) and LBUSER5 (pseudo-level). + + Known current shortcomings: + + * orography fields may be returned with extra dimensions, e.g. time, + where multiple fields exist in an input file. + + * varying values of LBUSER5, representing a 'pseudo-level' coordinate, + are not currently supported. + + * Unfortunately, there is no good workaround for this at present. + + * if some input files contain a single coordinate value while others + contain multiple values, these will not be merged into a single cube + over all input files : Instead, the single- and multiple-valued sets + will typically merge into two separate cubes with overlapping + coordinates. + + * this can be worked around by loading files individually, or with + :meth:`~iris.load_raw`, and merging/concatenating explicitly. + """ with STRUCTURED_LOAD_CONTROLS.context(loads_use_structured=True): yield diff --git a/lib/iris/tests/integration/fast_load/test_fast_load.py b/lib/iris/tests/integration/fast_load/test_fast_load.py index 73fa1ec671..0c09a13c03 100644 --- a/lib/iris/tests/integration/fast_load/test_fast_load.py +++ b/lib/iris/tests/integration/fast_load/test_fast_load.py @@ -77,7 +77,7 @@ def tearDown(self): self.load_context.__exit__(None, None, None) def _temp_filepath(self, user_name='', suffix='.pp'): - # Return a filepath for a new temporary file. + # Return the filepath for a new temporary file. self.tempfile_count += 1 file_path = self.tempfile_path_fmt.format( dir_path=self.temp_dir_path,