Skip to content
24 changes: 19 additions & 5 deletions esmvalcore/preprocessor/_multimodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
import iris
import numpy as np

from ._time import regrid_time

logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -109,10 +111,12 @@ def _put_in_cube(template_cube, cube_data, statistic, t_axis):
if t_axis is None:
times = template_cube.coord('time')
else:
unit_name = template_cube.coord('time').units.name
tunits = cf_units.Unit(unit_name, calendar="standard")
times = iris.coords.DimCoord(
t_axis,
standard_name='time',
units=template_cube.coord('time').units)
units=tunits)

coord_names = [c.long_name for c in template_cube.coords()]
coord_names.extend([c.standard_name for c in template_cube.coords()])
Expand Down Expand Up @@ -164,25 +168,35 @@ def _put_in_cube(template_cube, cube_data, statistic, t_axis):

def _datetime_to_int_days(cube):
"""Return list of int(days) converted from cube datetime cells."""
cube = _align_yearly_axes(cube)
time_cells = [cell.point for cell in cube.coord('time').cells()]
time_unit = cube.coord('time').units.name
time_offset = _get_time_offset(time_unit)

# extract date info
real_dates = []
for date_obj in time_cells:
# real_date resets the actual data point day
# to the 1st of the month so that there are no
# wrong overlap indices
# NOTE: this workaround is good only
# for monthly data
real_date = datetime(date_obj.year, date_obj.month, 1, 0, 0, 0)
real_dates.append(real_date)

# get the number of days starting from the reference unit
time_unit = cube.coord('time').units.name
time_offset = _get_time_offset(time_unit)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hey @valeriupredoi , this is one of the points that really confuses me. What happens if different cubes have different time offsets? Don't you want the offset with respect to a common reference time? I'm thinking something like

def _datetime_to_int_days(cube, overlap=False):
    """Return list of int(days) with respect to a common reference.

    Cubes may have different calendars. This function extracts the date
    information from the cube and re-constructs a default calendar using
    the datetime library.

    This doesn't work for daily or subdaily data, because different
    calendars may have different number of days in the year.

    The standard calendar used by datetime provides the to_ordinal()
    function which returns "int number of days since the 0001/01/01".
    """
    # Convert to a common time format
    years = [cell.point.year for cell in cube.coord('time').cells()]
    months = [cell.point.month for cell in cube.coord('time').cells()]
    if not 0 in np.diff(years):
        # yearly data
        standard_dates = [datetime(year, 7, 1) for year in years]
    elif not 0 in np.diff(months):
        # monthly data
        standard_dates = [datetime(year, month, 15) for year, month in zip(years, months)]
    else:
        raise ValueError("Multimodel only supports yearly or monthly data")

    return [dt.to_ordinal() for dt in standard_dates]

I can work this out further if you like?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes that's exactly that, I use the number of days since the start of the data taken from units forcing a STANDARD_CALENDAR eg days since 1950-01-01, calendar=STANDARD_CALENDAR - this works fine if there is no variation in calendars since the units are all the same for all cubes after cmor.fix unified the units, but I suspect it's the cause of the shifts @LisaBock is seeing when calendars differ

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

btw isn't it toordinal()?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

btw isn't it toordinal()?

Right you are 🍺

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this works fine if there is no variation in calendars

So I'd rather just fix it, either to the ordinal or to 1950/01/01. Taking it from the cube seems unnecessary and error-prone.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah not the best code to understand indeed! but it does do a lot of tricksy stuff nonetheless. Can I suggest we finish up with this PR in terms of functionality for the yearly (normal) data and merge then we look into Lisa's issue, the more I look at it the more it looks to be way separate 🍺

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree we should respect the scope of the PR, but let's not merge to hastily. I'm not so keen on the overlap argument for the yearly data. But removing it in favor of the above suggestion breaks several tests. Can I look into fixing that first?

Copy link
Contributor Author

@valeriupredoi valeriupredoi Jun 18, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

that would be very much appreciated mate and will buy a 🍺

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So I fixed most of it, but some tests still fail because test cube 2 has a daily time coordinate, and the code above raises an exception in that case. What to do with that?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

let's not make the changes - your suggestion below saved the day, so let's keep the changes to a minimum and only commit what's necessary - let me take it from here 🍺

days = [(date_obj - time_offset).days for date_obj in real_dates]

return days


def _align_yearly_axes(cube):
"""Perform a time-regridding operation to align time axes for yr data."""
years = [cell.point.year for cell in cube.coord('time').cells()]
# be extra sure that the first point is not in the previous year
if 0 not in np.diff(years):
return regrid_time(cube, 'yr')
return cube


def _get_overlap(cubes):
"""
Get discrete time overlaps.
Expand Down
45 changes: 41 additions & 4 deletions tests/unit/preprocessor/_multimodel/test_multimodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,22 @@ def setUp(self):
units=Unit(
'days since 1950-01-01',
calendar='gregorian'))

yr_time = iris.coords.DimCoord([15, 410],
standard_name='time',
bounds=[[1., 30.], [395., 425.]],
units=Unit(
'days since 1950-01-01',
calendar='gregorian'))
yr_time2 = iris.coords.DimCoord([1., 367., 733., 1099.],
standard_name='time',
bounds=[
[0.5, 1.5],
[366, 368],
[732, 734],
[1098, 1100], ],
units=Unit(
'days since 1950-01-01',
calendar='gregorian'))
zcoord = iris.coords.DimCoord([0.5, 5., 50.],
standard_name='air_pressure',
long_name='air_pressure',
Expand All @@ -75,6 +90,14 @@ def setUp(self):
coords_spec5 = [(time2, 0), (zcoord, 1), (lats, 2), (lons, 3)]
self.cube2 = iris.cube.Cube(data3, dim_coords_and_dims=coords_spec5)

coords_spec4_yr = [(yr_time, 0), (zcoord, 1), (lats, 2), (lons, 3)]
self.cube1_yr = iris.cube.Cube(data2,
dim_coords_and_dims=coords_spec4_yr)

coords_spec5_yr = [(yr_time2, 0), (zcoord, 1), (lats, 2), (lons, 3)]
self.cube2_yr = iris.cube.Cube(data3,
dim_coords_and_dims=coords_spec5_yr)

def test_get_time_offset(self):
"""Test time unit."""
result = _get_time_offset("days since 1950-01-01")
Expand All @@ -91,20 +114,34 @@ def test_compute_statistic(self):
self.assert_array_equal(stat_mean, expected_mean)
self.assert_array_equal(stat_median, expected_median)

def test_compute_full_statistic_cube(self):
def test_compute_full_statistic_mon_cube(self):
data = [self.cube1, self.cube2]
stats = multi_model_statistics(data, 'full', ['mean'])
expected_full_mean = np.ma.ones((2, 3, 2, 2))
expected_full_mean.mask = np.zeros((2, 3, 2, 2))
expected_full_mean.mask[1] = True
self.assert_array_equal(stats['mean'].data, expected_full_mean)

def test_compute_overlap_statistic_cube(self):
def test_compute_full_statistic_yr_cube(self):
data = [self.cube1_yr, self.cube2_yr]
stats = multi_model_statistics(data, 'full', ['mean'])
expected_full_mean = np.ma.ones((4, 3, 2, 2))
expected_full_mean.mask = np.zeros((4, 3, 2, 2))
expected_full_mean.mask[2:4] = True
self.assert_array_equal(stats['mean'].data, expected_full_mean)

def test_compute_overlap_statistic_mon_cube(self):
data = [self.cube1, self.cube1]
stats = multi_model_statistics(data, 'overlap', ['mean'])
expected_ovlap_mean = np.ma.ones((2, 3, 2, 2))
self.assert_array_equal(stats['mean'].data, expected_ovlap_mean)

def test_compute_overlap_statistic_yr_cube(self):
data = [self.cube1_yr, self.cube1_yr]
stats = multi_model_statistics(data, 'overlap', ['mean'])
expected_ovlap_mean = np.ma.ones((2, 3, 2, 2))
self.assert_array_equal(stats['mean'].data, expected_ovlap_mean)

def test_compute_std(self):
"""Test statistic."""
data = [self.cube1.data[0], self.cube2.data[0] * 2]
Expand Down Expand Up @@ -134,7 +171,7 @@ def test_put_in_cube(self):
stat_cube = _put_in_cube(self.cube1, cube_data, "mean", t_axis=None)
self.assert_array_equal(stat_cube.data, self.cube1.data)

def test_datetime_to_int_days(self):
def test_datetime_to_int_days_no_overlap(self):
"""Test _datetime_to_int_days."""
computed_dats = _datetime_to_int_days(self.cube1)
expected_dats = [0, 31]
Expand Down