diff --git a/docs/sphinx/source/user_guide/modeling_topics/weather_data.rst b/docs/sphinx/source/user_guide/modeling_topics/weather_data.rst index 3d9b0096af..8199044a5c 100644 --- a/docs/sphinx/source/user_guide/modeling_topics/weather_data.rst +++ b/docs/sphinx/source/user_guide/modeling_topics/weather_data.rst @@ -76,7 +76,7 @@ them. Usage ----- -With some exceptions, the :py:mod:`pvlib.iotools` functions +The :py:mod:`pvlib.iotools` functions provide a uniform interface for accessing data across many formats. Specifically, :py:mod:`pvlib.iotools` functions usually return two objects: a :py:class:`pandas.DataFrame` of the actual dataset, plus a metadata @@ -89,8 +89,7 @@ Typical usage looks something like this: .. code-block:: python - # get_pvgis_tmy returns two additional values besides df and metadata - df, _, _, metadata = pvlib.iotools.get_pvgis_tmy(latitude, longitude, map_variables=True) + df, metadata = pvlib.iotools.get_pvgis_tmy(latitude, longitude, map_variables=True) This code will fetch a Typical Meteorological Year (TMY) dataset from PVGIS, returning a :py:class:`pandas.DataFrame` containing the hourly weather data diff --git a/docs/sphinx/source/whatsnew/v0.12.1.rst b/docs/sphinx/source/whatsnew/v0.12.1.rst index 4e1fd969ba..34502715c7 100644 --- a/docs/sphinx/source/whatsnew/v0.12.1.rst +++ b/docs/sphinx/source/whatsnew/v0.12.1.rst @@ -11,6 +11,10 @@ Breaking Changes following the iotools convention instead of ``(data,inputs,meta)``. The ``inputs`` dictionary is now included in ``meta``, which has changed structure to accommodate it. (:pull:`2462`) +* The functions :py:func:`~pvlib.iotools.read_pvgis_tmy` and + :py:func:`~pvlib.iotools.get_pvgis_tmy` now return ``(data,meta)`` + following the iotools convention instead of ``(data,months_selected,inputs,meta)``. + (:pull:`2470`) * Remove ``outputformat='basic'`` option in :py:func:`~pvlib.iotools.get_pvgis_tmy`. 
(:pull:`2416`) diff --git a/pvlib/iotools/pvgis.py b/pvlib/iotools/pvgis.py index 9f257d530f..9bfd7f5a79 100644 --- a/pvlib/iotools/pvgis.py +++ b/pvlib/iotools/pvgis.py @@ -261,7 +261,7 @@ def _parse_pvgis_hourly_json(src, map_variables): def _parse_pvgis_hourly_csv(src, map_variables): # The first 4 rows are latitude, longitude, elevation, radiation database - metadata = {'inputs': {}} + metadata = {'inputs': {}, 'descriptions': {}} # 'location' metadata # 'Latitude (decimal degrees): 45.000\r\n' metadata['inputs']['latitude'] = float(src.readline().split(':')[1]) @@ -440,6 +440,13 @@ def get_pvgis_tmy(latitude, longitude, outputformat='json', usehorizon=True, For more information see the PVGIS [1]_ TMY tool documentation [2]_. + .. versionchanged:: 0.13.0 + The function now returns two items ``(data,meta)``. Previous + versions of this function returned four items + ``(data,months_selected,inputs,meta)``. The ``inputs`` dictionary + and the ``months_selected`` list are now stored in ``meta``, which has + changed structure to accommodate them. 
+ Parameters ---------- latitude : float @@ -478,10 +485,6 @@ def get_pvgis_tmy(latitude, longitude, outputformat='json', usehorizon=True, ------- data : pandas.DataFrame the weather data - months_selected : list - TMY year for each month, ``None`` for EPW - inputs : dict - the inputs, ``None`` for EPW - metadata : list or dict + metadata : dict file metadata @@ -527,17 +530,16 @@ def get_pvgis_tmy(latitude, longitude, outputformat='json', usehorizon=True, else: raise requests.HTTPError(err_msg['message']) # initialize data to None in case API fails to respond to bad outputformat - data = None, None, None, None + data, meta = None, None if outputformat == 'json': src = res.json() - data, months_selected, inputs, meta = _parse_pvgis_tmy_json(src) + data, meta = _parse_pvgis_tmy_json(src) elif outputformat == 'csv': with io.BytesIO(res.content) as src: - data, months_selected, inputs, meta = _parse_pvgis_tmy_csv(src) + data, meta = _parse_pvgis_tmy_csv(src) elif outputformat == 'epw': with io.StringIO(res.content.decode('utf-8')) as src: data, meta = read_epw(src) - months_selected, inputs = None, None elif outputformat == 'basic': err_msg = ("outputformat='basic' is no longer supported by pvlib, " "please use outputformat='csv' instead.") @@ -551,34 +553,37 @@ def get_pvgis_tmy(latitude, longitude, outputformat='json', usehorizon=True, coerce_year = coerce_year or 1990 data = _coerce_and_roll_tmy(data, roll_utc_offset, coerce_year) - return data, months_selected, inputs, meta + return data, meta def _parse_pvgis_tmy_json(src): - inputs = src['inputs'] - meta = src['meta'] - months_selected = src['outputs']['months_selected'] + meta = src['meta'].copy() + # Replace the "inputs" entry of the metadata with the request inputs + meta['inputs'] = dict(src['inputs']) + # Keep the original meta "inputs" one level down, without mutating src + meta['inputs']['descriptions'] = src['meta']['inputs'] + meta['months_selected'] = src['outputs']['months_selected'] data = pd.DataFrame(src['outputs']['tmy_hourly']) data.index = pd.to_datetime( data['time(UTC)'], 
format='%Y%m%d:%H%M', utc=True) data = data.drop('time(UTC)', axis=1) - return data, months_selected, inputs, meta + return data, meta def _parse_pvgis_tmy_csv(src): # the first 3 rows are latitude, longitude, elevation - inputs = {} + meta = {'inputs': {}, 'descriptions': {}} # 'Latitude (decimal degrees): 45.000\r\n' - inputs['latitude'] = float(src.readline().split(b':')[1]) + meta['inputs']['latitude'] = float(src.readline().split(b':')[1]) # 'Longitude (decimal degrees): 8.000\r\n' - inputs['longitude'] = float(src.readline().split(b':')[1]) + meta['inputs']['longitude'] = float(src.readline().split(b':')[1]) # Elevation (m): 1389.0\r\n - inputs['elevation'] = float(src.readline().split(b':')[1]) + meta['inputs']['elevation'] = float(src.readline().split(b':')[1]) # TMY has an extra line here: Irradiance Time Offset (h): 0.1761\r\n line = src.readline() if line.startswith(b'Irradiance Time Offset'): - inputs['irradiance time offset'] = float(line.split(b':')[1]) + meta['inputs']['irradiance time offset'] = float(line.split(b':')[1]) src.readline() # skip over the "month,year\r\n" else: # `line` is already the "month,year\r\n" line, so nothing to do @@ -589,6 +594,7 @@ def _parse_pvgis_tmy_csv(src): for month in range(12): months_selected.append( {'month': month+1, 'year': int(src.readline().split(b',')[1])}) + meta['months_selected'] = months_selected # then there's the TMY (typical meteorological year) data # first there's a header row: # time(UTC),T2m,RH,G(h),Gb(n),Gd(h),IR(h),WS10m,WD10m,SP @@ -601,14 +607,26 @@ def _parse_pvgis_tmy_csv(src): data = pd.DataFrame(data, dtype=float) data.index = dtidx # finally there's some meta data - meta = [line.decode('utf-8').strip() for line in src.readlines()] - return data, months_selected, inputs, meta + # "name: description" lines; split on the first colon only + for line in src.readlines(): + line = line.decode('utf-8').strip() + if ':' in line: + name, _, description = line.partition(':') + meta['descriptions'][name] = description.strip() + return data, meta def 
read_pvgis_tmy(filename, pvgis_format=None, map_variables=True): """ Read a TMY file downloaded from PVGIS. + .. versionchanged:: 0.13.0 + The function now returns two items ``(data,meta)``. Previous + versions of this function returned four items + ``(data,months_selected,inputs,meta)``. The ``inputs`` dictionary + and the ``months_selected`` list are now stored in ``meta``, which has + changed structure to accommodate them. + Parameters ---------- filename : str, pathlib.Path, or file-like buffer @@ -629,10 +647,6 @@ def read_pvgis_tmy(filename, pvgis_format=None, map_variables=True): ------- data : pandas.DataFrame the weather data - months_selected : list - TMY year for each month, ``None`` for EPW - inputs : dict - the inputs, ``None`` for EPW - metadata : list or dict + metadata : dict file metadata @@ -662,7 +676,6 @@ def read_pvgis_tmy(filename, pvgis_format=None, map_variables=True): # EPW: use the EPW parser from the pvlib.iotools epw.py module if outputformat == 'epw': data, meta = read_epw(filename) - months_selected, inputs = None, None # NOTE: json and csv output formats have parsers defined as private # functions in this module @@ -676,16 +689,14 @@ def read_pvgis_tmy(filename, pvgis_format=None, map_variables=True): except AttributeError: # str/path has no .read() attribute with open(str(filename), 'r') as fbuf: src = json.load(fbuf) - data, months_selected, inputs, meta = _parse_pvgis_tmy_json(src) + data, meta = _parse_pvgis_tmy_json(src) elif outputformat == 'csv': try: - data, months_selected, inputs, meta = \ - _parse_pvgis_tmy_csv(filename) + data, meta = _parse_pvgis_tmy_csv(filename) except AttributeError: # str/path has no .read() attribute with open(str(filename), 'rb') as fbuf: - data, months_selected, inputs, meta = \ - _parse_pvgis_tmy_csv(fbuf) + data, meta = _parse_pvgis_tmy_csv(fbuf) elif outputformat == 'basic': err_msg = "outputformat='basic' is no longer supported, please use " \ @@ -702,7 +713,7 @@ def read_pvgis_tmy(filename, pvgis_format=None, 
map_variables=True): if map_variables: data = data.rename(columns=VARIABLE_MAP) - return data, months_selected, inputs, meta + return data, meta def get_pvgis_horizon(latitude, longitude, url=URL, **kwargs): diff --git a/tests/iotools/test_pvgis.py b/tests/iotools/test_pvgis.py index 1cf52eb64d..cb5f408069 100644 --- a/tests/iotools/test_pvgis.py +++ b/tests/iotools/test_pvgis.py @@ -371,9 +371,7 @@ def meta_expected(): @pytest.fixture def csv_meta(meta_expected): - return [ - f"{k}: {v['description']} ({v['units']})" for k, v - in meta_expected['outputs']['tmy_hourly']['variables'].items()] + return meta_expected['outputs']['tmy_hourly']['variables'] @pytest.fixture @@ -393,7 +391,15 @@ def test_get_pvgis_tmy(expected, month_year_expected, inputs_expected, def _compare_pvgis_tmy_json(expected, month_year_expected, inputs_expected, meta_expected, pvgis_data): - data, months_selected, inputs, meta = pvgis_data + data, meta = pvgis_data + + # Re-create original outputs (prior to #2470) + months_selected = meta['months_selected'] + inputs = meta['inputs'].copy() + del inputs['descriptions'] + meta['inputs'] = meta['inputs']['descriptions'] + del meta['months_selected'] + # check each column of output separately for outvar in meta_expected['outputs']['tmy_hourly']['variables'].keys(): assert np.allclose(data[outvar], expected[outvar]) @@ -419,10 +425,9 @@ def _compare_pvgis_tmy_json(expected, month_year_expected, inputs_expected, @pytest.mark.remote_data @pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY) def test_get_pvgis_tmy_kwargs(userhorizon_expected): - _, _, inputs, _ = get_pvgis_tmy(45, 8, usehorizon=False, - map_variables=False) - assert inputs['meteo_data']['use_horizon'] is False - data, _, _, _ = get_pvgis_tmy( + _, meta = get_pvgis_tmy(45, 8, usehorizon=False, map_variables=False) + assert meta['inputs']['meteo_data']['use_horizon'] is False + data, _ = get_pvgis_tmy( 45, 8, userhorizon=[0, 10, 20, 30, 40, 15, 25, 5], map_variables=False) assert 
np.allclose( data['G(h)'], userhorizon_expected['G(h)'].values) @@ -430,17 +435,17 @@ def test_get_pvgis_tmy_kwargs(userhorizon_expected): data['Gb(n)'], userhorizon_expected['Gb(n)'].values) assert np.allclose( data['Gd(h)'], userhorizon_expected['Gd(h)'].values) - _, _, inputs, _ = get_pvgis_tmy(45, 8, startyear=2005, map_variables=False) - assert inputs['meteo_data']['year_min'] == 2005 - _, _, inputs, _ = get_pvgis_tmy(45, 8, endyear=2016, map_variables=False) - assert inputs['meteo_data']['year_max'] == 2016 + _, meta = get_pvgis_tmy(45, 8, startyear=2005, map_variables=False) + assert meta['inputs']['meteo_data']['year_min'] == 2005 + _, meta = get_pvgis_tmy(45, 8, endyear=2016, map_variables=False) + assert meta['inputs']['meteo_data']['year_max'] == 2016 @pytest.mark.remote_data @pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY) def test_get_pvgis_tmy_coerce_year(): """test utc_offset and coerce_year work as expected""" - base_case, _, _, _ = get_pvgis_tmy(45, 8) # Turin + base_case, _ = get_pvgis_tmy(45, 8) # Turin assert str(base_case.index.tz) == 'UTC' assert base_case.index.name == 'time(UTC)' noon_test_data = [ @@ -449,9 +454,9 @@ def test_get_pvgis_tmy_coerce_year(): cet_tz = 1 # Turin time is CET cet_name = 'Etc/GMT-1' # check indices of rolled data after converting timezone - pvgis_data, _, _, _ = get_pvgis_tmy(45, 8, roll_utc_offset=cet_tz) - jan1_midnight = pd.Timestamp('1990-01-01 00:00:00', tz=cet_name) - dec31_midnight = pd.Timestamp('1990-12-31 23:00:00', tz=cet_name) + pvgis_data, _ = get_pvgis_tmy(45, 8, roll_utc_offset=cet_tz) + jan1_midnight = pd.Timestamp('1990-01-01 00', tz=cet_name) + dec31_midnight = pd.Timestamp('1990-12-31 23', tz=cet_name) assert pvgis_data.index[0] == jan1_midnight assert pvgis_data.index[-1] == dec31_midnight assert pvgis_data.index.name == f'time({cet_name})' @@ -461,10 +466,10 @@ def test_get_pvgis_tmy_coerce_year(): assert all(test_case == expected) # repeat tests with year coerced test_yr = 2021 - 
pvgis_data, _, _, _ = get_pvgis_tmy( + pvgis_data, _ = get_pvgis_tmy( 45, 8, roll_utc_offset=cet_tz, coerce_year=test_yr) - jan1_midnight = pd.Timestamp(f'{test_yr}-01-01 00:00:00', tz=cet_name) - dec31_midnight = pd.Timestamp(f'{test_yr}-12-31 23:00:00', tz=cet_name) + jan1_midnight = pd.Timestamp(f'{test_yr}-01-01 00', tz=cet_name) + dec31_midnight = pd.Timestamp(f'{test_yr}-12-31 23', tz=cet_name) assert pvgis_data.index[0] == jan1_midnight assert pvgis_data.index[-1] == dec31_midnight assert pvgis_data.index.name == f'time({cet_name})' @@ -472,9 +477,9 @@ def test_get_pvgis_tmy_coerce_year(): expected = pvgis_data[pvgis_data.index.month == m+1].iloc[12+cet_tz] assert all(test_case == expected) # repeat tests with year coerced but utc offset none or zero - pvgis_data, _, _, _ = get_pvgis_tmy(45, 8, coerce_year=test_yr) - jan1_midnight = pd.Timestamp(f'{test_yr}-01-01 00:00:00', tz='UTC') - dec31_midnight = pd.Timestamp(f'{test_yr}-12-31 23:00:00', tz='UTC') + pvgis_data, _ = get_pvgis_tmy(45, 8, coerce_year=test_yr) + jan1_midnight = pd.Timestamp(f'{test_yr}-01-01 00', tz='UTC') + dec31_midnight = pd.Timestamp(f'{test_yr}-12-31 23', tz='UTC') assert pvgis_data.index[0] == jan1_midnight assert pvgis_data.index[-1] == dec31_midnight assert pvgis_data.index.name == 'time(UTC)' @@ -494,7 +499,13 @@ def test_get_pvgis_tmy_csv(expected, month_year_expected, inputs_expected, def _compare_pvgis_tmy_csv(expected, month_year_expected, inputs_expected, meta_expected, csv_meta, pvgis_data): - data, months_selected, inputs, meta = pvgis_data + data, meta = pvgis_data + + # Re-create original outputs (prior to #2470) + months_selected = meta['months_selected'] + inputs = meta['inputs'].copy() + meta = meta['descriptions'] + # check each column of output separately for outvar in meta_expected['outputs']['tmy_hourly']['variables'].keys(): assert np.allclose(data[outvar], expected[outvar]) @@ -526,7 +537,7 @@ def test_get_pvgis_tmy_epw(expected, epw_meta): def 
_compare_pvgis_tmy_epw(expected, epw_meta, pvgis_data): - data, _, _, meta = pvgis_data + data, meta = pvgis_data assert np.allclose(data.ghi, expected['G(h)']) assert np.allclose(data.dni, expected['Gb(n)']) assert np.allclose(data.dhi, expected['Gd(h)']) @@ -556,8 +567,8 @@ def test_get_pvgis_tmy_basic(): @pytest.mark.remote_data @pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY) -def test_get_pvgis_map_variables(pvgis_tmy_mapped_columns): - actual, _, _, _ = get_pvgis_tmy(45, 8, map_variables=True) +def test_get_pvgis_tmy_map_variables(pvgis_tmy_mapped_columns): + actual, _ = get_pvgis_tmy(45, 8, map_variables=True) assert all(c in pvgis_tmy_mapped_columns for c in actual.columns) @@ -580,7 +591,7 @@ def test_read_pvgis_horizon_invalid_coords(): def test_read_pvgis_tmy_map_variables(pvgis_tmy_mapped_columns): fn = TESTS_DATA_DIR / 'tmy_45.000_8.000_2005_2023.json' - actual, _, _, _ = read_pvgis_tmy(fn, map_variables=True) + actual, _ = read_pvgis_tmy(fn, map_variables=True) assert all(c in pvgis_tmy_mapped_columns for c in actual.columns)