diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 17c550c..45ddf1d 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -20,9 +20,10 @@ jobs: - name: Checkout and use lfs uses: actions/checkout@v2 with: - lfs: true - - name: Download LFS - run: git lfs checkout + # lfs: true + lfs: false + # - name: Download LFS + # run: git lfs checkout - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v2 with: @@ -37,6 +38,8 @@ jobs: run: | pip install pytest pytest-cov pytest --import-mode=append --cov-config=.coveragerc --cov=ruins --cov-report=xml + env: + NO_LFS: true - name: Upload coverage to codecov uses: codecov/codecov-action@v2 diff --git a/README.md b/README.md index d75b0a3..d9c517a 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,11 @@ # RUINS climate data and regional model app +![GitHub release (latest by date)](https://img.shields.io/github/v/release/hydrocode-de/RUINSapp?color=success&logo=Github) +![PyPI](https://img.shields.io/pypi/v/ruins-app?color=success&logo=PyPI) +[![Test RUINS](https://github.com/hydrocode-de/RUINSapp/actions/workflows/main.yml/badge.svg)](https://github.com/hydrocode-de/RUINSapp/actions/workflows/main.yml) +[![codecov](https://codecov.io/gh/hydrocode-de/RUINSapp/branch/main/graph/badge.svg?token=SFxENKltZb)](https://codecov.io/gh/hydrocode-de/RUINSapp) + + This is a compilation of tools to assess and visualise climate data and climate model projections at the German North Sea coast. This app is intended to especially focus on the treatment of uncertainties within the data and model projections. For more information about the RUINS project, see our website. 
diff --git a/requirements.txt b/requirements.txt index aade70b..0725afd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,4 +7,5 @@ statsmodels xarray climate-indices pillow -sklearn \ No newline at end of file +sklearn +fire \ No newline at end of file diff --git a/ruins/__init__.py b/ruins/__init__.py index 290d7c6..abeeedb 100644 --- a/ruins/__init__.py +++ b/ruins/__init__.py @@ -1 +1 @@ -__version__ = '0.3.0' \ No newline at end of file +__version__ = '0.4.0' diff --git a/ruins/core/__init__.py b/ruins/core/__init__.py index 3558f42..d1db6a5 100644 --- a/ruins/core/__init__.py +++ b/ruins/core/__init__.py @@ -1 +1,2 @@ -from .config import Config \ No newline at end of file +from .config import Config +from .data_manager import DataManager \ No newline at end of file diff --git a/ruins/core/data_manager.py b/ruins/core/data_manager.py new file mode 100644 index 0000000..f1e4395 --- /dev/null +++ b/ruins/core/data_manager.py @@ -0,0 +1,229 @@ +""" +Data Manager +============ + +The DataManager is a wrapper around all data sources used by RUINSapp. +It can be configured by any :class:`Config <ruins.core.Config>` class +and organizes or caches all data sources using a +:class:`DataSource <ruins.core.data_manager.DataSource>` inherited class. +This makes the read and filter interface available on all sources, no matter +where they are stored. +Using the :class:`Config <ruins.core.Config>` to instantiate a data +manager can in principle enable different profiles, or even an interaction +with the frontend, although not implemented nor desired at the current stage. + +Example +------- + +.. code-block:: python + + from ruins import core + + # create default config + conf = core.Config() + + # create a data manager from this + dm = core.DataManager(**conf) + +Of course, the data manager can also be used without the config, i.e. to open it +in debug mode: + +.. 
# Maps a file extension (without the leading dot) to the *name* of the
# DataSource subclass that manages files of that type. The name is resolved
# lazily by DataManager.resolve_class_name.
DEFAULT_MIMES = {
    'nc': 'HDF5Source'
}


class DataSource(abc.ABC):
    """Abstract interface shared by all data sources.

    Any extra keyword arguments are stored on ``_kwargs`` so that subclasses
    can pick them up.
    """
    def __init__(self, **kwargs):
        self._kwargs = kwargs

    @abc.abstractmethod
    def read(self):
        """Return the content of the data source."""

    @abc.abstractmethod
    def filter(self, **kwargs):
        """Return a filtered subset of the data source."""


class HDF5Source(DataSource):
    """Data source backed by a file that is opened with xarray.

    Parameters
    ----------
    path : str
        Location of the file on disk.
    cache : bool
        If True, the file is loaded on the first :func:`read` only and the
        result is kept on the ``data`` attribute for later calls.

    """
    def __init__(self, path: str, cache: bool = True, **kwargs):
        super().__init__(**kwargs)
        self.path = path
        self.cache = cache

    def _load_source(self):
        """Method to load the actual source on the disk"""
        return xr.load_dataset(self.path)

    def read(self):
        """Return the dataset, re-using the cached copy if caching is on."""
        if not self.cache:
            return self._load_source()

        # ``data`` only exists after the first successful load
        if not hasattr(self, 'data'):
            self.data = self._load_source()
        return self.data

    def filter(self, **kwargs):
        """Not implemented yet. Accepts ``**kwargs`` like the base class."""
        pass


class DataManager(Mapping):
    """Main class for accessing different data sources.

    The DataManager holds and manages all data sources. The default behavior is
    to scan the specified path for files of known file extension and cache them
    in memory. It is a read-only mapping of source name (file name without
    extension) to the managing DataSource instance.

    Parameters
    ----------
    datapath : str
        A location where the data is stored. The class will load all sources
        there and make them accessible through DataSource classes.
    cache : bool
        Will be passed to the DataSource classes. If true, the source will only
        be read once and then stored in memory until the DataManager gets
        deconstructed.
    debug : bool
        If true, files that are skipped while scanning the datapath are
        reported on stdout.
    include_mimes : dict
        A dictionary of file extensions and their corresponding DataSource.
        If something is not listed, the DataManager will ignore the file type.
        The include_mimes can be overwritten by passing filenames directly.

    """
    def __init__(self, datapath: str = None, cache: bool = True, debug: bool = False, **kwargs) -> None:
        """
        You can pass in a Config as kwargs.
        """
        # check if the no config - or config without datapath - was passed
        if datapath is None:
            # NOTE(review): in this branch only **kwargs reach the Config;
            # explicitly passed ``cache`` / ``debug`` values are not forwarded.
            # Confirm against Config whether that is intended.
            from ruins.core import Config
            self.from_config(**Config(**kwargs))
        else:
            self.from_config(datapath=datapath, cache=cache, debug=debug, **kwargs)

    def from_config(self, datapath: str = None, cache: bool = True, debug: bool = False, **kwargs) -> None:
        """
        Initialize the DataManager from a :class:`Config <ruins.core.Config>` object.

        All keyword arguments that are not named explicitly are kept as the
        manager's configuration (e.g. ``include_mimes`` or per-source settings).
        """
        # store the main settings
        self._config = kwargs
        self._datapath = datapath
        self.cache = cache
        self.debug = debug

        # file settings
        self._data_sources = {}

        # infer data source
        if self._datapath is not None:
            self._infer_from_folder()

    @property
    def datapath(self) -> str:
        """Folder that is scanned for data sources."""
        return self._datapath

    @datapath.setter
    def datapath(self, path: str) -> None:
        # NOTE(review): sources found under the previous datapath stay
        # registered; only new names are added or overwritten.
        if os.path.exists(path):
            self._datapath = path
            self._infer_from_folder()
        else:
            raise OSError(f"{path} does not exist.")

    @property
    def datasources(self) -> List[str]:
        """Names of all managed data sources."""
        return list(self._data_sources.keys())

    def _infer_from_folder(self) -> None:
        """
        Read all files from the datapath as specified on instantiation.
        Calls :func:`add_source` on each file.
        """
        # Without ``recursive=True`` the '**' pattern behaves like '*', so the
        # second glob reaches exactly one directory level below the datapath.
        top_level = glob.glob(os.path.join(self.datapath, '*'))
        nested = glob.glob(os.path.join(self.datapath, '**', '*'))
        on_unknown = 'warn' if self.debug else 'ignore'

        # glob order is OS dependent; sort so that name clashes are always
        # resolved the same way (nested files still win over top-level ones)
        for fname in sorted(top_level) + sorted(nested):
            # directories are containers, not data sources
            if os.path.isdir(fname):
                continue
            self.add_source(path=fname, not_exists=on_unknown)

    def add_source(self, path: str, not_exists: str = 'raise') -> None:
        """
        Add a file as data source to the DataManager.
        Only if the file has an allowed file extension, it will be managed.
        Files of same name will be overwritten, this is also true if they had
        different extensions.

        Parameters
        ----------
        path : str
            Location of the file. The file name without its last extension is
            used as the name of the data source.
        not_exists : str
            What to do if the extension is not configured: ``'raise'`` an
            OSError, ``'warn'`` on stdout or ``'ignore'`` the file.

        Raises
        ------
        OSError
            If the extension is not configured and ``not_exists='raise'``.

        """
        # load the tracked
        mimes = self._config.get('include_mimes', DEFAULT_MIMES)

        # only the last extension decides the type, so names that contain
        # further dots ('a.b.nc') are handled as well
        basename, extension = os.path.splitext(os.path.basename(path))
        mime = extension[1:]
        if not mime:
            if self.debug:
                print(f"[Warning]: {path} has no extension.")
            return

        if mime not in mimes:
            if not_exists == 'raise':
                raise OSError(f"{path} is not a configured data source")
            if not_exists == 'warn':
                print(f"{path} is found, but not a configured data source")
            return

        # A config entry named like the source customizes it: a string names
        # the DataSource class to use, a mapping holds extra keyword arguments.
        override = self._config.get(basename)
        cls_name = override if isinstance(override, str) else mimes[mime]
        # copy, so the stored configuration is never modified
        args = dict(override) if isinstance(override, Mapping) else {}
        args.update({'path': path, 'cache': self.cache})

        # add the source
        source_class = self.resolve_class_name(cls_name)
        self._data_sources[basename] = source_class(**args)

    def resolve_class_name(self, cls_name: str) -> Type[DataSource]:
        """
        Return the DataSource subclass of the given name.

        Raises
        ------
        RuntimeError
            If this module does not define a DataSource subclass of that name.

        """
        # checkout globals
        cls = globals().get(cls_name)

        # do we have a class? Other module globals (imports, constants) do
        # not count.
        if not (isinstance(cls, type) and issubclass(cls, DataSource)):
            # TODO, there is maybe an extension module to search one day
            raise RuntimeError(f"Can't find class {cls_name}.")

        return cls

    def __len__(self):
        """Return the number of managed data sources"""
        return len(self._data_sources)

    def __iter__(self):
        """Iterate over all dataset names"""
        return iter(self._data_sources)

    def __getitem__(self, key: str):
        """Return the requested datasource"""
        return self._data_sources[key]

    def __repr__(self):
        return f"{self.__class__.__name__}(datapath={self.datapath}, cache={self.cache})"

    def __str__(self):
        # the previous implementation returned an empty string
        return f"<{self.__class__.__name__} at {self.datapath} managing {len(self)} data source(s)>"
def increment(which='patch', version=None):
    """
    Increment the version number.

    Parameters
    ----------
    which : str
        Part of the version to increment: 'major', 'minor' or 'patch'.
        All parts right of the incremented one are reset to 0.
    version : str
        Version of the form MAJOR.MINOR.PATCH to start from. Defaults to the
        ``__version__`` imported from ruins.

    Returns
    -------
    str
        The incremented version.

    Raises
    ------
    ValueError
        If ``which`` is unknown or ``version`` is not of the expected form.

    """
    levels = ('major', 'minor', 'patch')
    if which not in levels:
        raise ValueError("Invalid version increment.")

    if version is None:
        version = __version__

    parts = version.split('.')
    if len(parts) != len(levels):
        raise ValueError(f"Expected a MAJOR.MINOR.PATCH version, got '{version}'.")

    # bump the requested part and reset everything right of it
    position = levels.index(which)
    parts[position] = str(int(parts[position]) + 1)
    parts[position + 1:] = ['0'] * (len(levels) - position - 1)
    return '.'.join(parts)


def replace(which='patch', path=None):
    """
    Increment the version number for RUINS.

    The first ``__version__ = ...`` assignment in the file is rewritten with
    the incremented version, which is also printed to stdout.

    Parameters
    ----------
    which : str
        Part of the version to increment, see :func:`increment`.
    path : str
        File that holds the ``__version__`` assignment. Defaults to
        ``ruins/__init__.py`` next to this script.

    Raises
    ------
    RuntimeError
        If the file has no ``__version__`` assignment. The file is left
        untouched in that case.

    """
    # find the file
    if path is None:
        path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'ruins', '__init__.py'))

    # read
    with open(path, 'r') as f:
        lines = f.readlines()

    # replace the version
    new_version = None
    for i, line in enumerate(lines):
        # only a real assignment counts, not a comment mentioning the name
        name, sep, value = line.partition('=')
        if sep and name.strip() == '__version__':
            # use the version found in the file, without quotes or a
            # trailing comment
            current = value.split('#', 1)[0].strip().strip('\'"')
            new_version = increment(which, version=current)
            lines[i] = f"__version__ = '{new_version}'\n"
            break

    if new_version is None:
        raise RuntimeError(f"No __version__ assignment found in {path}.")

    # overwrite
    with open(path, 'w') as f:
        f.writelines(lines)

    # nothing is returned on purpose: fire prints non-None return values
    print(new_version)


if __name__ == '__main__':
    import fire
    fire.Fire(replace)