diff --git a/README.md b/README.md index 0c8a3ad8..99c9a422 100644 --- a/README.md +++ b/README.md @@ -1,73 +1,15 @@ # Environmental Simulator Quest -| Gitter | Linux/Mac | Windows | Test Coverage | -| --------- | --------- | --------- | ------------- | -| [![Join the chat at https://gitter.im/Quest-Development/Quest](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/Quest-Development/Quest) | [![Build Status](https://travis-ci.org/erdc/quest.svg?branch=master)](https://travis-ci.org/erdc/quest) | [![Build Status](https://ci.appveyor.com/api/projects/status/e20arxcfrcmb2ylm/branch/master?svg=true)](https://ci.appveyor.com/project/dharhas/quest) | [![Coverage Status](https://coveralls.io/repos/github/erdc/quest/badge.svg)](https://coveralls.io/github/erdc/quest) | +| Gitter | Linux/Mac | Windows | ReadTheDocs | Test Coverage | +| --------- | --------- | --------- | ------------- | ------------- | +| [![Join the chat at https://gitter.im/Quest-Development/Quest](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/Quest-Development/Quest) | [![Build Status](https://travis-ci.org/erdc/quest.svg?branch=master)](https://travis-ci.org/erdc/quest) | [![Build Status](https://ci.appveyor.com/api/projects/status/e20arxcfrcmb2ylm/branch/master?svg=true)](https://ci.appveyor.com/project/dharhas/quest) | [![Documentation Status](https://readthedocs.org/projects/quest/badge/?version=latest)](https://quest.readthedocs.io/en/latest/?badge=latest) | [![Coverage Status](https://coveralls.io/repos/github/erdc/quest/badge.svg)](https://coveralls.io/github/erdc/quest) | ### Project Description -Quest is a python library that provides an API the ability to search, publish and download data (both geographical and non-geographical) across multiple data sources including both local repositories and web based services. The library also allows provides tools in order to manipulate and manage the data that the user is working with. 
+Quest is a Python library that provides the ability to search, publish and download data (both geographical and non-geographical) from multiple data sources, including local repositories and web-based data providers. Quest also provides a suite of tools for manipulating and transforming data once it is downloaded. ### Project Links - Here is a live link for the Quest Documentation: https://quest.readthedocs.io/en/latest/ -## Setup Dev Environment - -- Install miniconda -- Install conda-env - - conda install conda-env - -- Clone master branch -- Create a new conda environment for development - - conda env create -n quest -f py3_conda_requirements.yml - - (you can also create a python 2 env but 3 is preferred) - -- Install quest in develop mode - - python setup.py develop - -## Development Workflow - -- change to master branch - - git checkout master - -- get the latest version of master - - git pull master - -- create a new branch locally - - git checkout -b mybranch - -- Develop the new features on your local machine, add tests for any new features -- push the local branch to gitlab and set up tracking, later `git push` is all that is required. - - git push -u origin mybranch - -- run tests on python 2 and python 3 using py.test -- Once you have finished developing your branch, check if master has changed - - git checkout master - - git pull - -- If `git pull` pulls in new changes from master then you need to rebase - - git checkout mybranch - - git rebase master - - (follow the prompts, you may have to fix conflicts) - -- after a rebase you may have to force push to gitlab on your branch - - git push -f - -- Run tests again. -- If everything looks good, use Gitlab to do a merge request from your branch to master -- Once the merge has been accepted, do not continuing working in that branch. make a new branch starting at step 1 - +Quest was designed to be extensible and has three types of plugins (io, tool, and provider). 
Provider plugins allow Quest to search for data from remote and local data providers. Tool plugins allow Quest to perform different data manipulations. I/O plugins allow Quest to read and write different file formats. +- Here is a link to an example Quest Plugin: https://github.com/quest-dev/quest_ncep_provider_plugin \ No newline at end of file diff --git a/appveyor.yml b/appveyor.yml index a5a5cf18..f4381616 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -35,4 +35,4 @@ install: - python -c "import quest; quest.api.update_settings(dict(CACHE_DIR='%QUEST_CACHE_DIR%')); quest.api.save_settings()" test_script: - - python -m pytest -vv + - python -m pytest -vv \ No newline at end of file diff --git a/conda_environment.yml b/conda_environment.yml index f27455a3..66cbb8c6 100644 --- a/conda_environment.yml +++ b/conda_environment.yml @@ -41,6 +41,7 @@ dependencies: - werkzeug # test dependencies + - pytest=3.7.4 - pytest-runner - pytest-cov - coveralls diff --git a/examples/workflow_gssha.py b/examples/broken/workflow_gssha.py similarity index 100% rename from examples/workflow_gssha.py rename to examples/broken/workflow_gssha.py diff --git a/examples/notebooks/Getting_Started.ipynb b/examples/notebooks/Getting_Started.ipynb new file mode 100644 index 00000000..475b8f72 --- /dev/null +++ b/examples/notebooks/Getting_Started.ipynb @@ -0,0 +1,308 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Getting Started with Quest."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pprint import pprint\n", + "import itertools\n", + "from IPython.display import display, Markdown\n", + "\n", + "import param\n", + "import ipywidgets as widgets\n", + "from paramnb import Widgets \n", + "\n", + "import quest\n", + "print('\\nQUEST version %s' % quest.api.get_quest_version())\n", + "print('\\nQUEST API version %s' % quest.api.get_api_version())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Quest Provider Plugins:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "providers = quest.api.get_providers()\n", + "print(\"Providers: \")\n", + "pprint(providers)\n", + "\n", + "services = quest.api.get_services()\n", + "print(\"\\nServices: \")\n", + "pprint(services)\n", + "\n", + "publishers = quest.api.get_publishers()\n", + "print(\"\\nPublishers: \")\n", + "pprint(publishers)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Quest Tool Plugins:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tools = quest.api.get_tools()\n", + "pprint(tools)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Quest Projects" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print('\\033[1m' + 'Showing existing projects.' + '\\033[0m')\n", + "projects = quest.api.get_projects()\n", + "pprint(projects)\n", + "\n", + "print('\\n---------------------------\\n')\n", + "print('\\033[1m' + 'Creating a new project.' 
+ '\\033[0m')\n", + "\n", + "if 'quest-demo' in projects:\n", + " quest.api.delete_project('quest-demo')\n", + "\n", + "quest.api.new_project('Quest-Demo', 'Quest-Demo', 'For demostrating how to use Quest.')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Quest Collections" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print('\\033[1m' + 'Showing existing collections.' + '\\033[0m')\n", + "collections = quest.api.get_collections()\n", + "pprint(collections)\n", + "\n", + "print('\\n---------------------------\\n')\n", + "print('\\033[1m' + 'Creating a new project.' + '\\033[0m')\n", + "\n", + "collection_name = 'col-test'\n", + "if collection_name in collections:\n", + " quest.api.delete(collection_name)\n", + "\n", + "quest.api.new_collection(collection_name, collection_name, 'For demonstrating how to add collections to Quest.')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Getting Catalog Entries:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "filters = {}\n", + "get_parameter = widgets.Dropdown(options=quest.api.get_mapped_parameters(), description='Parameters:')\n", + "display(get_parameter)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if get_parameter.value != '':\n", + " filters['parameter'] = get_parameter.value\n", + " get_service = widgets.Dropdown(options=quest.api.get_services(parameter=get_parameter.value), description='Services:')\n", + "else:\n", + " get_service = widgets.Dropdown(options=quest.api.get_services(), description='Services:')\n", + "display(get_service)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "get_geom = widgets.Dropdown(options=['', 'Point', 'Polygon', 'Line'], description='Gemoetry:')\n", + 
"display(get_geom)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if get_geom.value != '':\n", + " filters['geom_type'] = get_geom.value\n", + " \n", + "long_min = widgets.Text(description='long min:')\n", + "lat_min = widgets.Text(description='lat min:')\n", + "long_max = widgets.Text(description='long max:')\n", + "lat_max = widgets.Text(description='lat max:')\n", + "display(long_min, lat_min, long_max, lat_max)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "value = len(long_min.value) * len(lat_min.value) * len(long_max.value) * len(lat_max.value)\n", + "if value != 0:\n", + " bbox = [long_min.value, lat_min.value, long_max.value, lat_max.value]\n", + " filters['bbox'] = bbox" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cache = widgets.Checkbox(value=False, description='Update Cache')\n", + "display(cache)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "filters = {'parameter': get_parameter.value}\n", + "datasets = quest.api.search_catalog(uris=get_service.value, filters=filters, update_cache=cache.value, as_dataframe=True)\n", + "datasets" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "get_datasets = widgets.Dropdown(options=datasets.index.tolist()[:50], description='Datasets:')\n", + "display(get_datasets)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Add the dataset to a collection:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "catalog_entry = quest.api.add_datasets(collection_name, get_datasets.value)\n", + "catalog_entry" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Get the download 
options:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "download_options = quest.api.get_download_options(catalog_entry, fmt='param')[catalog_entry[0]]\n", + "Widgets(download_options)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Download the selected data:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "staged_id = quest.api.stage_for_download(catalog_entry, options=download_options)\n", + "print('\\033[1m' + \"Staged ID: \" + '\\033[0m' + staged_id[0])\n", + "\n", + "quest.api.download_datasets(staged_id)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/notebooks/WMTS_Example.ipynb b/examples/notebooks/WMTS_Example.ipynb index d08373bb..fcdd982a 100644 --- a/examples/notebooks/WMTS_Example.ipynb +++ b/examples/notebooks/WMTS_Example.ipynb @@ -82,9 +82,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": false - }, + "metadata": {}, "outputs": [], "source": [ "dataset = quest.api.get_data(\n", diff --git a/examples/notebooks/Watershed Delineation.ipynb b/examples/notebooks/Watershed Delineation.ipynb index 56fa18fe..381c4d96 100644 --- a/examples/notebooks/Watershed Delineation.ipynb +++ b/examples/notebooks/Watershed Delineation.ipynb @@ -109,9 +109,7 @@ "metadata": {}, "outputs": [], "source": [ - "fill_dataset = quest.tools.wbt_fill_depressions(\n", - " 
dataset=elevation_raster,\n", - ")['datasets'][0]\n", + "fill_dataset = quest.tools.wbt_fill_depressions(dataset=elevation_raster)['datasets'][0]\n", "fill = quest.api.open_dataset(fill_dataset, with_nodata=True, isel_band=0)" ] }, diff --git a/quest/api/__init__.py b/quest/api/__init__.py index 40466d68..d0f3f83d 100644 --- a/quest/api/__init__.py +++ b/quest/api/__init__.py @@ -4,7 +4,7 @@ Services Library. """ -_version__ = '3.0' +__version__ = '3.0.a1' # __all__ needed for autodoc to work diff --git a/quest/api/catalog.py b/quest/api/catalog.py index 03baa4a8..7b67f41b 100644 --- a/quest/api/catalog.py +++ b/quest/api/catalog.py @@ -1,18 +1,18 @@ import json import itertools + import pandas as pd import numpy as np import geojson from shapely.geometry import shape -from .. import util -from .. import plugins -from quest.database.database import get_db, db_session -from .datasets import new_dataset -from .metadata import get_metadata -from ..util import construct_service_uri from .tasks import add_async +from .metadata import get_metadata +from .datasets import new_dataset +from .. 
import util +from ..plugins import load_providers from ..static import DatasetSource, UriType +from ..database.database import get_db, db_session @add_async @@ -83,14 +83,14 @@ def search_catalog(uris=None, expand=False, as_dataframe=False, as_geojson=False grouped_uris = util.classify_uris(uris, as_dataframe=False, exclude=[UriType.DATASET, UriType.COLLECTION]) - services = grouped_uris.get('services') or [] + services = grouped_uris.get(UriType.SERVICE) or [] all_datasets = [] filters = filters or dict() for name in services: provider, service, _ = util.parse_service_uri(name) - provider_plugin = plugins.load_providers()[provider] + provider_plugin = load_providers()[provider] tmp_datasets = provider_plugin.search_catalog(service, update_cache=update_cache, **filters) all_datasets.append(tmp_datasets) @@ -202,7 +202,7 @@ def get_tags(service_uris, update_cache=False, filter=None, as_count=False): for service in services: provider, service, _ = util.parse_service_uri(service) - provider_plugin = plugins.load_providers()[provider] + provider_plugin = load_providers()[provider] service_tags = provider_plugin.get_tags(service, update_cache=update_cache) tags.update(service_tags) @@ -261,4 +261,4 @@ def new_catalog_entry(geometry=None, geom_type=None, geom_coords=None, metadata= with db_session: db.QuestCatalog(**data) - return construct_service_uri('quest', 'quest', catalog_id) + return util.construct_service_uri('quest', 'quest', catalog_id) diff --git a/quest/api/collections.py b/quest/api/collections.py index c87bc223..71d9d08e 100644 --- a/quest/api/collections.py +++ b/quest/api/collections.py @@ -1,9 +1,10 @@ -"""API functions related to Collections.""" -from quest.database.database import get_db, db_session -from .projects import _get_project_dir -import pandas as pd import os +import pandas as pd + +from .projects import _get_project_dir +from ..database.database import get_db, db_session + def get_collections(expand=False, as_dataframe=False): """Get 
available collections. diff --git a/quest/api/datasets.py b/quest/api/datasets.py index d29e9681..074380ea 100644 --- a/quest/api/datasets.py +++ b/quest/api/datasets.py @@ -1,14 +1,16 @@ -from quest.database.database import get_db, db_session, select_datasets -from ..plugins import load_providers, load_plugins, list_plugins -from ..util import logger, parse_service_uri, listify, uuid, is_uuid, classify_uris +import os + +import param +import pandas as pd + +from .tasks import add_async +from .projects import _get_project_dir from .collections import get_collections from .metadata import get_metadata, update_metadata -from .projects import _get_project_dir -from quest.static import DatasetStatus, DatasetSource -from .tasks import add_async -import pandas as pd -import param -import os +from .. import util +from .. import static +from ..plugins import load_providers, load_plugins, list_plugins +from ..database.database import get_db, db_session, select_datasets @add_async @@ -37,7 +39,7 @@ def download(catalog_entry, file_path, dataset=None, **kwargs): if file_path is None: pass - provider, service, catalog_id = parse_service_uri(service_uri) + provider, service, catalog_id = util.parse_service_uri(service_uri) provider_plugin = load_providers()[provider] data = provider_plugin.download(service=service, catalog_id=catalog_id, file_path=file_path, dataset=dataset, **kwargs) @@ -50,7 +52,7 @@ def publish(publisher_uri, options=None, **kwargs): options = dict(options.get_param_values()) options = options or dict() options.update(kwargs) - provider, publisher, _ = parse_service_uri(publisher_uri) + provider, publisher, _ = util.parse_service_uri(publisher_uri) provider_plugin = load_providers()[provider] data = provider_plugin.publish(publisher=publisher, **options) return data @@ -77,7 +79,7 @@ def download_datasets(datasets, raise_on_error=False): return # filter out non download datasets - datasets = datasets[datasets['source'] == DatasetSource.WEB_SERVICE] + 
datasets = datasets[datasets['source'] == static.DatasetSource.WEB_SERVICE] db = get_db() project_path = _get_project_dir() @@ -86,7 +88,7 @@ def download_datasets(datasets, raise_on_error=False): collection_path = os.path.join(project_path, dataset['collection']) catalog_entry = dataset["catalog_entry"] try: - update_metadata(idx, quest_metadata={'status': DatasetStatus.PENDING}) + update_metadata(idx, quest_metadata={'status': static.DatasetStatus.PENDING}) kwargs = dataset['options'] or dict() all_metadata = download(catalog_entry, file_path=collection_path, @@ -95,7 +97,7 @@ def download_datasets(datasets, raise_on_error=False): metadata = all_metadata.pop('metadata', None) quest_metadata = all_metadata quest_metadata.update({ - 'status': DatasetStatus.DOWNLOADED, + 'status': static.DatasetStatus.DOWNLOADED, 'message': 'success', }) except Exception as e: @@ -103,7 +105,7 @@ def download_datasets(datasets, raise_on_error=False): raise quest_metadata = { - 'status': DatasetStatus.FAILED_DOWNLOAD, + 'status': static.DatasetStatus.FAILED_DOWNLOAD, 'message': str(e), } @@ -135,18 +137,18 @@ def get_download_options(uris, fmt='json'): download options that can be specified when calling quest.api.stage_for_download or quest.api.download """ - uris = listify(uris) - grouped_uris = classify_uris(uris, as_dataframe=False, exclude=['collections']) + uris = util.listify(uris) + grouped_uris = util.classify_uris(uris, as_dataframe=False, exclude=['collections']) - services = grouped_uris.get('services') or [] - datasets = grouped_uris.get('datasets') or [] + services = grouped_uris.get(static.UriType.SERVICE) or [] + datasets = grouped_uris.get(static.UriType.DATASET) or [] service_uris = {s: s for s in services} service_uris.update({dataset: get_metadata(dataset)[dataset]['catalog_entry'] for dataset in datasets}) options = {} for uri, service_uri in service_uris.items(): - provider, service, _ = parse_service_uri(service_uri) + provider, service, _ = 
util.parse_service_uri(service_uri) provider_plugin = load_providers()[provider] options[uri] = provider_plugin.get_download_options(service, fmt) @@ -154,11 +156,11 @@ def get_download_options(uris, fmt='json'): def get_publish_options(publish_uri, fmt='json'): - uris = listify(publish_uri) + uris = util.listify(publish_uri) options = {} for uri in uris: publish_uri = uri - provider, publisher, _ = parse_service_uri(publish_uri) + provider, publisher, _ = util.parse_service_uri(publish_uri) provider_plugin = load_providers()[provider] options[uri] = provider_plugin.publish_options(publisher, fmt) @@ -198,7 +200,7 @@ def get_datasets(expand=None, filters=None, queries=None, as_dataframe=None): if filters is not None: for k, v in filters.items(): if k not in datasets.keys(): - logger.warning('filter field {} not found, continuing'.format(k)) + util.logger.warning('filter field {} not found, continuing'.format(k)) continue datasets = datasets.loc[datasets[k] == v] @@ -253,11 +255,11 @@ def new_dataset(catalog_entry, collection, source=None, display_name=None, except IndexError: raise ValueError('Entry {} dose not exist'.format(catalog_entry)) - name = name or uuid('dataset') - assert name.startswith('d') and is_uuid(name) + name = name or util.uuid('dataset') + assert name.startswith('d') and util.is_uuid(name) if source is None: - source = DatasetSource.USER + source = static.DatasetSource.USER if display_name is None: display_name = name @@ -275,8 +277,8 @@ def new_dataset(catalog_entry, collection, source=None, display_name=None, 'file_path': file_path, 'metadata': metadata, } - if source == DatasetSource.WEB_SERVICE: - quest_metadata.update({'status': DatasetStatus.NOT_STAGED}) + if source == static.DatasetSource.WEB_SERVICE: + quest_metadata.update({'status': static.DatasetStatus.NOT_STAGED}) db = get_db() with db_session: @@ -301,7 +303,7 @@ def stage_for_download(uris, options=None): uris (list): staged dataset uids """ - uris = listify(uris) + uris = 
util.listify(uris) display_name = None datasets = [] @@ -323,13 +325,13 @@ def stage_for_download(uris, options=None): if dataset_metadata['display_name'] == dataset_uri: catalog_entry = dataset_metadata['catalog_entry'] - provider, service, _ = parse_service_uri(catalog_entry) + provider, service, _ = util.parse_service_uri(catalog_entry) display_name = '{0}-{1}-{2}'.format(provider, parameter_name, dataset_uri[:7]) quest_metadata = { 'display_name': display_name or dataset_metadata['display_name'], 'options': kwargs, - 'status': DatasetStatus.STAGED, + 'status': static.DatasetStatus.STAGED, 'parameter': parameter } @@ -376,10 +378,10 @@ def open_dataset(dataset, fmt=None, **kwargs): if path is None: raise ValueError('No dataset file found') - if file_format not in list_plugins('io'): + if file_format not in list_plugins(static.PluginType.IO): raise ValueError('No reader available for: %s' % file_format) - io = load_plugins('io', file_format)[file_format] + io = load_plugins(static.PluginType.IO, file_format)[file_format] return io.open(path, fmt=fmt, **kwargs) @@ -415,10 +417,10 @@ def visualize_dataset(dataset, update_cache=False, **kwargs): if path is None: raise ValueError('No dataset file found') - if file_format not in list_plugins('io'): + if file_format not in list_plugins(static.PluginType.IO): raise ValueError('No reader available for: %s' % file_format) - io = load_plugins('io', file_format)[file_format] + io = load_plugins(static.PluginType.IO, file_format)[file_format] title = m.get('display_name') if title is None: @@ -455,9 +457,9 @@ def get_visualization_options(dataset, fmt='json'): if path is None: raise ValueError('No dataset file found') - if file_format not in list_plugins('io'): + if file_format not in list_plugins(static.PluginType.IO): raise ValueError('No reader available for: %s' % file_format) - io = load_plugins('io', file_format)[file_format] + io = load_plugins(static.PluginType.IO, file_format)[file_format] return 
io.visualize_options(path, fmt) diff --git a/quest/api/manage.py b/quest/api/manage.py index b0689e4c..b8554ede 100644 --- a/quest/api/manage.py +++ b/quest/api/manage.py @@ -1,12 +1,13 @@ import os import shutil -from ..util.log import logger -from quest.database.database import get_db, db_session, select_datasets + +from .tasks import add_async from .projects import _get_project_dir from .collections import get_collections from .metadata import get_metadata, update_metadata -from .tasks import add_async -from .. import util +from ..static import UriType, DatasetSource +from ..util import logger, classify_uris, uuid, parse_service_uri +from ..database.database import get_db, db_session, select_datasets @add_async @@ -30,16 +31,16 @@ def delete(uris): return True # group uris by type - grouped_uris = util.classify_uris(uris, - as_dataframe=False, - exclude=['services', 'publishers'], - require_same_type=True) + grouped_uris = classify_uris(uris, + as_dataframe=False, + exclude=[UriType.SERVICE, UriType.PUBLISHER], + require_same_type=True) resource = list(grouped_uris)[0] uris = grouped_uris[resource] db = get_db() for uri in uris: - if resource == 'collections': + if resource == UriType.COLLECTION: if uri not in get_collections(): logger.error('Collection does not exist: %s', uri) raise ValueError('Collection does not exists') @@ -56,15 +57,15 @@ def delete(uris): logger.info('deleting all data under path: %s' % path) shutil.rmtree(path) - if resource == 'datasets': + if resource == UriType.DATASET: with db_session: dataset = db.Dataset[uri] - if dataset.source == 'derived': + if dataset.source == DatasetSource.DERIVED: catalog_entry_datasets = select_datasets(lambda d: d.catalog_entry == dataset.catalog_entry) if len(catalog_entry_datasets) == 1: - _, _, catalog_id = util.parse_service_uri(dataset.catalog_entry) + _, _, catalog_id = parse_service_uri(dataset.catalog_entry) db.QuestCatalog[catalog_id].delete() try: @@ -83,10 +84,10 @@ def move(uris, 
destination_collection, as_dataframe=None, expand=None): if not uris: return {} - grouped_uris = util.classify_uris(uris, - as_dataframe=False, - exclude=['services', 'publishers', 'collections'], - require_same_type=True) + grouped_uris = classify_uris(uris, + as_dataframe=False, + exclude=[UriType.SERVICE, UriType.PUBLISHER, UriType.COLLECTION], + require_same_type=True) resource = list(grouped_uris)[0] uris = grouped_uris[resource] project_path = _get_project_dir() @@ -95,7 +96,7 @@ def move(uris, destination_collection, as_dataframe=None, expand=None): new_datasets = [] for uri in uris: - if resource == 'datasets': + if resource == UriType.DATASET: dataset_metadata = get_metadata(uri)[uri] collection_path = os.path.join(project_path, dataset_metadata['collection']) _move_dataset(dataset_metadata, collection_path, destination_collection_path) @@ -117,10 +118,10 @@ def copy(uris, destination_collection, as_dataframe=None, expand=None): if not uris: return {} - grouped_uris = util.classify_uris(uris, - as_dataframe=False, - exclude=['services', 'publishers', 'collections'], - require_same_type=True) + grouped_uris = classify_uris(uris, + as_dataframe=False, + exclude=[UriType.SERVICE, UriType.PUBLISHER, UriType.COLLECTION], + require_same_type=True) resource = list(grouped_uris)[0] uris = grouped_uris[resource] project_path = _get_project_dir() @@ -129,7 +130,7 @@ def copy(uris, destination_collection, as_dataframe=None, expand=None): new_datasets = [] for uri in uris: - if resource == 'datasets': + if resource == UriType.DATASET: dataset_metadata = get_metadata(uri)[uri] collection_path = os.path.join(project_path, dataset_metadata['collection']) @@ -146,7 +147,7 @@ def copy(uris, destination_collection, as_dataframe=None, expand=None): def _copy_dataset(dataset_metadata, collection_path, destination_collection_path): - new_name = util.uuid('dataset') + new_name = uuid('dataset') db = get_db() with db_session: db_metadata = 
db.Dataset[dataset_metadata['name']].to_dict() diff --git a/quest/api/metadata.py b/quest/api/metadata.py index be9aa70f..f1b165a6 100644 --- a/quest/api/metadata.py +++ b/quest/api/metadata.py @@ -1,13 +1,8 @@ -"""API functions related to metadata. - -get/update metadata for projects/collections/datasets. -""" - import pandas as pd -from .. import util -from .. import plugins from ..static import UriType +from ..plugins import load_providers +from ..util import classify_uris, construct_service_uri, parse_service_uri from ..database import get_db, db_session, select_collections, select_datasets @@ -25,7 +20,7 @@ def get_metadata(uris, as_dataframe=False): metadata at each uri keyed on uris """ # group uris by type - grouped_uris = util.classify_uris(uris) + grouped_uris = classify_uris(uris) # handle case when no uris are passed in if not any(grouped_uris): metadata = pd.DataFrame() @@ -36,17 +31,17 @@ def get_metadata(uris, as_dataframe=False): metadata = [] # get metadata for service type uris - if 'services' in grouped_uris.groups.keys(): - svc_df = grouped_uris.get_group('services') - svc_df[['provider', 'service', 'catalog_id']] = svc_df['uri'].apply(util.parse_service_uri).apply(pd.Series) + if UriType.SERVICE in grouped_uris.groups.keys(): + svc_df = grouped_uris.get_group(UriType.SERVICE) + svc_df[['provider', 'service', 'catalog_id']] = svc_df['uri'].apply(parse_service_uri).apply(pd.Series) for (provider, service), grp in svc_df.groupby(['provider', 'service']): - provider_plugin = plugins.load_providers()[provider] + provider_plugin = load_providers()[provider] if not grp.query('catalog_id != catalog_id').empty: service_metadata = provider_plugin.get_services()[service] - index = util.construct_service_uri(provider, service) + index = construct_service_uri(provider, service) metadata.append(pd.DataFrame(service_metadata, index=[index])) selected_catalog_entries = grp.query('catalog_id == catalog_id').uri.tolist() @@ -55,27 +50,27 @@ def 
get_metadata(uris, as_dataframe=False): catalog_entries = catalog_entries.loc[selected_catalog_entries] metadata.append(catalog_entries) - if 'publishers' in grouped_uris.groups.keys(): - svc_df = grouped_uris.get_group('publishers') - svc_df[['provider', 'publish', 'catalog_id']] = svc_df['uri'].apply(util.parse_service_uri).apply(pd.Series) + if UriType.PUBLISHER in grouped_uris.groups.keys(): + svc_df = grouped_uris.get_group(UriType.PUBLISHER) + svc_df[['provider', 'publish', 'catalog_id']] = svc_df['uri'].apply(parse_service_uri).apply(pd.Series) for (provider, publisher), grp in svc_df.groupby(['provider', 'publish']): - provider_plugin = plugins.load_providers()[provider] + provider_plugin = load_providers()[provider] publisher_metadata = provider_plugin.get_publishers()[publisher] - index = util.construct_service_uri(provider, publisher) + index = construct_service_uri(provider, publisher) metadata.append(pd.DataFrame(publisher_metadata, index=[index])) - if 'collections' in grouped_uris.groups.keys(): + if UriType.COLLECTION in grouped_uris.groups.keys(): # get metadata for collections - tmp_df = grouped_uris.get_group('collections') + tmp_df = grouped_uris.get_group(UriType.COLLECTION) collections = select_collections(lambda c: c.name in tmp_df['uri'].tolist()) collections = pd.DataFrame(collections) collections.set_index('name', inplace=True, drop=False) metadata.append(collections) - if 'datasets' in grouped_uris.groups.keys(): - tmp_df = grouped_uris.get_group('datasets') + if UriType.DATASET in grouped_uris.groups.keys(): + tmp_df = grouped_uris.get_group(UriType.DATASET) datasets = select_datasets(lambda c: c.name in tmp_df['uri'].tolist()) datasets = pd.DataFrame(datasets) datasets.set_index('name', inplace=True, drop=False) @@ -116,10 +111,7 @@ def update_metadata(uris, display_name=None, description=None, } # group uris by type - grouped_uris = util.classify_uris(uris, - as_dataframe=True, - exclude=[UriType.PUBLISHER], - require_same_type=True) + 
grouped_uris = classify_uris(uris, as_dataframe=True, exclude=[UriType.PUBLISHER], require_same_type=True) resource = list(grouped_uris.groups.keys())[0] uris = grouped_uris.get_group(resource) get_db_entity = get_db_entity_funcs[resource] @@ -153,9 +145,7 @@ def update_metadata(uris, display_name=None, description=None, metadata = [metadata] quest_metadata = [quest_metadata] - for uri, name, desc, meta, quest_meta in zip(uris, display_name, - description, metadata, - quest_metadata): + for uri, name, desc, meta, quest_meta in zip(uris, display_name, description, metadata, quest_metadata): if quest_meta is None: quest_meta = {} diff --git a/quest/api/parameters.py b/quest/api/parameters.py index d60aaf91..716ae793 100644 --- a/quest/api/parameters.py +++ b/quest/api/parameters.py @@ -1,8 +1,10 @@ -"""API functions related to Parameters.""" +import os + import pandas as pd + from .providers import get_services -import os -from .. import util +from ..util import parse_service_uri, get_cache_dir +from ..plugins import load_providers def get_mapped_parameters(): @@ -35,7 +37,7 @@ def get_parameters(service_uri, update_cache=False): """ - provider, service, catalog_id = util.parse_service_uri(service_uri) + provider, service, catalog_id = parse_service_uri(service_uri) parameters = _read_cached_parameters(provider, service, update_cache=update_cache) if isinstance(parameters, pd.DataFrame) and catalog_id: @@ -62,7 +64,7 @@ def delete_parameter(): def _read_cached_parameters(provider, service, update_cache=False): """read cached parameters.""" - cache_file = os.path.join(util.get_cache_dir(), provider, service + '_parameters.h5') + cache_file = os.path.join(get_cache_dir(), provider, service + '_parameters.h5') if update_cache: return _get_parameters(provider, service, cache_file) @@ -75,7 +77,7 @@ def _read_cached_parameters(provider, service, update_cache=False): def _get_parameters(provider, service, cache_file): - driver = util.load_providers()[provider] + driver = 
load_providers()[provider] parameters = driver.get_parameters(service) os.makedirs(os.path.split(cache_file)[0], exist_ok=True) if isinstance(parameters, pd.DataFrame): diff --git a/quest/api/projects.py b/quest/api/projects.py index e3ef82d1..5f2493cd 100644 --- a/quest/api/projects.py +++ b/quest/api/projects.py @@ -1,10 +1,10 @@ -"""API functions related to Projects.""" import os -import pandas as pd import shutil -from ..util.log import logger -from .. import util -from quest.database.database import db_session, get_db, init_db + +import pandas as pd + +from ..util import logger, get_projects_dir, read_yaml, write_yaml +from ..database.database import db_session, get_db, init_db PROJECT_DB_FILE = 'metadata.db' @@ -115,7 +115,7 @@ def delete_project(name): folder = projects[name]['folder'] if not os.path.isabs(folder): - path = os.path.join(util.get_projects_dir(), folder) + path = os.path.join(get_projects_dir(), folder) else: path = folder if os.path.exists(path): @@ -134,7 +134,7 @@ def get_active_project(): """ path = _get_projects_index_file() - contents = util.read_yaml(path) + contents = read_yaml(path) default_project = contents.get('active_project') if default_project is None: projects = contents.get('projects') or _create_default_project() @@ -143,7 +143,7 @@ def get_active_project(): 'active_project': default_project, 'projects': projects }) - util.write_yaml(path, contents) + write_yaml(path, contents) return default_project @@ -169,7 +169,7 @@ def get_projects(expand=False, as_dataframe=False): for name, project in _load_projects().items(): path = project['folder'] if not os.path.isabs(path): - path = os.path.join(util.get_projects_dir(), path) + path = os.path.join(get_projects_dir(), path) data = _load_project(name) data.update({ @@ -242,11 +242,11 @@ def set_active_project(name): """ path = _get_projects_index_file() - contents = util.read_yaml(path) + contents = read_yaml(path) if name not in contents['projects'].keys(): raise 
ValueError('Project %s does not exist' % name) contents.update({'active_project': name}) - util.write_yaml(path, contents) + write_yaml(path, contents) get_db(active_db(), reconnect=True) # change active database return name @@ -268,7 +268,7 @@ def _new_project(name, display_name=None, description=None, metadata=None, folde metadata = {} if not os.path.isabs(folder): - path = os.path.join(util.get_projects_dir(), folder) + path = os.path.join(get_projects_dir(), folder) else: path = folder @@ -307,7 +307,7 @@ def _load_project(name): def _load_projects(): """load list of collections.""" path = _get_projects_index_file() - projects = util.read_yaml(path).get('projects') + projects = read_yaml(path).get('projects') if not projects: projects = _create_default_project() @@ -317,9 +317,9 @@ def _load_projects(): def _write_projects(projects): """write list of collections to file.""" path = _get_projects_index_file() - contents = util.read_yaml(path) + contents = read_yaml(path) contents.update({'projects': projects}) - util.write_yaml(path, contents) + write_yaml(path, contents) def _get_project_dir(): @@ -333,10 +333,10 @@ def _get_project_db(name): path = projects[name]['folder'] if not os.path.isabs(path): - path = os.path.join(util.get_projects_dir(), path) + path = os.path.join(get_projects_dir(), path) return os.path.join(path, PROJECT_DB_FILE) def _get_projects_index_file(): - return os.path.join(util.get_projects_dir(), PROJECT_INDEX_FILE) + return os.path.join(get_projects_dir(), PROJECT_INDEX_FILE) diff --git a/quest/api/providers.py b/quest/api/providers.py index 08aac48e..4c8a8a2c 100644 --- a/quest/api/providers.py +++ b/quest/api/providers.py @@ -1,12 +1,9 @@ -"""API functions related to Services. +import os +import requests -Providers are inferred by aggregating information from service plugins. 
-""" -from ..util import save_settings, get_settings, update_settings, parse_service_uri -from quest.database.database import get_db, db_session from ..plugins import load_providers -import requests -import os +from ..database.database import get_db, db_session +from ..util import save_settings, get_settings, update_settings, parse_service_uri def get_providers(expand=None, update_cache=False): diff --git a/quest/api/tasks.py b/quest/api/tasks.py index 9bcbeccc..70215c04 100644 --- a/quest/api/tasks.py +++ b/quest/api/tasks.py @@ -1,12 +1,14 @@ -from concurrent.futures import CancelledError -from functools import wraps -from distributed import Client, LocalCluster +import sys import psutil + import pandas as pd from tornado import gen -import sys -from ..util import listify -from ..util.log import logger +from functools import wraps +from distributed import Client, LocalCluster +from concurrent.futures import CancelledError + +from ..static import DatasetStatus +from ..util import listify, logger _cluster = None tasks = {} @@ -44,7 +46,7 @@ def wrapper(*args, **kwargs): 'fn': f.__name__, 'args': args, 'kwargs': kwargs, - 'status': 'pending', + 'status': DatasetStatus.PENDING, 'result': None, } return future.key @@ -60,7 +62,7 @@ def get_pending_tasks(**kwargs): (filters={}, expand=None, as_dataframe=None, with_future=None) """ - filters = {'status': 'pending'} + filters = {'status': DatasetStatus.PENDING} if 'filters' in kwargs: kwargs['filters'].update(filters) else: @@ -169,9 +171,9 @@ def remove_tasks(task_ids=None, status=None): else: status = ['cancelled', 'finished', 'lost', 'error'] - if 'pending' in status: + if DatasetStatus.PENDING in status: logger.error('cannot remove pending tasks, please cancel them first') - status.remove('pending') + status.remove(DatasetStatus.PENDING) task_list = get_tasks(filters={'status': status, 'task_ids': task_ids}) diff --git a/quest/api/tools.py b/quest/api/tools.py index ebae2bbe..55d0d26d 100644 --- a/quest/api/tools.py 
+++ b/quest/api/tools.py @@ -1,14 +1,11 @@ -"""API functions related to data Tools. - -This will eventually hold filter related functionality -""" import param -from .. import util -from ..plugins.plugins import load_plugins from .datasets import open_dataset from .metadata import get_metadata from .tasks import add_async +from ..util import to_geojson +from ..static import UriType, PluginType +from ..plugins.plugins import load_plugins def get_tools(filters=None, expand=False, **kwargs): @@ -36,7 +33,7 @@ def get_tools(filters=None, expand=False, **kwargs): all available tools """ - avail = [dict(name=k, **v.metadata) for k, v in load_plugins('tool').items()] + avail = [dict(name=k, **v.metadata) for k, v in load_plugins(PluginType.TOOL).items()] if filters is not None: for k, v in filters.items(): @@ -94,7 +91,7 @@ def run_tool(name, options=None, as_dataframe=None, expand=None, as_open_dataset options = options or dict() options.update(kwargs) - plugin = load_plugins('tool', name)[name] + plugin = load_plugins(PluginType.TOOL, name)[name] result = plugin.run_tool(**options) new_datasets = result.get('datasets', []) @@ -106,7 +103,7 @@ def run_tool(name, options=None, as_dataframe=None, expand=None, as_open_dataset if expand: new_datasets = list(new_datasets.to_dict(orient='index').values()) - new_catalog_entries = util.to_geojson(new_catalog_entries)['catalog_entries'] + new_catalog_entries = to_geojson(new_catalog_entries)['catalog_entries'] result.update({'datasets': new_datasets, 'catalog_entries': new_catalog_entries}) @@ -130,5 +127,5 @@ def get_tool_options(name, fmt='json', **kwargs): tool options (json scheme): tool options that can be applied when calling quest.api.run_filter """ - plugin = load_plugins('tool', name)[name] + plugin = load_plugins(PluginType.TOOL, name)[name] return plugin.get_tool_options(fmt, **kwargs) diff --git a/quest/api/version.py b/quest/api/version.py index 1db5cbc0..5c7ef057 100644 --- a/quest/api/version.py +++ 
b/quest/api/version.py @@ -1,4 +1,3 @@ -"""API functions related to versions.""" import quest diff --git a/quest/api/workflows.py b/quest/api/workflows.py index 1800bc76..d1daea86 100644 --- a/quest/api/workflows.py +++ b/quest/api/workflows.py @@ -5,8 +5,8 @@ from .catalog import search_catalog, add_datasets from .tools import run_tool from ..database import get_db, db_session -from ..static import DatasetStatus from ..util import logger as log +from ..static import DatasetStatus, UriType def get_data( diff --git a/quest/database/database.py b/quest/database/database.py index f5a8df4b..c5065aeb 100644 --- a/quest/database/database.py +++ b/quest/database/database.py @@ -1,4 +1,5 @@ from datetime import datetime + from pony import orm from pony.orm import db_session import shapely.wkt diff --git a/quest/plugins/base/publish_base.py b/quest/plugins/base/publish_base.py index a19ca1fb..d3e5b935 100644 --- a/quest/plugins/base/publish_base.py +++ b/quest/plugins/base/publish_base.py @@ -1,6 +1,7 @@ -from quest.util.param_util import format_json_options import param +from ...util.param_util import format_json_options + class PublishBase(param.Parameterized): publisher_name = None diff --git a/quest/plugins/base/service_base.py b/quest/plugins/base/service_base.py index b8bfaf3d..287afe1b 100644 --- a/quest/plugins/base/service_base.py +++ b/quest/plugins/base/service_base.py @@ -2,13 +2,13 @@ import os import pickle -import geopandas as gpd -import pandas as pd -import param import ulmo +import param +import pandas as pd +import geopandas as gpd from shapely.geometry import box, Point -from ... 
import util +from quest import util reserved_catalog_entry_fields = [ diff --git a/quest/plugins/base/tool_base.py b/quest/plugins/base/tool_base.py index c0035149..09080418 100644 --- a/quest/plugins/base/tool_base.py +++ b/quest/plugins/base/tool_base.py @@ -1,10 +1,10 @@ -import abc import os +import abc import param -from ...static import DatasetStatus, DatasetSource from ...util import listify, format_json_options, uuid +from ...static import DatasetStatus, DatasetSource, UriType class ToolBase(param.ParameterizedFunction): diff --git a/quest/plugins/plugins.py b/quest/plugins/plugins.py index 87c7d718..04b067b6 100644 --- a/quest/plugins/plugins.py +++ b/quest/plugins/plugins.py @@ -1,33 +1,37 @@ -from .base import ProviderBase, IoBase, ToolBase -from ..util import listify, get_settings import importlib import inspect import pkgutil import logging + +from .base import ProviderBase, IoBase, ToolBase +from ..static import PluginType +from ..util import listify, get_settings + + logger = logging.getLogger('quest') plugin_instances = { - 'provider': None, - 'io': None, - 'tool': None, + PluginType.PROVIDER: None, + PluginType.IO: None, + PluginType.TOOL: None, } plugin_namespaces = { - 'provider': 'quest_provider_plugins', - 'io': 'quest_io_plugins', - 'tool': 'quest_tool_plugins', + PluginType.PROVIDER: 'quest_provider_plugins', + PluginType.IO: 'quest_io_plugins', + PluginType.TOOL: 'quest_tool_plugins', } plugin_base_classes = { - 'provider': ProviderBase, - 'io': IoBase, - 'tool': ToolBase, + PluginType.PROVIDER: ProviderBase, + PluginType.IO: IoBase, + PluginType.TOOL: ToolBase, } plugin_instantiate_funcs = { - 'provider': lambda x: x(), - 'io': lambda x: x(), - 'tool': lambda x: x.instance(), + PluginType.PROVIDER: lambda x: x(), + PluginType.IO: lambda x: x(), + PluginType.TOOL: lambda x: x.instance(), } @@ -109,8 +113,8 @@ def load_providers(update_cache=False): settings = get_settings() - if update_cache or plugin_instances['provider'] is None: - 
providers = load_plugins('provider', update_cache=True) + if update_cache or plugin_instances[PluginType.PROVIDER] is None: + providers = load_plugins(PluginType.PROVIDER, update_cache=True) if len(settings.get('USER_SERVICES', [])) > 0: from quest.plugins import user_provider for uri in settings.get('USER_SERVICES', []): @@ -122,8 +126,8 @@ def load_providers(update_cache=False): 'due to the following exception: \n\t{} {}.' .format('user', uri, e.__class__.__name__, str(e))) - plugin_instances['provider'] = providers + plugin_instances[PluginType.PROVIDER] = providers else: - providers = plugin_instances['provider'] + providers = plugin_instances[PluginType.PROVIDER] return providers diff --git a/quest/plugins/user_provider.py b/quest/plugins/user_provider.py index 9276faf6..b26a2671 100644 --- a/quest/plugins/user_provider.py +++ b/quest/plugins/user_provider.py @@ -1,15 +1,18 @@ -from geojson import Feature, FeatureCollection, Polygon -from quest.plugins.base import ProviderBase, ServiceBase -from io import StringIO -from quest import util -import pandas as pd -import requests -import warnings -import geojson +import os import shutil -import param + import yaml -import os +import param +import geojson +import requests +import warnings +import pandas as pd +from io import StringIO +from geojson import Feature, FeatureCollection + +from ..static import UriType +from ..plugins.base import ProviderBase, ServiceBase +from ..util import listify, to_geodataframe, bbox2poly, is_remote_uri def get_user_service_base(): @@ -19,7 +22,7 @@ class UserServiceBase(ServiceBase): @classmethod def instance(cls, service_name, service_data, provider, uri, is_remote): - parameters = util.listify(service_data['metadata'].pop('parameters')) + parameters = listify(service_data['metadata'].pop('parameters')) if len(parameters) > 1: cls.params()['parameter'].objects = sorted(parameters) @@ -34,7 +37,7 @@ def instance(cls, service_name, service_data, provider, uri, is_remote): 
self._parameter_map = {p: p for p in parameters} for k, v in service_data['metadata'].items(): setattr(self, k, v) - self.service_folder = util.listify(service_data['service_folder']) + self.service_folder = listify(service_data['service_folder']) if len(self.service_folder) > 1: raise ValueError() # Now only supporting one service folder else: @@ -52,9 +55,10 @@ def download(self, catalog_id, file_path, dataset, **kwargs): fnames = self.datasets_mapping if isinstance(self.datasets_mapping, dict): fnames = self.dataset_mapping[self.parameter] - fnames = [f.replace('', catalog_id) for f in util.listify(fnames)] + fnames = [f.replace('', catalog_id) for f in listify(fnames)] else: - fnames = self.catalog_entries.loc[catalog_id]['_download_url'] # TODO where does self.catalog_entries get initialized? + fnames = self.catalog_entries.loc[catalog_id]['_download_url'] + # TODO where does self.catalog_entries get initialized? final_path = [] for src, file_name in zip(self._get_paths(fnames), fnames): @@ -105,11 +109,11 @@ def search_catalog(self, **kwargs): all_catalog_entries = [] - for p in util.listify(paths): + for p in listify(paths): with uri_open(p, self.is_remote) as f: if fmt.lower() == 'geojson': catalog_entries = geojson.load(f) - catalog_entries = util.to_geodataframe(catalog_entries) + catalog_entries = to_geodataframe(catalog_entries) if fmt.lower() == 'mbr': # TODO creating FeatureCollection not needed anymore @@ -120,9 +124,12 @@ def search_catalog(self, **kwargs): for line in f: catalog_id, x1, y1, x2, y2 = line.split() properties = {} - polys.append(Feature(geometry=util.bbox2poly(x1, y1, x2, y2, as_geojson=True), properties=properties, id=catalog_id)) + polys.append(Feature(geometry=bbox2poly(x1, y1, x2, y2, + as_geojson=True), + properties=properties, + id=catalog_id)) catalog_entries = FeatureCollection(polys) - catalog_entries = util.to_geodataframe(catalog_entries) + catalog_entries = to_geodataframe(catalog_entries) if fmt.lower() == 'mbr-csv': # 
TODO merge this with the above, @@ -133,16 +140,20 @@ def search_catalog(self, **kwargs): catalog_id, y1, x1, y2, x2 = line.split(',') catalog_id = catalog_id.split('.')[0] properties = {} - polys.append(Feature(geometry=util.bbox2poly(x1, y1, x2, y2, as_geojson=True), properties=properties, id=catalog_id)) + polys.append(Feature(geometry=bbox2poly(x1, y1, x2, y2, + as_geojson=True), + properties=properties, + id=catalog_id)) catalog_entries = FeatureCollection(polys) - catalog_entries = util.to_geodataframe(catalog_entries) + catalog_entries = to_geodataframe(catalog_entries) if fmt.lower() == 'isep-json': # uses exported json file from ISEP DataBase # assuming ISEP if a geotypical service for now. catalog_entries = pd.read_json(p) catalog_entries.rename(columns={'_id': 'service_id'}, inplace=True) - catalog_entries['download_url'] = catalog_entries['files'].apply(lambda x: os.path.join(x[0].get('file location'), x[0].get('file name'))) + catalog_entries['download_url'] = catalog_entries['files'].apply( + lambda x: os.path.join(x[0].get('file location'), x[0].get('file name'))) # remove leading slash from file path catalog_entries['download_url'] = catalog_entries['download_url'].str.lstrip('/') catalog_entries['parameters'] = 'met' @@ -160,7 +171,7 @@ def search_catalog(self, **kwargs): def _get_paths(self, filenames): folder = self.service_folder paths = list() - for filename in util.listify(filenames): + for filename in listify(filenames): if self.uri.startswith('http'): paths.append(self.uri.rstrip('/') + '/{}/{}'.format(folder, filename)) else: @@ -180,7 +191,7 @@ class UserProvider(ProviderBase): def __init__(self, uri, name=None, use_cache=True, update_frequency='M'): super(UserProvider, self).__init__(name=name, use_cache=use_cache, update_frequency=update_frequency) self.uri = uri - self.is_remote = util.is_remote_uri(uri) + self.is_remote = is_remote_uri(uri) self._register() @property diff --git a/quest/static/__init__.py b/quest/static/__init__.py 
index bf3f5517..8220e3ca 100644 --- a/quest/static/__init__.py +++ b/quest/static/__init__.py @@ -42,3 +42,9 @@ class DatasetSource: DERIVED = 'derived' WEB_SERVICE = 'download' USER = 'user-created' + + +class PluginType: + IO = 'io' + TOOL = 'tool' + PROVIDER = 'provider' diff --git a/quest/tools/__init__.py b/quest/tools/__init__.py index ab6bbadd..c2381c76 100644 --- a/quest/tools/__init__.py +++ b/quest/tools/__init__.py @@ -1,9 +1,10 @@ import sys +from ..static import PluginType from ..plugins import load_plugins -tools = load_plugins('tool') +tools = load_plugins(PluginType.TOOL) def codify(name): diff --git a/quest/util/__init__.py b/quest/util/__init__.py index 2ef99c08..ca543a80 100644 --- a/quest/util/__init__.py +++ b/quest/util/__init__.py @@ -1,4 +1,3 @@ -# flake8: noqa from .misc import * from .io import read_yaml, write_yaml from .config import get_settings, save_settings, update_settings, update_settings_from_file diff --git a/quest/util/config.py b/quest/util/config.py index 0fe68560..7614da48 100644 --- a/quest/util/config.py +++ b/quest/util/config.py @@ -1,12 +1,10 @@ -"""Module wide settings. - -""" -import logging -import yaml import os +import yaml +import logging from ..database import get_db + log = logging.getLogger(__name__) settings = {} diff --git a/quest/util/io.py b/quest/util/io.py index 3eb80e92..963e512f 100644 --- a/quest/util/io.py +++ b/quest/util/io.py @@ -1,4 +1,3 @@ -"""io utilities.""" import os import yaml diff --git a/quest/util/misc.py b/quest/util/misc.py index 0a1df3d6..6d657cc1 100644 --- a/quest/util/misc.py +++ b/quest/util/misc.py @@ -26,8 +26,9 @@ def _abs_path(path, mkdir=True): path (string): A string that is a filename. mkdir (bool): A boolean if the user wants to create the directory. Returns: - A string of an absolute path with a file from somwhere with in the Quest directory. + A string of an absolute path with a file from somewhere with in the Quest directory. 
""" + if not os.path.isabs(path): path = os.path.join(get_quest_dir(), path) @@ -207,6 +208,7 @@ def get_cache_dir(service=None): """ settings = get_settings() path = _abs_path(settings['CACHE_DIR']) + if service is not None: path = os.path.join(path, service) diff --git a/quest_io_plugins/raster_gdal.py b/quest_io_plugins/raster_gdal.py index 73caa8f0..eabb6a15 100644 --- a/quest_io_plugins/raster_gdal.py +++ b/quest_io_plugins/raster_gdal.py @@ -1,13 +1,12 @@ -"""io plugin for timeseries datasets.""" - -from quest.plugins import IoBase -from quest.util import convert_nodata_to_nans import os import subprocess + import rasterio import xarray as xr -import numpy as np +from quest.plugins import IoBase +from quest.static import DataType +from quest.util import convert_nodata_to_nans class RasterGdal(IoBase): diff --git a/quest_io_plugins/timeseries_hdf5.py b/quest_io_plugins/timeseries_hdf5.py index bf3918a8..3e92f0e0 100644 --- a/quest_io_plugins/timeseries_hdf5.py +++ b/quest_io_plugins/timeseries_hdf5.py @@ -1,7 +1,6 @@ -"""io plugin for timeseries datasets.""" - import json +from quest.static import DataType from quest_io_plugins.xyHdf5 import XYHdf5 diff --git a/quest_io_plugins/xyHdf5.py b/quest_io_plugins/xyHdf5.py index b6de770f..6c7a17e1 100644 --- a/quest_io_plugins/xyHdf5.py +++ b/quest_io_plugins/xyHdf5.py @@ -1,12 +1,12 @@ -import json import os +import json import pandas as pd import matplotlib.pyplot as plt from quest.plugins import IoBase -from quest.util import setattr_on_dataframe from quest.util.log import logger +from quest.util import setattr_on_dataframe class XYHdf5(IoBase): diff --git a/quest_provider_plugins/cuahsi_hs.py b/quest_provider_plugins/cuahsi_hs.py index 1251d5e5..da4d911a 100644 --- a/quest_provider_plugins/cuahsi_hs.py +++ b/quest_provider_plugins/cuahsi_hs.py @@ -1,15 +1,16 @@ -from quest.plugins import ProviderBase, SingleFileServiceBase, PublishBase +import os +import param + +import pandas as pd +from getpass import getpass 
+from shapely.geometry import Point, box from hs_restclient import HydroShare, HydroShareAuthBasic -from quest.database.database import get_db, db_session +from quest.plugins import ProviderBase, SingleFileServiceBase, PublishBase + from quest.api.metadata import get_metadata from quest.util import param_util, listify, log -from quest.static import DatasetStatus -from shapely.geometry import Point, box -from quest.static import ServiceType -from getpass import getpass -import pandas as pd -import param -import os +from quest.database.database import get_db, db_session +from quest.static import DatasetStatus, ServiceType, GeomType class HSServiceBase(SingleFileServiceBase): @@ -28,7 +29,7 @@ class HSGeoService(HSServiceBase): description = 'HydroShare geo-discrete resources.' service_type = ServiceType.GEO_DISCRETE unmapped_parameters_available = True - geom_type = 'Point' + geom_type = GeomType.POINT datatype = 'zip' geographical_areas = ['Worldwide'] bounding_boxes = [ diff --git a/quest_provider_plugins/kitware_girder.py b/quest_provider_plugins/kitware_girder.py index d1df7bed..a1ad1efc 100644 --- a/quest_provider_plugins/kitware_girder.py +++ b/quest_provider_plugins/kitware_girder.py @@ -1,9 +1,10 @@ +import param +import girder_client + +from quest.util import param_util, log +from quest.api.metadata import get_metadata from quest.plugins import ProviderBase, PublishBase from quest.database.database import get_db, db_session -from quest.api.metadata import get_metadata -from quest.util import param_util, log -import girder_client -import param # There is no service base fore the Live Girder Server due to the general layout of how # the folders and files are layed out. 
It would be super difficult to look through diff --git a/quest_provider_plugins/nasa.py b/quest_provider_plugins/nasa.py index 87685594..1439b333 100644 --- a/quest_provider_plugins/nasa.py +++ b/quest_provider_plugins/nasa.py @@ -1,8 +1,10 @@ -from quest.database.database import get_db, db_session -from quest.util import log -from getpass import getpass -import pandas as pd import requests +import pandas as pd +from getpass import getpass + +from quest.util import log +from quest.database.database import get_db, db_session +from quest.static import ServiceType, GeomType, DataType collections_url = 'https://cmr.earthdata.nasa.gov/search/collections.json?short_name=%s' @@ -12,10 +14,10 @@ class NasaServiceBase(SingleFileServiceBase): - service_type = 'geo-discrete' + service_type = ServiceType.GEO_DISCRETE unmapped_parameters_available = False - geom_type = 'Point' - datatype = 'timeseries' + geom_type = GeomType.POINT + datatype = DataType.TIMESERIES geographical_areas = ['Worldwide'] _parameter_map = { 'elevation': 'elevation' diff --git a/quest_provider_plugins/noaa_coastwatch.py b/quest_provider_plugins/noaa_coastwatch.py index 405950a8..9cf56fce 100644 --- a/quest_provider_plugins/noaa_coastwatch.py +++ b/quest_provider_plugins/noaa_coastwatch.py @@ -1,13 +1,13 @@ -"""QUEST wrapper for NCDC GHCN and GSOD Services.""" import os -import pandas as pd import param -from urllib.parse import quote, urlencode +import pandas as pd from urllib.error import HTTPError +from urllib.parse import quote, urlencode -from quest.plugins import ProviderBase, TimePeriodServiceBase, load_plugins from quest.util.log import logger +from quest.static import ServiceType, GeomType, DataType +from quest.plugins import ProviderBase, TimePeriodServiceBase, load_plugins class NoaaServiceBase(TimePeriodServiceBase): @@ -90,7 +90,7 @@ def download(self, catalog_id, file_path, dataset, **kwargs): metadata = { 'file_path': file_path, 'file_format': 'timeseries-hdf5', - 'datatype': 'timeseries', 
+ 'datatype': DataType.TIMESERIES, 'parameter': p.parameter, 'unit': units[self.parameter_code], 'service_id': 'svc://noaa:{}/{}'.format(self.service_name, catalog_id) @@ -137,10 +137,10 @@ class NoaaServiceNDBC(NoaaServiceBase): service_name = 'ndbc' display_name = 'NOAA National Data Buoy Center' description = 'NDBC Standard Meteorological Buoy Data' - service_type = 'geo-discrete' + service_type = ServiceType.GEO_DISCRETE unmapped_parameters_available = True - geom_type = 'Point' - datatype = 'timeseries' + geom_type = GeomType.POINT + datatype = DataType.TIMESERIES geographical_areas = ['Worldwide'] bounding_boxes = [ [-177.75, -27.705, 179.001, 71.758], @@ -195,10 +195,10 @@ class NoaaServiceCoopsMet(NoaaServiceBase): service_name = 'coops-meteorological' display_name = 'NOAA COOPS Met' description = 'Center for Operational Oceanographic Products and Services' - service_type = 'geo-discrete' + service_type = ServiceType.GEO_DISCRETE unmapped_parameters_available = True - geom_type = 'Point' - datatype = 'timeseries' + geom_type = GeomType.POINT + datatype = DataType.TIMESERIES geographical_areas = ['Worldwide'] bounding_boxes = [ [-180, -90, 180, 90], @@ -277,10 +277,10 @@ class NoaaServiceCoopsWater(NoaaServiceBase): service_name = 'coops-water' display_name = 'NOAA COOPS Water' description = 'Center for Operational Oceanographic Products and Services' - service_type = 'geo-discrete' + service_type = ServiceType.GEO_DISCRETE unmapped_parameters_available = True - geom_type = 'Point' - datatype = 'timeseries' + geom_type = GeomType.POINT + datatype = DataType.TIMESERIES geographical_areas = ['Worldwide'] bounding_boxes = [ [-180, -90, 180, 90], diff --git a/quest_provider_plugins/noaa_ncdc.py b/quest_provider_plugins/noaa_ncdc.py index a14b08dc..f0091522 100644 --- a/quest_provider_plugins/noaa_ncdc.py +++ b/quest_provider_plugins/noaa_ncdc.py @@ -1,10 +1,12 @@ -from quest.plugins import ProviderBase, TimePeriodServiceBase, load_plugins -from ulmo.ncdc import 
ghcn_daily, gsod -import pandas as pd -import param import os -# from ulmo.ncdc.ghcn_daily.core import _get_inventory as _get_ghcn_inventory +import param +import pandas as pd +from ulmo.ncdc import ghcn_daily, gsod + +from quest.static import ServiceType, GeomType, DataType +from quest.plugins import ProviderBase, TimePeriodServiceBase, load_plugins + BASE_PATH = 'ncdc' @@ -92,7 +94,7 @@ def download(self, catalog_id, file_path, dataset, **kwargs): metadata = { 'file_path': file_path, 'file_format': 'timeseries-hdf5', - 'datatype': 'timeseries', + 'datatype': DataType.TIMESERIES, 'parameter': self.parameter, 'unit': self._unit_map[self.parameter], 'service_id': 'svc://ncdc:{}/{}'.format(self.service_name, catalog_id) @@ -110,10 +112,10 @@ class NcdcServiceGhcnDaily(NcdcServiceBase): service_name = 'ghcn-daily' display_name = 'NCDC GHCN Daily' description = 'Daily Meteorologic Data from the Global Historic Climate Network' - service_type = 'geo-discrete' + service_type = ServiceType.GEO_DISCRETE unmapped_parameters_available = True - geom_type = 'Point' - datatype = 'timeseries' + geom_type = GeomType.POINT + datatype = DataType.TIMESERIES geographical_areas = ['Worldwide'] bounding_boxes = [ [-180, -90, 180, 90], @@ -162,10 +164,10 @@ class NcdcServiceGsod(NcdcServiceBase): service_name = 'gsod' display_name = 'NCDC GSOD' description = 'Daily Meteorologic Data from the Global Summary of the Day' - service_type = 'geo-discrete' + service_type = ServiceType.GEO_DISCRETE unmapped_parameters_available = True - geom_type = 'Point' - datatype = 'timeseries' + geom_type = GeomType.POINT + datatype = DataType.TIMESERIES geographical_areas = ['Worldwide'] bounding_boxes = [ [-180, -90, 180, 90] diff --git a/quest_provider_plugins/quest_catalog.py b/quest_provider_plugins/quest_catalog.py index 3f2e03ac..db7d9148 100644 --- a/quest_provider_plugins/quest_catalog.py +++ b/quest_provider_plugins/quest_catalog.py @@ -1,8 +1,9 @@ -from quest.plugins import ProviderBase, 
SingleFileServiceBase -from quest.database.database import select_catalog_entries -from quest.static import ServiceType import pandas as pd +from quest.static import ServiceType, GeomType +from quest.database.database import select_catalog_entries +from quest.plugins import ProviderBase, SingleFileServiceBase + class QuestCatalogService(SingleFileServiceBase): service_name = 'quest' @@ -10,7 +11,7 @@ class QuestCatalogService(SingleFileServiceBase): description = 'Quest Catalog uses a database for derived datasets from Quest.' service_type = ServiceType.GEO_DISCRETE unmapped_parameters_available = True - geom_type = ['Point', 'Line', 'Polygon'] + geom_type = [GeomType.POINT, GeomType.LINE, GeomType.POLYGON] datatype = None geographical_areas = ['Worldwide'] bounding_boxes = [ diff --git a/quest_provider_plugins/usgs_ned.py b/quest_provider_plugins/usgs_ned.py index f2370183..a7ccf1af 100644 --- a/quest_provider_plugins/usgs_ned.py +++ b/quest_provider_plugins/usgs_ned.py @@ -1,17 +1,21 @@ -from quest.plugins import ProviderBase, SingleFileServiceBase -from quest import util -from ulmo.usgs import ned import os +from ulmo.usgs import ned + +from quest import util +from quest.static import ServiceType, DataType, GeomType +from quest.plugins import ProviderBase, SingleFileServiceBase + + DEFAULT_FILE_PATH = os.path.join('usgs','ned') CACHE_FILE = 'ned_%s_metadata.json' class UsgsNedServiceBase(SingleFileServiceBase): - service_type = 'geo-discrete' + service_type = ServiceType.GEO_DISCRETE unmapped_parameters_available = False - geom_type = 'polygon' - datatype = 'raster' + geom_type = GeomType.POLYGON + datatype = DataType.RASTER geographical_areas = ['Alaska', 'USA', 'Hawaii'] bounding_boxes = [[-180, -90, 180, 90]] _parameter_map = { diff --git a/quest_provider_plugins/usgs_nlcd.py b/quest_provider_plugins/usgs_nlcd.py index 75b2d8cb..f85140c2 100644 --- a/quest_provider_plugins/usgs_nlcd.py +++ b/quest_provider_plugins/usgs_nlcd.py @@ -1,13 +1,13 @@ -"""providers 
based on www.sciencebase.gov.""" - -import pandas as pd import requests -from quest.plugins import ProviderBase, SingleFileServiceBase +import pandas as pd + from quest import util +from quest.static import ServiceType, DatasetSource +from quest.plugins import ProviderBase, SingleFileServiceBase class UsgsNlcdServiceBase(SingleFileServiceBase): - service_type = 'geo-discrete' + service_type = ServiceType.GEO_DISCRETE unmapped_parameters_available = False geom_type = 'polygon' datatype = 'discrete-raster' @@ -85,17 +85,7 @@ class UsgsNlcdProvider(ProviderBase): service_list = [UsgsNlcdService2001, UsgsNlcdService2006, UsgsNlcdService2011] display_name = 'National Land Cover Database' description = 'The National Land Cover Database products are created through a cooperative project conducted by ' \ - 'the Multi-Resolution Land Characteristics (MRLC) Consortium. The MRLC Consortium is a partnership ' \ - 'of federal agencies (www.mrlc.gov), consisting of ' \ - 'the U.S. Geological Survey (USGS), ' \ - 'the National Oceanic and Atmospheric Administration (NOAA), ' \ - 'the U.S. Environmental Protection Agency (EPA), ' \ - 'the U.S. Department of Agriculture -Forest Service (USDA-FS), ' \ - 'the National Park Service (NPS), ' \ - 'the U.S. Fish and Wildlife Service (FWS), ' \ - 'the Bureau of Land Management (BLM), and ' \ - 'the USDA Natural Resources Conservation Service (NRCS).' - organization_name = 'United States Geological Survey' + 'the Multi-Resolution Land Characteristics (MRLC) Consortium.' 
organization_abbr = 'USGS' name = 'usgs-nlcd' diff --git a/quest_provider_plugins/usgs_nwis.py b/quest_provider_plugins/usgs_nwis.py index ba18a681..72ee5e85 100644 --- a/quest_provider_plugins/usgs_nwis.py +++ b/quest_provider_plugins/usgs_nwis.py @@ -1,13 +1,15 @@ -"""QUEST wrapper for USGS NWIS Services.""" +import os -from quest.plugins import ProviderBase, TimePeriodServiceBase, load_plugins -import concurrent.futures -from functools import partial +import param import pandas as pd -import os +import concurrent.futures from ulmo.usgs import nwis +from functools import partial + from quest import util -import param +from quest.static import ServiceType, GeomType, DataType +from quest.plugins import ProviderBase, TimePeriodServiceBase, load_plugins + BASE_PATH = 'usgs-nwis' @@ -69,7 +71,7 @@ def download(self, catalog_id, file_path, dataset, **kwargs): 'metadata': data, 'file_path': file_path, 'file_format': 'timeseries-hdf5', - 'datatype': 'timeseries', + 'datatype': DataType.TIMESERIES, 'parameter': parameter, 'unit': data['variable']['units']['code'], 'service_id': 'svc://usgs-nwis:{}/{}'.format(self.service_name, catalog_id) @@ -105,7 +107,7 @@ def get_parameters(self, catalog_ids=None): chunks = list(_chunks(df.index.tolist())) func = partial(_site_info, service=self.service_name) - with concurrent.futures.ProcessPoolExecutor() as executor: + with concurrent.futures.ProcessPoolExecutor(max_workers=None) as executor: data = executor.map(func, chunks) data = pd.concat(data, ignore_index=True) @@ -145,10 +147,10 @@ class NwisServiceIV(NwisServiceBase): service_name = 'iv' display_name = 'NWIS Instantaneous Values Service' description = 'Retrieve current streamflow and other real-time data for USGS water sites since October 1, 2007' - service_type = 'geo-discrete' + service_type = ServiceType.GEO_DISCRETE unmapped_parameters_available = True - geom_type = 'Point' - datatype = 'timeseries' + geom_type = GeomType.POINT + datatype = DataType.TIMESERIES 
geographical_areas = ['Alaska', 'USA', 'Hawaii'] bounding_boxes = [ (-178.19453125, 51.6036621094, -130.0140625, 71.4076660156), @@ -168,10 +170,10 @@ class NwisServiceDV(NwisServiceBase): display_name = 'NWIS Daily Values Service' description = 'Retrieve historical summarized daily data about streams, lakes and wells. Daily data available ' \ 'for USGS water sites include mean, median, maximum, minimum, and/or other derived values.' - service_type = 'geo-discrete' + service_type = ServiceType.GEO_DISCRETE unmapped_parameters_available = True - geom_type = 'Point' - datatype = 'timeseries' + geom_type = GeomType.POINT + datatype = DataType.TIMESERIES geographical_areas = ['Alaska', 'USA', 'Hawaii'] bounding_boxes = [ (-178.19453125, 51.6036621094, -130.0140625, 71.4076660156), diff --git a/quest_provider_plugins/wmts_imagery.py b/quest_provider_plugins/wmts_imagery.py index a542f760..af12fe46 100644 --- a/quest_provider_plugins/wmts_imagery.py +++ b/quest_provider_plugins/wmts_imagery.py @@ -277,6 +277,8 @@ def _download_and_stitch_tiles(url, tile_indices, crop_bbox, zoom_level, max_til raise ValueError("{} tiles were requested, which exceeds the maximum tile limit of {}. " "Either increase the tile limit (max_tiles) or decrease the zoom level." 
.format(total_number_of_tiles, max_tiles)) + else: + log.info("There are {} tiles to download.".format(total_number_of_tiles)) # calculate full image height and width (count is calculated on the first time in the loop) height = number_of_y_tiles * TILE_SIZE diff --git a/quest_tool_plugins/raster/rst_base.py b/quest_tool_plugins/raster/rst_base.py index b069960b..7d65f8df 100644 --- a/quest_tool_plugins/raster/rst_base.py +++ b/quest_tool_plugins/raster/rst_base.py @@ -1,26 +1,24 @@ -"""Functions required run raster filters""" - import rasterio -from quest.plugins import ToolBase from quest import util +from quest.plugins import ToolBase from quest.api import get_metadata - +from quest.static import DataType, UriType class RstBase(ToolBase): # metadata attributes group = 'raster' - operates_on_datatype = ['raster'] + operates_on_datatype = [DataType.RASTER] operates_on_geotype = None operates_on_parameters = None - produces_datatype = ['raster'] + produces_datatype = [DataType.RASTER] produces_geotype = None produces_parameters = None dataset = util.param.DatasetSelector(default=None, doc="""Dataset to apply filter to.""", - filters={'datatype': 'raster'}, + filters={'datatype': DataType.RASTER}, ) def _run_tool(self): diff --git a/quest_tool_plugins/raster/rst_merge.py b/quest_tool_plugins/raster/rst_merge.py index 4275a41a..e1158799 100644 --- a/quest_tool_plugins/raster/rst_merge.py +++ b/quest_tool_plugins/raster/rst_merge.py @@ -1,19 +1,21 @@ -from quest.plugins import ToolBase -from quest import util -from quest.api import get_metadata, update_metadata +import param import rasterio -import rasterio.merge import rasterio.mask -from shapely.geometry import box -from fiona.crs import from_epsg +import rasterio.merge import geopandas as gpd -import param +from fiona.crs import from_epsg +from shapely.geometry import box + +from quest import util +from quest.plugins import ToolBase +from quest.static import DataType, UriType +from quest.api import get_metadata, 
update_metadata class RstMerge(ToolBase): _name = 'raster-merge' group = 'Multi-dataset' - operates_on_datatype = ['raster','discrete-raster'] + operates_on_datatype = [DataType.RASTER, 'discrete-raster'] datasets = util.param.DatasetListSelector(default=None, doc="""Dataset to run tool on.""", diff --git a/quest_tool_plugins/raster/rst_reprojection.py b/quest_tool_plugins/raster/rst_reprojection.py index a1420778..e3a69434 100644 --- a/quest_tool_plugins/raster/rst_reprojection.py +++ b/quest_tool_plugins/raster/rst_reprojection.py @@ -1,19 +1,20 @@ -from quest.plugins import ToolBase -from quest import util -from quest.api import get_metadata, update_metadata +import param import rasterio import subprocess -import param +from quest import util +from quest.plugins import ToolBase +from quest.static import DataType, UriType +from quest.api import get_metadata, update_metadata class RstReprojection(ToolBase): _name = 'raster-reprojection' - operates_on_datatype = ['raster', 'discrete-raster'] + operates_on_datatype = [DataType.RASTER, 'discrete-raster'] dataset = util.param.DatasetSelector(default=None, doc="""Dataset to run tool on.""", - filters={'datatype': 'raster'}, + filters={'datatype': DataType.RASTER}, ) new_crs = param.String(default=None, doc="""New coordinate reference system to project to""") diff --git a/quest_tool_plugins/timeseries/timeseries.py b/quest_tool_plugins/timeseries/timeseries.py index 4efdfedb..c2a20770 100644 --- a/quest_tool_plugins/timeseries/timeseries.py +++ b/quest_tool_plugins/timeseries/timeseries.py @@ -1,6 +1,3 @@ -"""Timeseries Tools - -""" import param from quest.util import setattr_on_dataframe, unit_list, unit_registry diff --git a/quest_tool_plugins/timeseries/ts_base.py b/quest_tool_plugins/timeseries/ts_base.py index 945d9395..6fd463c9 100644 --- a/quest_tool_plugins/timeseries/ts_base.py +++ b/quest_tool_plugins/timeseries/ts_base.py @@ -1,22 +1,23 @@ +from quest import util from quest.plugins import ToolBase from 
quest.api import get_metadata -from quest import util from quest.plugins import load_plugins +from quest.static import UriType, DataType class TsBase(ToolBase): # metadata attributes group = 'Timeseries' - operates_on_datatype = ['timeseries'] + operates_on_datatype = [DataType.TIMESERIES] operates_on_geotype = None operates_on_parameters = None - produces_datatype = ['timeseries'] + produces_datatype = [DataType.TIMESERIES] produces_geotype = None produces_parameters = None dataset = util.param.DatasetSelector(default=None, doc="""Dataset to apply filter to.""", - filters={'datatype': 'timeseries'}, + filters={'datatype': DataType.TIMESERIES}, ) def _run_tool(self): diff --git a/quest_tool_plugins/timeseries/ts_flow_duration.py b/quest_tool_plugins/timeseries/ts_flow_duration.py index ccc56101..081d498c 100644 --- a/quest_tool_plugins/timeseries/ts_flow_duration.py +++ b/quest_tool_plugins/timeseries/ts_flow_duration.py @@ -1,8 +1,9 @@ +from quest import util from quest.plugins import ToolBase from quest.api import get_metadata -from quest import util -from quest.util import setattr_on_dataframe +from quest.static import DataType, UriType from quest.plugins import load_plugins +from quest.util import setattr_on_dataframe class TsFlowDuration(ToolBase): @@ -11,7 +12,7 @@ class TsFlowDuration(ToolBase): dataset = util.param.DatasetSelector(default=None, doc="""Dataset to apply filter to.""", - filters={'datatype': 'timeseries'}, + filters={'datatype': DataType.TIMESERIES}, ) def _run_tool(self): @@ -60,4 +61,4 @@ def _run_tool(self): output = load_plugins('io', 'xy-hdf5')['xy-hdf5'] output.write(file_path, new_df, new_metadata) - return {'datasets': new_dset, 'catalog_entries': catalog_entry} \ No newline at end of file + return {'datasets': new_dset, 'catalog_entries': catalog_entry} diff --git a/quest_tool_plugins/whitebox/whitebox_utils.py b/quest_tool_plugins/whitebox/whitebox_utils.py index 67a37306..80f8865a 100644 --- 
a/quest_tool_plugins/whitebox/whitebox_utils.py +++ b/quest_tool_plugins/whitebox/whitebox_utils.py @@ -1,20 +1,22 @@ -import logging import os -import inspect import re import time +import logging +import inspect from functools import wraps -import whitebox_tools import rasterio -import xarray as xr import numpy as np +import xarray as xr import pandas as pd +import whitebox_tools import geopandas as gpd from shapely.geometry import Point, shape +from quest.static import DataType from quest.util import listify, convert_nodata_to_nans + whitebox_log = logging.getLogger('whitebox') whitebox_log.addHandler(logging.NullHandler()) whitebox_log.propagate = True diff --git a/quest_tool_plugins/whitebox/whitebox_watershed.py b/quest_tool_plugins/whitebox/whitebox_watershed.py index 9ec5b3dc..c5d7336f 100644 --- a/quest_tool_plugins/whitebox/whitebox_watershed.py +++ b/quest_tool_plugins/whitebox/whitebox_watershed.py @@ -1,8 +1,9 @@ -import numpy as np import param +import numpy as np -from quest.plugins import ToolBase from quest import util +from quest.plugins import ToolBase +from quest.static import DataType, UriType, GeomType from quest.api import get_metadata, update_metadata, open_dataset from .whitebox_utils import wbt, points_to_shp, raster_to_polygons @@ -13,17 +14,17 @@ class WBTFillDepressions(ToolBase): # metadata attributes group = 'raster' - operates_on_datatype = ['raster'] + operates_on_datatype = [DataType.RASTER] operates_on_geotype = None operates_on_parameters = None - produces_datatype = ['raster'] + produces_datatype = [DataType.RASTER] produces_geotype = None produces_parameters = None dataset = util.param.DatasetSelector( default=None, doc="""Dataset to run tool on.""", - filters={'datatype': 'raster'}, + filters={'datatype': DataType.RASTER}, ) def _run_tool(self): @@ -57,17 +58,17 @@ class WBTExtractStreamsWorkflow(ToolBase): # metadata attributes group = 'raster' - operates_on_datatype = ['raster'] + operates_on_datatype = [DataType.RASTER] 
operates_on_geotype = None operates_on_parameters = None - produces_datatype = ['raster'] + produces_datatype = [DataType.RASTER] produces_geotype = None produces_parameters = None dataset = util.param.DatasetSelector( default=None, doc="""Dataset to run tool on.""", - filters={'datatype': 'raster'}, + filters={'datatype': DataType.RASTER}, ) stream_threshold = param.Number( @@ -131,29 +132,29 @@ class WBTWatershedDelineationWorkflow(ToolBase): # metadata attributes group = 'raster' - operates_on_datatype = ['raster'] + operates_on_datatype = [DataType.RASTER] operates_on_geotype = None operates_on_parameters = None - produces_datatype = ['raster'] + produces_datatype = [DataType.RASTER] produces_geotype = None produces_parameters = None elevation_dataset = util.param.DatasetSelector( default=None, doc="""Dataset to run tool on.""", - filters={'datatype': 'raster'}, + filters={'datatype': DataType.RASTER}, ) streams_dataset = util.param.DatasetSelector( default=None, doc="""Dataset to run tool on.""", - filters={'datatype': 'raster'}, + filters={'datatype': DataType.RASTER}, ) outlets = util.param.CatalogEntrySelector( default=None, doc="""Point geometry to use for the outlet.""", - filters={'geom_type': 'point'}, + filters={'geom_type': GeomType.POINT}, ) snap_distance = param.Number( diff --git a/test/data.py b/test/data.py index 068f3406..e131341f 100644 --- a/test/data.py +++ b/test/data.py @@ -352,6 +352,6 @@ ('svc://usgs-nlcd:2001/5a1c65a5e4b09fc93dd648f1', None), ('svc://usgs-nlcd:2006/5a1c35b6e4b09fc93dd64011', None), ('svc://usgs-nlcd:2011/5a1c31abe4b09fc93dd6381c', None), - ('svc://usgs-nwis:dv/01010000', {'parameter': 'streamflow:mean:daily', 'start': '2016-01-01', 'end': '2016-01-02'}), ('svc://usgs-nwis:iv/01010000', {'parameter': 'gage_height', 'start': '2016-01-01', 'end': '2016-01-02'}), + ('svc://usgs-nwis:dv/01010000', {'parameter': 'streamflow:mean:daily', 'start': '2016-01-01', 'end': '2016-01-02'}), ] diff --git a/test/test_catalog.py 
b/test/test_catalog.py index 78a6687c..77b56af5 100644 --- a/test/test_catalog.py +++ b/test/test_catalog.py @@ -1,5 +1,6 @@ import pytest +from quest.static import GeomType from data import SERVICES_CATALOG_COUNT, CACHED_SERVICES ACTIVE_PROJECT = 'project1' @@ -60,12 +61,12 @@ def test_search_catalog_with_query(api): def test_new_catalog_entry(api): - c = api.new_catalog_entry(geom_type='Point', geom_coords=[-94.2, 23.4]) + c = api.new_catalog_entry(geom_type=GeomType.POINT, geom_coords=[-94.2, 23.4]) assert c in api.get_metadata(c) def test_delete_catalog_entry(api): - c = api.new_catalog_entry(geom_type='Point', geom_coords=[-94.2, 23.4]) + c = api.new_catalog_entry(geom_type=GeomType.POINT, geom_coords=[-94.2, 23.4]) d = api.new_dataset(collection='col1', catalog_entry=c, source='derived') api.delete(d) assert d not in api.get_datasets() @@ -74,7 +75,7 @@ def test_delete_catalog_entry(api): def test_delete_derived_dataset(api): - c = api.new_catalog_entry(geom_type='Point', geom_coords=[-94.2, 23.4]) + c = api.new_catalog_entry(geom_type=GeomType.POINT, geom_coords=[-94.2, 23.4]) d = api.add_datasets(collection='col1', catalog_entries=[c, c]) api.delete(d[0]) assert d[0] not in api.get_datasets() diff --git a/test/test_datasets.py b/test/test_datasets.py index 1dbec139..4ad21763 100644 --- a/test/test_datasets.py +++ b/test/test_datasets.py @@ -80,7 +80,6 @@ def test_new_dataset(api): new_dataset = api.new_dataset(CATALOG_ENTRY, 'col1') datasets = api.get_datasets() try: - # test number of datasets actual = len(datasets) expected = 2 assert actual == expected diff --git a/test/test_providers.py b/test/test_providers.py index 095e5ca9..36669be7 100644 --- a/test/test_providers.py +++ b/test/test_providers.py @@ -1,8 +1,12 @@ -from data import ALL_SERVICES, SERVICE_DOWNLOAD_OPTIONS -from conftest import FILES_DIR -import pytest import os +import pytest + +from conftest import FILES_DIR +from quest.static import DatasetStatus +from data import ALL_SERVICES, 
SERVICE_DOWNLOAD_OPTIONS + + pytestmark = pytest.mark.usefixtures('reset_projects_dir') @@ -35,4 +39,4 @@ def test_download(api, catalog_entry, options): d = api.add_datasets('test', catalog_entry)[0] api.stage_for_download(d, options=options) result = api.download_datasets(d, raise_on_error=True) - assert result[d] == 'downloaded' + assert result[d[0]] == DatasetStatus.DOWNLOADED diff --git a/test/test_util_misc.py b/test/test_util_misc.py index 61f4e3fa..71b49bea 100644 --- a/test/test_util_misc.py +++ b/test/test_util_misc.py @@ -8,7 +8,9 @@ def test_get_quest_dir(reset_projects_dir): def test_get_cache_data_dir(reset_projects_dir): - assert quest.util.get_cache_dir() == os.path.join(reset_projects_dir['BASE_DIR'], os.path.join('.cache', 'test_cache')) + actual = quest.util.get_cache_dir() + expected = os.path.join(reset_projects_dir['BASE_DIR'], os.path.join('.cache', 'test_cache')) + assert actual == expected folder_obj = tempfile.TemporaryDirectory() folder = folder_obj.name @@ -67,13 +69,13 @@ def test_bbox2poly(): [-160.0, 20.0], [-160.0, -20.0], [-180, -20.0]], - "type": "Polygon"}, + "type": quest.static.GeomType.POLYGON}, {"coordinates": [[160.0, -20.0], [160.0, 20.0], [180, 20.0], [180, -20.0], [160.0, -20.0]], - "type": "Polygon"}], + "type": quest.static.GeomType.POLYGON}], "type": "MultiPolygon"} poly = quest.util.bbox2poly(*bbox, as_shapely=True) @@ -91,7 +93,7 @@ def test_bbox2poly(): [10.0, 10.0], [10.0, -10.0], [-10.0, -10.0]], - 'type': 'Polygon'} + 'type': quest.static.GeomType.POLYGON} bbox = 160, -20, 200, 20 @@ -103,13 +105,13 @@ def test_bbox2poly(): [180, 20.0], [180, -20.0], [160.0, -20.0]], - 'type': 'Polygon'}, + 'type': quest.static.GeomType.POLYGON}, {'coordinates': [[-180, -20.0], [-180, 20.0], [-160.0, 20.0], [-160.0, -20.0], [-180, -20.0]], - 'type': 'Polygon'}], + 'type': quest.static.GeomType.POLYGON}], 'type': 'MultiPolygon'} poly = quest.util.bbox2poly(*bbox)