Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
db25e0e
update with latest changes
acwooding Dec 30, 2022
8223996
clean up
acwooding Dec 31, 2022
ef04660
sync with snp changes, especially descr -> readme and extra -> fileset
acwooding Dec 31, 2022
58db188
fix typos
acwooding Dec 31, 2022
6c6d7ef
add missing file
acwooding Dec 31, 2022
210e7c5
fix test dataset generation
acwooding Dec 31, 2022
944c394
remove use of src
acwooding Dec 31, 2022
4f2d118
fix typo
acwooding Dec 31, 2022
6f60931
try using a miniconda image
acwooding Dec 31, 2022
d3cbe67
remove comments
acwooding Dec 31, 2022
49153b1
Updated config.yml
acwooding Dec 31, 2022
0649fc3
Updated config.yml
acwooding Dec 31, 2022
41907d3
Updated config.yml
acwooding Dec 31, 2022
39c6111
Updated config.yml
acwooding Dec 31, 2022
1ba37cd
Updated config.yml
acwooding Dec 31, 2022
951b806
Updated config.yml
acwooding Dec 31, 2022
dbe1a1a
Updated config.yml
acwooding Dec 31, 2022
9f2d43a
Updated config.yml
acwooding Dec 31, 2022
ea35564
update extra -> fileset and descr -> readme
acwooding Dec 31, 2022
0be8282
change to lowercase
acwooding Dec 31, 2022
870f034
Merge pull request #242 from acwooding/sync-snp
acwooding Jan 10, 2023
8920b93
handle arbitrary conda channels
acwooding Jan 25, 2023
baa9fa2
use the template python version
acwooding Jan 25, 2023
09c1fd4
add test of the new environment code
acwooding Jan 25, 2023
ceb636b
add missing file
acwooding Jan 25, 2023
9739191
fix indentation
acwooding Jan 25, 2023
25e41e3
fix typo and remove test file
acwooding Jan 25, 2023
f0f5481
for latest, default to conda latest python version and remove test file
acwooding Jan 25, 2023
b22f548
handle situation where channel-order doesn't exist
acwooding Jan 25, 2023
756485e
use a windows friendly loop
acwooding Jan 31, 2023
6e69de1
update environment management instructions to include channel order a…
acwooding Feb 1, 2023
87159f7
Update README.md
acwooding Feb 1, 2023
4b541c9
remove travis.ci testing
acwooding Feb 1, 2023
d233bfb
fix help messages. Some of these should not display
hackalog Feb 1, 2023
7c6b736
remove lint target. We don't currently use this
hackalog Feb 1, 2023
223c1fb
change this warning to a variable on the standard help page
hackalog Feb 1, 2023
c29fed2
modernize the template
acwooding Feb 1, 2023
ed3f4c2
Merge pull request #247 from acwooding/remove-travis
hackalog Feb 1, 2023
b9ff13f
Merge pull request #248 from acwooding/update-template
hackalog Feb 1, 2023
b7aff58
Merge pull request #246 from hackalog/acwooding-patch-1
hackalog Feb 1, 2023
2e4563b
Merge pull request #1 from hackalog/extend_env_fixes
acwooding Feb 1, 2023
ad6ead4
Merge pull request #245 from acwooding/extend-environment
hackalog Feb 1, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 15 additions & 16 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ jobs:
docker:
# specify the version you desire here
# use `-browsers` prefix for selenium tests, e.g. `3.6.1-browsers`
- image: cimg/python:3.8.0
- image: continuumio/miniconda3

# Specify service dependencies here if necessary
# CircleCI maintains a library of pre-built images
Expand All @@ -19,39 +19,38 @@ jobs:

steps:
- checkout

- run:
name: Set up Anaconda
name: Set up Conda
command: |
wget -q http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh;
chmod +x ~/miniconda.sh;
~/miniconda.sh -b -p ~/miniconda;
export PATH=~/miniconda/bin:$PATH
echo "export PATH=~/miniconda/bin:$PATH" >> $BASH_ENV;
conda update --yes --quiet conda;
conda init bash
sed -ne '/>>> conda initialize/,/<<< conda initialize/p' ~/.bashrc >> $BASH_ENV

conda update --yes --quiet conda;
export CONDA_EXE=/opt/conda/bin/conda
sed -ne '/>>> conda initialize/,/<<< conda initialize/p' ~/.bashrc >> $BASH_ENV

- run:
name: Build cookiecutter environment and test-env project
command: |
conda create -n cookiecutter --yes python=3.8
conda create -n cookiecutter --yes python=3.8 make
conda activate cookiecutter
pip install cookiecutter
pip install ruamel.yaml
mkdir /home/circleci/.cookiecutter_replay
cp circleci-cookiecutter-easydata.json /home/circleci/.cookiecutter_replay/cookiecutter-easydata.json
mkdir -p /root/repo/.cookiecutter_replay
cp circleci-cookiecutter-easydata.json /root/repo/.cookiecutter_replay/cookiecutter-easydata.json
pwd
which make
cookiecutter --config-file .cookiecutter-easydata-test-circleci.yml . -f --no-input
conda deactivate

- run:
name: Create test-env environment and contrive to always use it
command: |
conda activate cookiecutter
cd test-env
export CONDA_EXE=/home/circleci/miniconda/bin/conda
export CONDA_EXE=/opt/conda/bin/conda
make create_environment
python scripts/tests/add-extra-channel-dependency.py
conda activate test-env
conda install -c anaconda make
touch environment.yml
make update_environment
echo "conda activate test-env" >> $BASH_ENV;
Expand Down
51 changes: 0 additions & 51 deletions .travis.yml

This file was deleted.

18 changes: 18 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,24 @@ python -m pip install -f requirements.txt

cookiecutter https://github.com/hackalog/easydata

### To find out more
------------
A good place to start is with reproducible environments. We have a tutorial here: [Getting Started with EasyData Environments](https://github.com/hackalog/easydata/wiki/Getting-Started-with-EasyData-Environments).

The next place to look is the customized documentation included in any EasyData-created repo, generated from the settings you chose in your template. These reference documents, found under `references/easydata`, cover:
* more on conda environments
* more on paths
* git configuration (including setting up ssh with GitHub)
* git workflows
* tricks for using Jupyter notebooks in an EasyData environment
* troubleshooting
* recommendations for how to share your work

Furthermore, see:
* [The EasyData documentation on read the docs](https://cookiecutter-easydata.readthedocs.io/en/latest/?badge=latest): this contains up-to-date working examples of how to use EasyData for reproducible datasets and some ways to use notebooks reproducibly
* [Talks and Tutorials based on EasyData](https://github.com/hackalog/easydata/wiki/EasyData-Talks-and-Tutorials)
* [Catalog of EasyData Documentation](https://github.com/hackalog/easydata/wiki/Catalog-of-EasyData-Documentation)
* [The EasyData wiki](https://github.com/hackalog/easydata/wiki) Check here for further troubleshooting and how-to guides for particular problems that aren't in the `references/easydata` docs (including a `git` tutorial)

### The resulting directory structure
------------
Expand Down
6 changes: 3 additions & 3 deletions cookiecutter.json
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
{
"project_name": "project_name",
"repo_name": "{{ cookiecutter.project_name.lower().replace(' ', '_') }}",
"default_branch": ["master", "main"],
"default_branch": ["main", "master"],
"module_name": "src",
"author_name": "Your name (or your organization/company/team)",
"author_name": "Your name (or the copyright holder)",
"description": "A short description of this project.",
"open_source_license": ["MIT", "BSD-2-Clause", "Proprietary"],
"python_version": ["3.7", "3.6", "latest", "3.8"],
"python_version": ["latest", "3.11", "3.10", "3.9", "3.8", "3.7"],
"conda_path": "~/anaconda3/bin/conda",
"upstream_location": ["github.com", "gitlab.com", "bitbucket.org", "your-custom-repo"]
}
2 changes: 1 addition & 1 deletion docs/00-xyz-sample-notebook.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(ds.DESCR)"
"print(ds.README)"
]
},
{
Expand Down
14 changes: 7 additions & 7 deletions docs/Add-csv-template.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@
"* `csv_path`: The desired path to your .csv file (in this case `epidemiology.csv`) relative to paths['raw_data_path']\n",
"* `download_message`: The message to display to indicate to the user how to manually download your .csv file.\n",
"* `license_str`: Information on the license for the dataset\n",
"* `descr_str`: Information on the dataset itself"
"* `readme_str`: Information on the dataset itself"
]
},
{
Expand Down Expand Up @@ -123,7 +123,7 @@
"metadata": {},
"outputs": [],
"source": [
"descr_str = \"\"\"\n",
"readme_str = \"\"\"\n",
"The epidemiology table from Google's [COVID-19 Open-Data dataset](https://github.com/GoogleCloudPlatform/covid-19-open-data). \n",
"\n",
"The full dataset contains datasets of daily time-series data related to COVID-19 for over 20,000 distinct locations around the world. The data is at the spatial resolution of states/provinces for most regions and at county/municipality resolution for many countries such as Argentina, Brazil, Chile, Colombia, Czech Republic, Mexico, Netherlands, Peru, United Kingdom, and USA. All regions are assigned a unique location key, which resolves discrepancies between ISO / NUTS / FIPS codes, etc. The different aggregation levels are:\n",
Expand Down Expand Up @@ -170,7 +170,7 @@
" csv_path=csv_path,\n",
" download_message=download_message,\n",
" license_str=license_str,\n",
" descr_str=descr_str,\n",
" readme_str=readme_str,\n",
" overwrite_catalog=True)"
]
},
Expand Down Expand Up @@ -206,9 +206,9 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"By default, the workflow helper function also created a `covid-19-epidemiology_raw` dataset that has an empty `ds.data`, but keeps a record of the location of the final `epidemiology.csv` file relative to in `ds.EXTRA`.\n",
"By default, the workflow helper function also created a `covid-19-epidemiology_raw` dataset that has an empty `ds.data`, but keeps a record of the location of the final `epidemiology.csv` file relative to in `ds.FILESET`.\n",
"\n",
"The `.EXTRA` functionality is covered in other documentation."
"The `.FILESET` functionality is covered in other documentation."
]
},
{
Expand Down Expand Up @@ -236,7 +236,7 @@
"metadata": {},
"outputs": [],
"source": [
"ds_raw.EXTRA"
"ds_raw.FILESET"
]
},
{
Expand All @@ -246,7 +246,7 @@
"outputs": [],
"source": [
"# fq path to epidemiology.csv file\n",
"ds_raw.extra_file('epidemiology.csv')"
"ds_raw.fileset_file('epidemiology.csv')"
]
},
{
Expand Down
10 changes: 5 additions & 5 deletions docs/Add-derived-dataset.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(ds.DESCR)"
"print(ds.README)"
]
},
{
Expand Down Expand Up @@ -219,7 +219,7 @@
" source_dataset_name\n",
" dataset_name\n",
" data_function\n",
" added_descr_txt\n",
" added_readme_txt\n",
"\n",
"We'll want our `data_function` to be defined in the project module (in this case `src`) for reproducibility reasons (which we've already done with `subselect_by_key` above)."
]
Expand Down Expand Up @@ -250,7 +250,7 @@
"metadata": {},
"outputs": [],
"source": [
"added_descr_txt = f\"\"\"The dataset {dataset_name} is the subselection \\\n",
"added_readme_txt = f\"\"\"The dataset {dataset_name} is the subselection \\\n",
"to the {key} dataset.\"\"\""
]
},
Expand Down Expand Up @@ -281,7 +281,7 @@
" source_dataset_name=source_dataset_name,\n",
" dataset_name=dataset_name,\n",
" data_function=data_function,\n",
" added_descr_txt=added_descr_txt,\n",
" added_readme_txt=added_readme_txt,\n",
" overwrite_catalog=True)"
]
},
Expand Down Expand Up @@ -318,7 +318,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(ds.DESCR)"
"print(ds.README)"
]
},
{
Expand Down
12 changes: 6 additions & 6 deletions docs/New-Dataset-Template.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@
"metadata": {},
"source": [
"### Create a process function\n",
"By default, we recommend that you use the `process_extra_files` functionality and then use a transformer function to create a derived dataset, but you can optionally create your own."
"By default, we recommend that you use the `process_fileset_files` functionality and then use a transformer function to create a derived dataset, but you can optionally create your own."
]
},
{
Expand All @@ -176,11 +176,11 @@
"metadata": {},
"outputs": [],
"source": [
"from src.data.extra import process_extra_files\n",
"process_function = process_extra_files\n",
"from src.data.fileset import process_fileset_files\n",
"process_function = process_fileset_files\n",
"process_function_kwargs = {'file_glob':'*.csv',\n",
" 'do_copy': True,\n",
" 'extra_dir': ds_name+'.extra',\n",
" 'fileset_dir': ds_name+'.fileset',\n",
" 'extract_dir': ds_name}"
]
},
Expand Down Expand Up @@ -355,7 +355,7 @@
"metadata": {},
"outputs": [],
"source": [
"ds.EXTRA"
"ds.FILESET"
]
},
{
Expand All @@ -364,7 +364,7 @@
"metadata": {},
"outputs": [],
"source": [
"ds.extra_file('epidemiology.csv')"
"ds.fileset_file('epidemiology.csv')"
]
},
{
Expand Down
4 changes: 2 additions & 2 deletions docs/New-Edge-Template.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@
"metadata": {},
"outputs": [],
"source": [
"source_ds.EXTRA"
"source_ds.FILESET"
]
},
{
Expand Down Expand Up @@ -178,7 +178,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(ds.DESCR)"
"print(ds.README)"
]
},
{
Expand Down
3 changes: 3 additions & 0 deletions docs/test_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
import requests

from src import paths
from src.log import logger


CCDS_ROOT = Path(__file__).parents[1].resolve()
DOCS_DIR = CCDS_ROOT / "docs"
Expand All @@ -35,6 +37,7 @@ def test_notebook_csv(self):
csv_url = "https://storage.googleapis.com/covid19-open-data/v2/epidemiology.csv"
csv_dest = paths['raw_data_path'] / "epidemiology.csv"
if not csv_dest.exists():
logger.debug("Downloading epidemiology.csv")
csv_file = requests.get(csv_url)
with open(csv_dest, 'wb') as f:
f.write(csv_file.content)
Expand Down
11 changes: 2 additions & 9 deletions {{ cookiecutter.repo_name }}/.circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ jobs:
docker:
# specify the version you desire here
# use `-browsers` prefix for selenium tests, e.g. `3.6.1-browsers`
- image: circleci/python:3.7.0
- image: continuumio/miniconda3


# Specify service dependencies here if necessary
# CircleCI maintains a library of pre-built images
Expand All @@ -20,14 +21,6 @@ jobs:
steps:
- checkout

- run:
name: Set up Anaconda
command: |
wget -q http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh;
chmod +x ~/miniconda.sh;
~/miniconda.sh -b -p ~/miniconda;
echo "export PATH=~/miniconda/bin:$PATH" >> $BASH_ENV;

- run:
name: Create environment and contrive to always use it
command: |
Expand Down
9 changes: 2 additions & 7 deletions {{ cookiecutter.repo_name }}/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -75,17 +75,12 @@ test: update_environment
$(if $(CI_RUNNING),--ignore=$(TESTS_NO_CI)) \
$(MODULE_NAME)

## Run all Unit Tests with coverage
## Run all Unit and code coverage tests
test_with_coverage: update_environment
$(SET) LOGLEVEL=DEBUG; coverage run -m pytest --pyargs --doctest-modules --doctest-continue-on-failure --verbose \
$(if $(CI_RUNNING),--ignore=$(TESTS_NO_CI)) \
$(MODULE_NAME)

.PHONY: lint
## Lint using flake8
lint:
flake8 $(MODULE_NAME)

.phony: help_update_easydata
help_update_easydata:
@$(PYTHON_INTERPRETER) scripts/help-update.py
Expand All @@ -105,7 +100,7 @@ debug:
# Self Documenting Commands #
#################################################################################

HELP_VARS := PROJECT_NAME DEBUG_FILE ARCH PLATFORM
HELP_VARS := PROJECT_NAME DEBUG_FILE ARCH PLATFORM SHELL

.DEFAULT_GOAL := show-help
.PHONY: show-help
Expand Down
Loading