Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit. Hold shift + click to select a range.
46bcdb1
Built-in BIDS support for `dandi upload`
TheChymera May 12, 2022
d508c7b
fixed logic for None input
TheChymera May 13, 2022
5531d3c
not using return value
TheChymera May 13, 2022
db3379c
Code improvements as suggested by jwodder
TheChymera Jun 6, 2022
0890e0a
Code style and no SystemExit in library function.
TheChymera Jun 7, 2022
7ba78d5
Corrected return type
TheChymera Jun 7, 2022
9ae2218
Fixed typing
TheChymera Jun 8, 2022
d698804
Better user prompt
TheChymera Jun 9, 2022
953f7b4
Validation return typing and docstring
TheChymera Jun 9, 2022
adf2605
Allowing upload of BIDS datasets relative to input path
TheChymera Jun 10, 2022
e10c8ea
No click usage outside of CLI library
TheChymera Jun 20, 2022
0492dbb
Not computing list if errors are suppressed
TheChymera Jun 27, 2022
d079d9c
Typing
TheChymera Jun 27, 2022
540923a
docstring fix
TheChymera Jun 30, 2022
a3b89dc
Separating validation evaluation logic from reporting
TheChymera Jun 30, 2022
c59bae2
fix typing
TheChymera Jun 30, 2022
358d2f2
Improve function naming
TheChymera Jun 30, 2022
62cf170
Testing data
TheChymera Jul 5, 2022
044ee28
Fixed typing
TheChymera Jul 5, 2022
ac37533
testing bids upload
TheChymera Jul 5, 2022
306b2f9
using shutil instead of deprecated distutils
TheChymera Jul 5, 2022
4847cbb
fixing copytree parameter
TheChymera Jul 5, 2022
9f82246
improved bids upload testing
TheChymera Jul 5, 2022
c230d26
Creating changes file
TheChymera Jul 5, 2022
bf3aba7
allowing existing directory
TheChymera Jul 5, 2022
7f58c05
boilerplate copytree
TheChymera Jul 5, 2022
50092ea
Check asset upload
TheChymera Jul 6, 2022
427a136
Testing upload with validation ignore
TheChymera Jul 6, 2022
2864c77
simplified path processing in BIDS lookup
TheChymera Jul 6, 2022
8139966
docstring formatting
TheChymera Jul 6, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 77 additions & 0 deletions dandi/bids_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
from .utils import pluralize


def is_valid(
    validation_result: dict,
    allow_invalid_filenames: bool = False,
    allow_missing_files: bool = False,
) -> bool:
    """Determine whether a dataset validation result marks it as valid.

    Parameters
    ----------
    validation_result: dict
        Dictionary as returned by `dandi.bids_validator_xs.validate_bids()`.
    allow_invalid_filenames: bool, optional
        Whether to still consider the dataset valid if it contains filenames
        not matching any pattern known to BIDS.
    allow_missing_files: bool, optional
        Whether to still consider the dataset valid if mandatory files are
        not present.

    Returns
    -------
    bool: whether the dataset validation result marks it as valid.

    """

    # Both error classes tolerated — nothing left to check, and we avoid
    # touching the result dictionary at all.
    if allow_invalid_filenames and allow_missing_files:
        return True
    # Mandatory filename patterns that matched no file in the dataset.
    missing_files = [
        i["regex"] for i in validation_result["schema_tracking"] if i["mandatory"]
    ]
    # Paths that matched no pattern known to BIDS.
    invalid_filenames = validation_result["path_tracking"]

    if missing_files and not allow_missing_files:
        return False
    if invalid_filenames and not allow_invalid_filenames:
        return False
    return True


def report_errors(
    validation_result: dict,
) -> None:
    """Print a colored terminal summary of a BIDS validation result.

    Parameters
    ----------
    validation_result: dict
        Dictionary as returned by `dandi.bids_validator_xs.validate_bids()`.
    """
    # Imported locally so that the library proper does not depend on click.
    import click

    missing_files = [
        entry["regex"]
        for entry in validation_result["schema_tracking"]
        if entry["mandatory"]
    ]
    problems = []
    if missing_files:
        problems.append(
            f"{pluralize(len(missing_files), 'filename pattern')} required "
            "by BIDS could not be found"
        )
    if validation_result["path_tracking"]:
        problems.append(
            f"{pluralize(len(validation_result['path_tracking']), 'filename')} "
            "did not match any pattern known to BIDS"
        )
    if problems:
        summary = " and ".join(problems)
        click.secho(
            f"Summary: {summary}.",
            bold=True,
            fg="red",
        )
    else:
        click.secho(
            "All filenames are BIDS-valid and no mandatory files are missing.",
            bold=True,
            fg="green",
        )
25 changes: 18 additions & 7 deletions dandi/bids_validator_xs.py
Original file line number Diff line number Diff line change
Expand Up @@ -611,17 +611,28 @@ def select_schema_dir(
)
else:
with open(dataset_description) as f:
dataset_info = json.load(f)
try:
schema_version = dataset_info["BIDSVersion"]
except KeyError:
lgr.warning(
"BIDSVersion is not specified in "
"`dataset_description.json`. "
"Falling back to %s.",
dataset_info = json.load(f)
except json.decoder.JSONDecodeError:
lgr.error(
"The `%s` file could not be loaded. "
"Please check whether the file is valid JSON. "
"Falling back to the %s BIDS version.",
dataset_description,
schema_min_version,
)
schema_version = schema_min_version
else:
try:
schema_version = dataset_info["BIDSVersion"]
except KeyError:
lgr.warning(
"BIDSVersion is not specified in "
"`dataset_description.json`. "
"Falling back to %s.",
schema_min_version,
)
schema_version = schema_min_version
if not schema_version:
lgr.warning(
"No BIDSVersion could be found for the dataset. Falling back to %s.",
Expand Down
34 changes: 6 additions & 28 deletions dandi/cli/cmd_validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import click

from .base import devel_debug_option, devel_option, lgr, map_to_click_exceptions
from ..utils import pluralize


@click.command()
Expand All @@ -26,43 +25,22 @@ def validate_bids(
paths, schema=None, devel_debug=False, report=False, report_flag=False
):
"""Validate BIDS paths."""
from ..bids_utils import is_valid, report_errors
from ..validate import validate_bids as validate_bids_

if report_flag and not report:
report = report_flag

validation_result = validate_bids_(
validator_result = validate_bids_(
*paths,
report=report,
schema_version=schema,
devel_debug=devel_debug,
)
missing_files = [
pattern["regex"]
for pattern in validation_result["schema_tracking"]
if pattern["mandatory"]
]
error_list = []
if missing_files:
error_substring = (
f"{pluralize(len(missing_files), 'filename pattern')} required "
"by BIDS could not be found"
)
error_list.append(error_substring)
if validation_result["path_tracking"]:
error_substring = (
f"{pluralize(len(validation_result['path_tracking']), 'filename')} "
"did not match any pattern known to BIDS"
)
error_list.append(error_substring)
if error_list:
error_string = " and ".join(error_list)
error_string = f"Summary: {error_string}."
click.secho(
error_string,
bold=True,
fg="red",
)
valid = is_valid(validator_result)
report_errors(validator_result)

if not valid:
raise SystemExit(1)


Expand Down
36 changes: 36 additions & 0 deletions dandi/tests/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,32 @@
lgr = get_logger()


def copytree(src, dst, symlinks=False, ignore=None):
    """Function mimicking `shutil.copytree()` behaviour but supporting existing target
    directories.

    Unlike the common boilerplate this is derived from, the ``symlinks`` and
    ``ignore`` parameters are actually honoured, matching `shutil.copytree()`.

    Parameters
    ----------
    src: str
        Source directory to copy from.
    dst: str
        Target directory; created if absent, reused if already present.
    symlinks: bool, optional
        If true, symbolic links are recreated as links rather than having
        their targets copied.
    ignore: callable, optional
        Callable with `shutil.ignore_patterns()` semantics: called as
        ``ignore(src, names)`` and returning the set of names to skip.

    Notes
    -----
    * This function can be removed and replaced by a call to `shutil.copytree()`
      setting the `dirs_exist_ok` keyword argument to true, whenever Python 3.7
      is no longer supported.

    References
    ----------
    https://docs.python.org/3/whatsnew/3.8.html#shutil
    """
    if not os.path.exists(dst):
        os.makedirs(dst)
    names = os.listdir(src)
    # Names excluded by the caller-supplied filter (previously ignored silently).
    excluded = ignore(src, names) if ignore is not None else set()
    for item in names:
        if item in excluded:
            continue
        s = os.path.join(src, item)
        d = os.path.join(dst, item)
        if symlinks and os.path.islink(s):
            # Reproduce the link itself instead of copying its target.
            if os.path.lexists(d):
                os.remove(d)
            os.symlink(os.readlink(s), d)
        elif os.path.isdir(s):
            copytree(s, d, symlinks, ignore)
        else:
            # Copy only files that are absent or more than a second newer at
            # the source, to keep repeated invocations cheap.
            if not os.path.exists(d) or os.stat(s).st_mtime - os.stat(d).st_mtime > 1:
                shutil.copy2(s, d)


@pytest.fixture(autouse=True)
def capture_all_logs(caplog: pytest.LogCaptureFixture) -> None:
caplog.set_level(logging.DEBUG, logger="dandi")
Expand Down Expand Up @@ -411,6 +437,16 @@ def zarr_dandiset(new_dandiset: SampleDandiset) -> SampleDandiset:
return new_dandiset


@pytest.fixture()
def bids_dandiset(new_dandiset: SampleDandiset, bids_examples: str) -> SampleDandiset:
    """Populate a fresh dandiset with the ``asl003`` BIDS example dataset."""
    source = os.path.join(bids_examples, "asl003")
    copytree(source, str(new_dandiset.dspath) + "/")
    # A CHANGES file is required; write a minimal one.
    (new_dandiset.dspath / "CHANGES").write_text("0.1.0 2014-11-03\n")
    return new_dandiset


@pytest.fixture()
def video_files(tmp_path):
video_paths = []
Expand Down
32 changes: 32 additions & 0 deletions dandi/tests/test_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,38 @@ def test_upload_sync_folder(
text_dandiset.dandiset.get_asset_by_path("subdir2/banana.txt")


def test_upload_bids(mocker: MockerFixture, bids_dandiset: SampleDandiset) -> None:
    """Upload a BIDS dandiset and verify the expected assets arrive remotely."""
    upload_spy = mocker.spy(LocalFileAsset, "iter_upload")
    bids_dandiset.upload(existing="forced")
    # Check whether upload was run
    upload_spy.assert_called()
    # Check existence of assets: the file we created, the BIDS descriptor
    # file, and an actual data file.
    remote_dandiset = bids_dandiset.dandiset
    for asset_path in (
        "CHANGES",
        "dataset_description.json",
        "sub-Sub1/anat/sub-Sub1_T1w.nii.gz",
    ):
        remote_dandiset.get_asset_by_path(asset_path)


def test_upload_bids_validation_ignore(
    mocker: MockerFixture, bids_dandiset: SampleDandiset
) -> None:
    """Upload a BIDS dandiset with validation disabled and verify the assets."""
    upload_spy = mocker.spy(LocalFileAsset, "iter_upload")
    bids_dandiset.upload(existing="forced", validation="ignore")
    # Check whether upload was run
    upload_spy.assert_called()
    # Check existence of assets: the file we created, the BIDS descriptor
    # file, and an actual data file.
    remote_dandiset = bids_dandiset.dandiset
    for asset_path in (
        "CHANGES",
        "dataset_description.json",
        "sub-Sub1/anat/sub-Sub1_T1w.nii.gz",
    ):
        remote_dandiset.get_asset_by_path(asset_path)


def test_upload_sync_zarr(mocker, zarr_dandiset):
rmtree(zarr_dandiset.dspath / "sample.zarr")
zarr.save(zarr_dandiset.dspath / "identity.zarr", np.eye(5))
Expand Down
73 changes: 73 additions & 0 deletions dandi/upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import click

from . import lgr
from .bids_utils import is_valid
from .consts import DRAFT, dandiset_identifier_regex, dandiset_metadata_file
from .dandiapi import RemoteAsset
from .exceptions import NotFoundError
Expand Down Expand Up @@ -61,6 +62,20 @@ def upload(
" paths. Use 'dandi download' or 'organize' first."
)

# Pre-validate BIDS datasets before going for individual files.
bids_datasets = _bids_discover_and_validate(dandiset_.path, validation)

if bids_datasets:
_bids_datasets = [str(i) for i in bids_datasets]
if not allow_any_path:
lgr.info(
"Enabling --allow-any-path since we detected %s under the following "
"paths: %s",
pluralize(len(_bids_datasets), "BIDS dataset"),
", ".join(_bids_datasets),
)
allow_any_path = True

instance = get_instance(dandi_instance)
assert instance.api is not None
api_url = instance.api
Expand Down Expand Up @@ -383,3 +398,61 @@ def check_replace_asset(

def skip_file(msg: Any) -> Dict[str, str]:
    """Build an upload-status record marking a file as skipped, with *msg* as the reason."""
    return dict(status="skipped", message=str(msg))


def _bids_discover_and_validate(
    dandiset_path: str,
    validation: Optional[str] = "require",
) -> List[Path]:
    """Temporary implementation for discovery and validation of BIDS datasets

    Parameters
    ----------
    dandiset_path: str
        Path under which to search for BIDS datasets (identified by a
        ``dataset_description.json`` file).
    validation: str, optional
        "skip" bypasses validation entirely; "ignore" validates but tolerates
        missing mandatory files and invalid filenames; anything else requires
        a fully valid dataset.

    Returns
    -------
    List[Path]
        Paths of discovered (and, unless skipped, successfully validated)
        BIDS datasets.

    References:
    - unification of validation records: https://github.com/dandi/dandi-cli/issues/943
    - validation "design doc": https://github.com/dandi/dandi-cli/pull/663
    """
    from .utils import find_files
    from .validate import validate_bids

    bids_descriptions = map(
        Path, find_files(r"(^|[/\x5C])dataset_description\.json$", dandiset_path)
    )
    bids_datasets = [bd.parent for bd in bids_descriptions]
    if bids_datasets:
        lgr.debug(
            "Detected %s under following paths: %s",
            pluralize(len(bids_datasets), "BIDS dataset"),
            ", ".join(str(i) for i in bids_datasets),
        )

    if validation == "skip":
        return bids_datasets
    if bids_datasets:
        # Collect into a set: the containment check below matches every
        # dataset against itself and each nested/enclosing dataset, which
        # previously produced duplicates and redundant validation passes.
        bids_datasets_to_validate = set()
        for p in bids_datasets:
            for bd in bids_datasets:
                # Retain `bd` if `p` lies inside it or it lies inside `p`;
                # `p == bd` always matches, so every dataset is retained.
                try:
                    p.relative_to(bd)
                except ValueError:
                    try:
                        bd.relative_to(p)
                    except ValueError:
                        pass
                    else:
                        bids_datasets_to_validate.add(bd)
                else:
                    bids_datasets_to_validate.add(bd)
    else:
        bids_datasets_to_validate = set(bids_datasets)
    validated_datasets = []
    for bd in sorted(bids_datasets_to_validate):
        validator_result = validate_bids(bd)
        valid = is_valid(
            validator_result,
            allow_missing_files=validation == "ignore",
            allow_invalid_filenames=validation == "ignore",
        )
        if valid:
            validated_datasets.append(bd)
    return validated_datasets
16 changes: 12 additions & 4 deletions dandi/validate.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
from typing import Any, Iterator, List, Optional, Tuple
from pathlib import Path
from typing import Iterator, List, Optional, Tuple, Union

from .files import find_dandi_files

# TODO: provide our own "errors" records, which would also include warnings etc


def validate_bids(
*paths: str,
*paths: Union[str, Path],
schema_version: Optional[str] = None,
devel_debug: bool = False,
report: Optional[str] = None,
) -> Any:
) -> dict:
"""Validate BIDS paths.

Parameters
Expand All @@ -26,16 +27,23 @@ def validate_bids(
If string, the string will be used as the output path.
If the variable evaluates as False, no log will be written.

Returns
-------
dict
Dictionary reporting required patterns not found and existing filenames not matching any
patterns.

Notes
-----
Can be used from bash, as:
DANDI_DEVEL=1 dandi validate-bids --schema="1.7.0+012+dandi001" --report="my.log" /my/path
"""
from .bids_validator_xs import validate_bids as validate_bids_

return validate_bids_(
validation_result = validate_bids_(
paths, schema_version=schema_version, debug=devel_debug, report_path=report
)
return dict(validation_result)


def validate(
Expand Down