diff --git a/dandi/bids_utils.py b/dandi/bids_utils.py new file mode 100644 index 000000000..3e4cdbe09 --- /dev/null +++ b/dandi/bids_utils.py @@ -0,0 +1,77 @@ +from .utils import pluralize + + +def is_valid( + validation_result: dict, + allow_invalid_filenames: bool = False, + allow_missing_files: bool = False, +) -> bool: + """Determine whether a dataset validation result marks it as valid. + + Parameters + ---------- + validation_result: dict + Dictionary as returned by `dandi.bids_validator_xs.validate_bids()`. + allow_missing_files: bool, optional + Whether to consider the dataset invalid if any mandatory files are not present. + allow_invalid_filenames: bool, optional + Whether to consider the dataset invalid if any filenames inside are invalid. + + Returns + ------- + bool: whether the dataset validation result marks it as valid. + + """ + + if allow_invalid_filenames and allow_missing_files: + return True + missing_files = [ + i["regex"] for i in validation_result["schema_tracking"] if i["mandatory"] + ] + invalid_filenames = validation_result["path_tracking"] + + if missing_files and not allow_missing_files: + return False + if invalid_filenames and not allow_invalid_filenames: + return False + else: + return True + + +def report_errors( + validation_result: dict, +) -> None: + import click + + missing_files = [ + pattern["regex"] + for pattern in validation_result["schema_tracking"] + if pattern["mandatory"] + ] + error_list = [] + if missing_files: + error_substring = ( + f"{pluralize(len(missing_files), 'filename pattern')} required " + "by BIDS could not be found" + ) + error_list.append(error_substring) + if validation_result["path_tracking"]: + error_substring = ( + f"{pluralize(len(validation_result['path_tracking']), 'filename')} " + "did not match any pattern known to BIDS" + ) + error_list.append(error_substring) + if error_list: + error_string = " and ".join(error_list) + error_string = f"Summary: {error_string}." + click.secho( + error_string, + bold=True, + fg="red", + ) + else: + click.secho( + "All filenames are BIDS-valid and no mandatory files are missing.", + bold=True, + fg="green", + ) diff --git a/dandi/bids_validator_xs.py b/dandi/bids_validator_xs.py index 13da4ad8a..0ed382ce0 100644 --- a/dandi/bids_validator_xs.py +++ b/dandi/bids_validator_xs.py @@ -611,17 +611,28 @@ def select_schema_dir( ) else: with open(dataset_description) as f: - dataset_info = json.load(f) try: - schema_version = dataset_info["BIDSVersion"] - except KeyError: - lgr.warning( - "BIDSVersion is not specified in " - "`dataset_description.json`. " - "Falling back to %s.", + dataset_info = json.load(f) + except json.decoder.JSONDecodeError: + lgr.error( + "The `%s` file could not be loaded. " + "Please check whether the file is valid JSON. " + "Falling back to the %s BIDS version.", + dataset_description, schema_min_version, ) schema_version = schema_min_version + else: + try: + schema_version = dataset_info["BIDSVersion"] + except KeyError: + lgr.warning( + "BIDSVersion is not specified in " + "`dataset_description.json`. " + "Falling back to %s.", + schema_min_version, + ) + schema_version = schema_min_version if not schema_version: lgr.warning( "No BIDSVersion could be found for the dataset. Falling back to %s.", diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py index 5fcbede87..553e40a69 100644 --- a/dandi/cli/cmd_validate.py +++ b/dandi/cli/cmd_validate.py @@ -4,7 +4,6 @@ import click from .base import devel_debug_option, devel_option, lgr, map_to_click_exceptions -from ..utils import pluralize @click.command() @@ -26,43 +25,22 @@ def validate_bids( paths, schema=None, devel_debug=False, report=False, report_flag=False ): """Validate BIDS paths.""" + from ..bids_utils import is_valid, report_errors from ..validate import validate_bids as validate_bids_ if report_flag and not report: report = report_flag - validation_result = validate_bids_( + validator_result = validate_bids_( *paths, report=report, schema_version=schema, devel_debug=devel_debug, ) - missing_files = [ - pattern["regex"] - for pattern in validation_result["schema_tracking"] - if pattern["mandatory"] - ] - error_list = [] - if missing_files: - error_substring = ( - f"{pluralize(len(missing_files), 'filename pattern')} required " - "by BIDS could not be found" - ) - error_list.append(error_substring) - if validation_result["path_tracking"]: - error_substring = ( - f"{pluralize(len(validation_result['path_tracking']), 'filename')} " - "did not match any pattern known to BIDS" - ) - error_list.append(error_substring) - if error_list: - error_string = " and ".join(error_list) - error_string = f"Summary: {error_string}." - click.secho( - error_string, - bold=True, - fg="red", - ) + valid = is_valid(validator_result) + report_errors(validator_result) + + if not valid: raise SystemExit(1) diff --git a/dandi/tests/fixtures.py b/dandi/tests/fixtures.py index 385f52de6..7a8b21998 100644 --- a/dandi/tests/fixtures.py +++ b/dandi/tests/fixtures.py @@ -40,6 +40,32 @@ lgr = get_logger() +def copytree(src, dst, symlinks=False, ignore=None): + """Function mimicking `shutil.copytree()` behaviour but supporting existing target + directories. + + Notes + ----- + * This function can be removed and replaced by a call to `shutil.copytree()` + setting the `dirs_exist_ok` keyword argument to true, whenever Python 3.7 + is no longer supported. + + References + ---------- + https://docs.python.org/3/whatsnew/3.8.html#shutil + """ + if not os.path.exists(dst): + os.makedirs(dst) + for item in os.listdir(src): + s = os.path.join(src, item) + d = os.path.join(dst, item) + if os.path.isdir(s): + copytree(s, d, symlinks, ignore) + else: + if not os.path.exists(d) or os.stat(s).st_mtime - os.stat(d).st_mtime > 1: + shutil.copy2(s, d) + + @pytest.fixture(autouse=True) def capture_all_logs(caplog: pytest.LogCaptureFixture) -> None: caplog.set_level(logging.DEBUG, logger="dandi") @@ -411,6 +437,16 @@ def zarr_dandiset(new_dandiset: SampleDandiset) -> SampleDandiset: return new_dandiset +@pytest.fixture() +def bids_dandiset(new_dandiset: SampleDandiset, bids_examples: str) -> SampleDandiset: + copytree( + os.path.join(bids_examples, "asl003"), + str(new_dandiset.dspath) + "/", + ) + (new_dandiset.dspath / "CHANGES").write_text("0.1.0 2014-11-03\n") + return new_dandiset + + @pytest.fixture() def video_files(tmp_path): video_paths = [] diff --git a/dandi/tests/test_upload.py b/dandi/tests/test_upload.py index e042024d2..d925db0a3 100644 --- a/dandi/tests/test_upload.py +++ b/dandi/tests/test_upload.py @@ -181,6 +181,38 @@ def test_upload_sync_folder( text_dandiset.dandiset.get_asset_by_path("subdir2/banana.txt") +def test_upload_bids(mocker: MockerFixture, bids_dandiset: SampleDandiset) -> None: + iter_upload_spy = mocker.spy(LocalFileAsset, "iter_upload") + bids_dandiset.upload(existing="forced") + # Check whether upload was run + iter_upload_spy.assert_called() + # Check existence of assets: + dandiset = bids_dandiset.dandiset + # file we created? + dandiset.get_asset_by_path("CHANGES") + # BIDS descriptor file? + dandiset.get_asset_by_path("dataset_description.json") + # actual data file? + dandiset.get_asset_by_path("sub-Sub1/anat/sub-Sub1_T1w.nii.gz") + + +def test_upload_bids_validation_ignore( + mocker: MockerFixture, bids_dandiset: SampleDandiset +) -> None: + iter_upload_spy = mocker.spy(LocalFileAsset, "iter_upload") + bids_dandiset.upload(existing="forced", validation="ignore") + # Check whether upload was run + iter_upload_spy.assert_called() + # Check existence of assets: + dandiset = bids_dandiset.dandiset + # file we created? + dandiset.get_asset_by_path("CHANGES") + # BIDS descriptor file? + dandiset.get_asset_by_path("dataset_description.json") + # actual data file? + dandiset.get_asset_by_path("sub-Sub1/anat/sub-Sub1_T1w.nii.gz") + + def test_upload_sync_zarr(mocker, zarr_dandiset): rmtree(zarr_dandiset.dspath / "sample.zarr") zarr.save(zarr_dandiset.dspath / "identity.zarr", np.eye(5)) diff --git a/dandi/upload.py b/dandi/upload.py index 56ccef1ae..f79369067 100644 --- a/dandi/upload.py +++ b/dandi/upload.py @@ -10,6 +10,7 @@ import click from . import lgr +from .bids_utils import is_valid from .consts import DRAFT, dandiset_identifier_regex, dandiset_metadata_file from .dandiapi import RemoteAsset from .exceptions import NotFoundError @@ -61,6 +62,20 @@ def upload( " paths. Use 'dandi download' or 'organize' first." ) + # Pre-validate BIDS datasets before going for individual files. + bids_datasets = _bids_discover_and_validate(dandiset_.path, validation) + + if bids_datasets: + _bids_datasets = [str(i) for i in bids_datasets] + if not allow_any_path: + lgr.info( + "Enabling --allow-any-path since we detected %s under the following " + "paths: %s", + pluralize(len(_bids_datasets), "BIDS dataset"), + ", ".join(_bids_datasets), + ) + allow_any_path = True + instance = get_instance(dandi_instance) assert instance.api is not None api_url = instance.api @@ -383,3 +398,61 @@ def check_replace_asset( def skip_file(msg: Any) -> Dict[str, str]: return {"status": "skipped", "message": str(msg)} + + +def _bids_discover_and_validate( + dandiset_path: str, + validation: Optional[str] = "require", +) -> List[Path]: + """Temporary implementation for discovery and validation of BIDS datasets + + References: + - unification of validation records: https://github.com/dandi/dandi-cli/issues/943 + - validation "design doc": https://github.com/dandi/dandi-cli/pull/663 + """ + from .utils import find_files + from .validate import validate_bids + + bids_descriptions = map( + Path, find_files(r"(^|[/\x5C])dataset_description\.json$", dandiset_path) + ) + bids_datasets = [bd.parent for bd in bids_descriptions] + if bids_datasets: + lgr.debug( + "Detected %s under following paths: %s", + pluralize(len(bids_datasets), "BIDS dataset"), + ", ".join(str(i) for i in bids_datasets), + ) + + if validation != "skip": + if bids_datasets: + bids_datasets_to_validate = list() + for p in bids_datasets: + for bd in bids_datasets: + try: + p.relative_to(bd) + except ValueError: + try: + bd.relative_to(p) + except ValueError: + pass + else: + bids_datasets_to_validate.append(bd) + else: + bids_datasets_to_validate.append(bd) + else: + bids_datasets_to_validate = bids_datasets + bids_datasets_to_validate.sort() + validated_datasets = [] + for bd in bids_datasets_to_validate: + validator_result = validate_bids(bd) + valid = is_valid( + validator_result, + allow_missing_files=validation == "ignore", + allow_invalid_filenames=validation == "ignore", + ) + if valid: + validated_datasets.append(bd) + return validated_datasets + else: + return bids_datasets diff --git a/dandi/validate.py b/dandi/validate.py index 6270af743..b0d857537 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -1,4 +1,5 @@ -from typing import Any, Iterator, List, Optional, Tuple +from pathlib import Path +from typing import Iterator, List, Optional, Tuple, Union from .files import find_dandi_files @@ -6,11 +7,11 @@ def validate_bids( - *paths: str, + *paths: Union[str, Path], schema_version: Optional[str] = None, devel_debug: bool = False, report: Optional[str] = None, -) -> Any: +) -> dict: """Validate BIDS paths. Parameters @@ -26,6 +27,12 @@ def validate_bids( If string, the string will be used as the output path. If the variable evaluates as False, no log will be written. + Returns + ------- + dict + Dictionary reporting required patterns not found and existing filenames not matching any + patterns. + Notes ----- Can be used from bash, as: @@ -33,9 +40,10 @@ def validate_bids( """ from .bids_validator_xs import validate_bids as validate_bids_ - return validate_bids_( + validation_result = validate_bids_( paths, schema_version=schema_version, debug=devel_debug, report_path=report ) + return dict(validation_result) def validate(