From 9e2de19b66e4ff73f2a1e0400af7527762de4aeb Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Sun, 28 Sep 2025 14:47:16 -0500 Subject: [PATCH 1/3] Modified the defaults and the final output --- hed/errors/error_reporter.py | 17 +++++ hed/scripts/validate_bids.py | 112 +++++++++++++++++++--------- tests/errors/test_error_reporter.py | 40 ++++++++++ 3 files changed, 132 insertions(+), 37 deletions(-) diff --git a/hed/errors/error_reporter.py b/hed/errors/error_reporter.py index f4b7224b3..c3d9b4ff8 100644 --- a/hed/errors/error_reporter.py +++ b/hed/errors/error_reporter.py @@ -468,6 +468,23 @@ def aggregate_code_counts(file_code_dict) -> dict: total_counts[code] += count return dict(total_counts) + @staticmethod + def get_code_counts(issues: list[dict]) -> dict[str, int]: + """ Count the occurrences of each error code in the issues list. + + Parameters: + issues (list[dict]): A list of dictionaries containing the issues. + + Returns: + dict[str, int]: A dictionary with error codes as keys and their occurrence counts as values. + + """ + code_counts = defaultdict(int) + for issue in issues: + code = issue.get('code', 'UNKNOWN') + code_counts[code] += 1 + return dict(code_counts) + def sort_issues(issues, reverse=False) -> list[dict]: """Sort a list of issues by the error context values. diff --git a/hed/scripts/validate_bids.py b/hed/scripts/validate_bids.py index e5d77dcdc..ddfb202b1 100644 --- a/hed/scripts/validate_bids.py +++ b/hed/scripts/validate_bids.py @@ -20,6 +20,9 @@ import json import logging import sys +from hed import _version as vr +from hed.errors import get_printable_issue_string, ErrorHandler +from hed.tools import BidsDataset def get_parser(): # Create the argument parser @@ -31,22 +34,20 @@ def get_parser(): help="Apply error limit by file rather than overall for text output.") parser.add_argument("-f", "--format", choices=["text", "json", "json_pp"], default="text", help="Output format: 'text' (default) or 'json' ('json_pp' for pretty-printed json)") - parser.add_argument("-o", "--output_file", dest="output_file", default='', - help="Full path of output of validator -- otherwise output written to standard error.") - - parser.add_argument("-p", "--print_output", action='store_true', dest="print_output", - help="If present, output the results to standard out in addition to any saving of the files.") - parser.add_argument("-s", "--suffixes", dest="suffixes", nargs="*", default=['events', 'participants'], - help = "Optional list of suffixes (no under_bar) of tsv files to validate." + - " If -s with no values, will use all possible suffixes as with single argument '*'.") - parser.add_argument("-l", "--log-level", choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], - default="WARNING", help="Log level (case insensitive). Default: WARNING") + default="WARNING", help="Log level (case insensitive). Default: INFO") parser.add_argument("-lf", "--log-file", dest="log_file", default=None, help="Full path to save log output to file. If not specified, logs go to stderr.") parser.add_argument("-lq", "--log-quiet", action='store_true', dest="log_quiet", help="If present, suppress log output to stderr (only applies if --log-file is used).") + parser.add_argument("-o", "--output_file", dest="output_file", default='', + help="Full path of output of validator -- otherwise output written to standard error.") + parser.add_argument("-p", "--print_output", action='store_true', dest="print_output", + help="If present, output the results to standard out in addition to any saving of the files.") + parser.add_argument("-s", "--suffixes", dest="suffixes", nargs="*", default=['*'], + help = "Optional list of suffixes (no under_bar) of tsv files to validate." + + " If -s with no values, will use all possible suffixes as with single argument '*'.") parser.add_argument("-v", "--verbose", action='store_true', help="If present, output informative messages as computation progresses (equivalent to --log-level INFO).") parser.add_argument("-w", "--check_for_warnings", action='store_true', dest="check_for_warnings", @@ -57,15 +58,62 @@ def get_parser(): return parser +def format_validation_results(issue_list, args, ErrorHandler): + """Generate and output validation results based on format and options. + + Parameters: + issue_list (list): List of validation issues found + args: Parsed command line arguments containing format and output options + ErrorHandler: Error handling class for filtering issues + + Returns: + str: Formatted validation results as a string in the requested format (text, json, or json_pp) + """ + # Output based on format + output = "" + if args.format == "json_pp": + output = json.dumps({"issues": issue_list, "hedtools_version": str(vr.get_versions())}, indent=4) + elif args.format == "json": + output = json.dumps(issue_list) + elif args.format == "text": + output = f"Using HEDTOOLS version: {str(vr.get_versions())}\n" + output += f"Number of issues: {len(issue_list)}\n" + if args.error_limit: + [issue_list, code_counts] = ErrorHandler.filter_issues_by_count(issue_list, args.error_limit, + by_file=args.errors_by_file) + output += " ".join(f"{code}:{count}" for code, count in code_counts.items()) + "\n" + output += f"Number of issues after filtering: {len(issue_list)}\n" + if issue_list: + output += get_printable_issue_string(issue_list, "HED validation errors: ", skip_filename=False) + + return output + +def format_final_report(issue_list): + """Generate a final summary report of the validation results. + + Parameters: + issue_list (list): List of validation issues found + + Returns: + str: Summary report of the validation results + """ + report = f"Validation completed.\n\tFound {len(issue_list)} issues." + if issue_list: + unique_codes = ErrorHandler.get_code_counts(issue_list) + code_summary = ", ".join(f"{code}({count})" for code, count in unique_codes.items()) + report += f"\nCode counts: {code_summary}" + return report + + def main(arg_list=None): # Create the argument parser parser = get_parser() # Parse the arguments args = parser.parse_args(arg_list) - + print(f"{str(args)}") # Setup logging configuration - log_level = args.log_level.upper() if args.log_level else 'WARNING' + log_level = args.log_level.upper() if args.log_level else 'INFO' if args.verbose: log_level = 'INFO' @@ -106,19 +154,17 @@ def main(arg_list=None): try: issue_list = validate_dataset(args) except Exception as e: - logger.exception(f"Validation failed with exception: {e}") + logger.error(f"Validation failed with exception: {e}") raise + final_report = format_final_report(issue_list) + logger.info(final_report) + print(final_report) # Return 1 if there are issues, 0 otherwise return int(bool(issue_list)) def validate_dataset(args): - # Delayed imports to speed up --help - from hed.errors import get_printable_issue_string, ErrorHandler - from hed.tools import BidsDataset - from hed import _version as vr - logger = logging.getLogger('validate_bids') logger.info(f"Data directory: {args.data_path}") logger.info(f"HED tools version: {str(vr.get_versions())}") @@ -128,8 +174,7 @@ def validate_dataset(args): if args.suffixes == ['*'] or args.suffixes == []: args.suffixes = None - logger.debug("Using all available suffixes") - + # Validate the dataset try: logger.info("Creating BIDS dataset object...") @@ -145,31 +190,24 @@ def validate_dataset(args): logger.debug("Full exception details:", exc_info=True) raise - # Output based on format - output = "" - if args.format == "json_pp": - output = json.dumps({"issues": issue_list, "hedtools_version": str(vr.get_versions())}, indent=4) - elif args.format == "json": - output = json.dumps(issue_list) - elif args.format == "text": - output = f"Using HEDTOOLS version: {str(vr.get_versions())}\n" - output += f"Number of issues: {len(issue_list)}\n" - if args.error_limit: - [issue_list, code_counts] = ErrorHandler.filter_issues_by_count(issue_list, args.error_limit, - by_file=args.errors_by_file) - output += " ".join(f"{code}:{count}" for code, count in code_counts.items()) + "\n" - output += f"Number of issues after filtering: {len(issue_list)}\n" - if issue_list: - output += get_printable_issue_string(issue_list, "HED validation errors: ", skip_filename=False) - + # Generate and output the results if there is to be output + if args.output_file or args.print_output: + output = format_validation_results(issue_list, args, ErrorHandler) + else: + output = "" + # Output to file or print to screen if args.output_file: with open(args.output_file, 'w') as fp: fp.write(output) if args.print_output: print(output) + + return issue_list + + if __name__ == "__main__": sys.exit(main()) diff --git a/tests/errors/test_error_reporter.py b/tests/errors/test_error_reporter.py index db6ddafb0..10c77498e 100644 --- a/tests/errors/test_error_reporter.py +++ b/tests/errors/test_error_reporter.py @@ -228,3 +228,43 @@ def test_format_unknown_error(self): "param1", param2=0, actual_error=actual_code) self.assertEqual(error_list[0]['code'], actual_code) + + def test_get_code_counts(self): + """Test the get_code_counts static method.""" + # Create test issues with different error codes + issues = [ + {'code': 'ERROR_CODE_1', 'message': 'First error'}, + {'code': 'ERROR_CODE_2', 'message': 'Second error'}, + {'code': 'ERROR_CODE_1', 'message': 'Another first error'}, + {'code': 'ERROR_CODE_3', 'message': 'Third error'}, + {'code': 'ERROR_CODE_1', 'message': 'Yet another first error'}, + {'code': 'ERROR_CODE_2', 'message': 'Another second error'} + ] + + result = ErrorHandler.get_code_counts(issues) + + # Check that counts are correct + expected = { + 'ERROR_CODE_1': 3, + 'ERROR_CODE_2': 2, + 'ERROR_CODE_3': 1 + } + self.assertEqual(result, expected) + + # Test with empty list + empty_result = ErrorHandler.get_code_counts([]) + self.assertEqual(empty_result, {}) + + # Test with issue missing 'code' key + issues_with_missing_code = [ + {'code': 'VALID_CODE', 'message': 'Valid error'}, + {'message': 'Missing code error'}, # No 'code' key + {'code': 'VALID_CODE', 'message': 'Another valid error'} + ] + + result_with_missing = ErrorHandler.get_code_counts(issues_with_missing_code) + expected_with_missing = { + 'VALID_CODE': 2, + 'UNKNOWN': 1 # Default for missing code + } + self.assertEqual(result_with_missing, expected_with_missing) From 95a16afc9894d46fc800216e6f571f3ac04d3201 Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Sun, 28 Sep 2025 15:04:51 -0500 Subject: [PATCH 2/3] Updated the validation tests for logging --- tests/scripts/test_validate_bids.py | 30 ++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/tests/scripts/test_validate_bids.py b/tests/scripts/test_validate_bids.py index d4e65e771..ac43bbcd7 100644 --- a/tests/scripts/test_validate_bids.py +++ b/tests/scripts/test_validate_bids.py @@ -1,6 +1,7 @@ import os import io import unittest +import logging from unittest.mock import patch from hed.scripts.validate_bids import get_parser, validate_dataset, main @@ -14,16 +15,31 @@ def setUpClass(cls): def test_main_bids(self): arg_list = [self.data_root, '-x', 'derivatives', 'stimuli' ] - with patch('sys.stdout', new=io.StringIO()) as fp: - main(arg_list) - x = fp.getvalue() - self.assertFalse(x) + # Suppress all output including logging + with patch('sys.stdout', new=io.StringIO()), \ + patch('sys.stderr', new=io.StringIO()), \ + patch('logging.getLogger') as mock_logger: + # Configure mock logger to do nothing + mock_logger.return_value.info.return_value = None + mock_logger.return_value.debug.return_value = None + mock_logger.return_value.warning.return_value = None + mock_logger.return_value.error.return_value = None + x = main(arg_list) + self.assertFalse(x) def test_main_warnings(self): arg_list = [self.data_root, '-x', 'derivatives', 'stimuli', '-w', '-p', '-s' ] - with patch('sys.stdout', new=io.StringIO()) as fp: - main(arg_list) - self.assertTrue(fp.getvalue()) + # Suppress all output including logging + with patch('sys.stdout', new=io.StringIO()), \ + patch('sys.stderr', new=io.StringIO()), \ + patch('logging.getLogger') as mock_logger: + # Configure mock logger to do nothing + mock_logger.return_value.info.return_value = None + mock_logger.return_value.debug.return_value = None + mock_logger.return_value.warning.return_value = None + mock_logger.return_value.error.return_value = None + x = main(arg_list) + self.assertTrue(x) if __name__ == '__main__': unittest.main() From 9423ee621f7b2cad296a2ac9041dd654bd190446 Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Mon, 29 Sep 2025 06:29:51 -0500 Subject: [PATCH 3/3] Modified suffix defaults --- hed/scripts/validate_bids.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hed/scripts/validate_bids.py b/hed/scripts/validate_bids.py index ddfb202b1..052aef4b4 100644 --- a/hed/scripts/validate_bids.py +++ b/hed/scripts/validate_bids.py @@ -45,7 +45,7 @@ def get_parser(): help="Full path of output of validator -- otherwise output written to standard error.") parser.add_argument("-p", "--print_output", action='store_true', dest="print_output", help="If present, output the results to standard out in addition to any saving of the files.") - parser.add_argument("-s", "--suffixes", dest="suffixes", nargs="*", default=['*'], + parser.add_argument("-s", "--suffixes", dest="suffixes", nargs="*", default=['events', 'participants'], help = "Optional list of suffixes (no under_bar) of tsv files to validate." + " If -s with no values, will use all possible suffixes as with single argument '*'.") parser.add_argument("-v", "--verbose", action='store_true',