Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
69f8086
Merge pull request #703 from hed-standard/stable
VisLab Jun 20, 2023
4f08dd2
Merge pull request #704 from hed-standard/master
VisLab Jun 20, 2023
4ae9b4a
Clean up some of the overly complex functions some, add a few unit tests
IanCa Jun 22, 2023
db04a14
Merge pull request #705 from IanCa/develop
VisLab Jun 22, 2023
d4d81b0
First pass word cloud for tag summaries
IanCa Jun 23, 2023
713bf40
Merge pull request #706 from IanCa/develop
VisLab Jun 24, 2023
a12efaa
Started working on allowing multiple iteration of remodeling
VisLab Jun 26, 2023
8f9647d
Added a Specifics level to the summary output
VisLab Jun 26, 2023
43834e7
Merge pull request #707 from VisLab/develop
VisLab Jun 26, 2023
fd7adea
Merge pull request #708 from hed-standard/develop
VisLab Jun 27, 2023
8daf383
Made the get_details_dict method return a uniform dict with Specifics
VisLab Jun 28, 2023
6edf1dd
Merge branch 'master' of https://github.com/hed-standard/hed-python i…
VisLab Jun 29, 2023
def4fc8
Minor refactoring to reduce complexity in remodeling validation sum op
VisLab Jun 29, 2023
2e5c304
Refactored merge_all in remodeling validation sum op to reduce comple…
VisLab Jun 29, 2023
a8912fe
Merge pull request #709 from VisLab/develop
VisLab Jun 30, 2023
c29ca21
Updated the HedTagCounts to improve complexity
VisLab Jun 30, 2023
d762867
Updated the tests for HedTagCounts
VisLab Jun 30, 2023
c934ac3
Merge pull request #711 from VisLab/develop
VisLab Jun 30, 2023
6cfee0c
Second pass word cloud
IanCa Jun 29, 2023
513d3ce
Fix reference in init
IanCa Jun 29, 2023
cbb04f5
Fix for inflect
IanCa Jun 30, 2023
c9db349
Update setup.cfg to limit pydantic version
IanCa Jul 1, 2023
d917089
Merge pull request #712 from IanCa/develop
VisLab Jul 2, 2023
01d7252
Updated factor_hed_type_op to not include condition variable separate…
VisLab Jul 2, 2023
c1c0dc6
Merge pull request #713 from VisLab/develop
VisLab Jul 2, 2023
4dce6e3
Revert inflect changes as it's fixed
IanCa Jul 3, 2023
9dffeb9
Lower requirement to 6.0.5 for python 37
IanCa Jul 3, 2023
9844ada
Throw hed file error in base input
IanCa Jul 3, 2023
fdaca38
Fix typo in wordcloud
IanCa Jul 3, 2023
57b6eca
Merge pull request #716 from IanCa/develop
VisLab Jul 3, 2023
e9398ac
Updated License file
VisLab Jul 3, 2023
de598d1
Updated exception handling in BaseInput and Sidecar for Invalid file …
VisLab Jul 4, 2023
e4eaa18
Merge pull request #717 from VisLab/develop
VisLab Jul 4, 2023
96a4d8a
Merge pull request #718 from hed-standard/develop
VisLab Jul 4, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ jobs:
- uses: actions/cache@v3
with:
path: ${{ env.pythonLocation }}
key: ${{ env.pythonLocation }}-${{ hashFiles('setup.py') }}-${{ hashFiles('dev-requirements.txt') }}
key: ${{ env.pythonLocation }}-${{ hashFiles('setup.py') }}-${{ hashFiles('docs/requirements.txt') }}

- name: Install dependencies
run: |
Expand Down Expand Up @@ -85,7 +85,7 @@ jobs:
- uses: actions/cache@v3
with:
path: ${{ env.pythonLocation }}
key: ${{ env.pythonLocation }}-${{ hashFiles('setup.py') }}-${{ hashFiles('dev-requirements.txt') }}
key: ${{ env.pythonLocation }}-${{ hashFiles('setup.py') }}-${{ hashFiles('docs/requirements.txt') }}

- name: Install dependencies
run: |
Expand Down
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
Release 0.3.1 July 3, 2023
- Pinned the versions of the pydantic and inflect libraries due to a conflict.
- Reorganized JSON output of remodeling summaries so that all are of a consistent form.
- Fixed summarize_hed_tags_op so that tags were correctly categorized for output.
- Minor refactoring to reduce code complexity.
- BaseInput and Sidecar now raise HedFileError if input could not be read.


Release 0.3.0 June 20, 2023
- Introduction of partnered schema.
- Improved error handling for schema validation.
Expand Down
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
MIT License
The MIT License (MIT)

Copyright (c) 2020+ HED Standard Working Group

Expand Down
4 changes: 2 additions & 2 deletions docs/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
defusedxml>=0.7.1
inflect>=6.0.2
myst-parser>=0.18.1
inflect>=6.0.5
numpy>=1.21.6
openpyxl>=3.1.0
pandas>=1.3.5
portalocker>=2.7.0
semantic_version>=2.10.0
Sphinx>=5.2.2
sphinx_rtd_theme>=1.0.0
wordcloud==1.9.2
3 changes: 2 additions & 1 deletion hed/errors/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@


class HedExceptions:
GENERIC_ERROR = 'GENERIC_ERROR'
# A list of all exceptions that can be generated by the hedtools.
FILE_NOT_FOUND = 'fileNotFound'
BAD_PARAMETERS = 'badParameters'
Expand All @@ -10,7 +11,7 @@ class HedExceptions:
INVALID_EXTENSION = 'invalidExtension'

INVALID_DATAFRAME = 'INVALID_DATAFRAME'

INVALID_FILE_FORMAT = 'INVALID_FILE_FORMAT'
# These are actual schema issues, not that the file cannot be found or parsed
SCHEMA_HEADER_MISSING = 'HED_SCHEMA_HEADER_INVALID'
HED_SCHEMA_HEADER_INVALID = 'HED_SCHEMA_HEADER_INVALID'
Expand Down
26 changes: 15 additions & 11 deletions hed/models/base_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,10 @@ def __init__(self, file, file_type=None, worksheet_name=None, has_column_names=T
- An invalid dataframe was passed with size 0
- An invalid extension was provided
- A duplicate or empty column name appears

:raises OSError:
- Cannot open the indicated file

:raises KeyError:
- The specified worksheet name does not exist
- If the sidecar file or tabular file had invalid format and could not be read.

"""
if mapper is None:
mapper = ColumnMapper()
Expand Down Expand Up @@ -77,14 +75,20 @@ def __init__(self, file, file_type=None, worksheet_name=None, has_column_names=T
elif not file:
raise HedFileError(HedExceptions.FILE_NOT_FOUND, "Empty file passed to BaseInput.", file)
elif input_type in self.TEXT_EXTENSION:
self._dataframe = pandas.read_csv(file, delimiter='\t', header=pandas_header,
dtype=str, keep_default_na=True, na_values=None)
try:
self._dataframe = pandas.read_csv(file, delimiter='\t', header=pandas_header,
dtype=str, keep_default_na=True, na_values=None)
except Exception as e:
raise HedFileError(HedExceptions.INVALID_FILE_FORMAT, str(e), self.name) from e
# Convert nan values to a known value
self._dataframe = self._dataframe.fillna("n/a")
elif input_type in self.EXCEL_EXTENSION:
self._loaded_workbook = openpyxl.load_workbook(file)
loaded_worksheet = self.get_worksheet(self._worksheet_name)
self._dataframe = self._get_dataframe_from_worksheet(loaded_worksheet, has_column_names)
try:
self._loaded_workbook = openpyxl.load_workbook(file)
loaded_worksheet = self.get_worksheet(self._worksheet_name)
self._dataframe = self._get_dataframe_from_worksheet(loaded_worksheet, has_column_names)
except Exception as e:
raise HedFileError(HedExceptions.GENERIC_ERROR, str(e), self.name) from e
else:
raise HedFileError(HedExceptions.INVALID_EXTENSION, "", file)

Expand All @@ -94,7 +98,7 @@ def __init__(self, file, file_type=None, worksheet_name=None, has_column_names=T
# todo: Can we get rid of this behavior now that we're using pandas?
column_issues = ColumnMapper.check_for_blank_names(self.columns, allow_blank_names=allow_blank_names)
if column_issues:
raise HedFileError(HedExceptions.BAD_COLUMN_NAMES, "Duplicate or blank columns found. See issues.",
raise HedFileError(HedExceptions.BAD_COLUMN_NAMES, "Duplicate or blank columns found. See issues.",
self.name, issues=column_issues)

self.reset_mapper(mapper)
Expand Down Expand Up @@ -285,7 +289,7 @@ def set_cell(self, row_number, column_number, new_string_obj, tag_form="short_ta

Notes:
Any attribute of a HedTag that returns a string is a valid value of tag_form.

:raises ValueError:
- There is not a loaded dataframe

Expand Down
12 changes: 8 additions & 4 deletions hed/models/hed_tag.py
Original file line number Diff line number Diff line change
Expand Up @@ -602,10 +602,14 @@ def _get_tag_units_portion(self, tag_unit_classes):
@staticmethod
def _find_modifier_unit_entry(units, all_valid_unit_permutations):
possible_match = all_valid_unit_permutations.get(units)
if not possible_match or not possible_match.has_attribute(HedKey.UnitSymbol):
possible_match = all_valid_unit_permutations.get(units.lower())
if possible_match and possible_match.has_attribute(HedKey.UnitSymbol):
possible_match = None
# If we have a match that's a unit symbol, we're done, return it.
if possible_match and possible_match.has_attribute(HedKey.UnitSymbol):
return possible_match

possible_match = all_valid_unit_permutations.get(units.lower())
# Unit symbols must match including case. A unit-symbol match found only after lowercasing
# (e.g. M matching m) is therefore not a valid match and is discarded.
if possible_match and possible_match.has_attribute(HedKey.UnitSymbol):
possible_match = None

return possible_match

Expand Down
15 changes: 6 additions & 9 deletions hed/models/sidecar.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,15 +127,13 @@ def load_sidecar_file(self, file):
if not file:
return {}
elif isinstance(file, str):
if not self.name:
self.name = file
try:
with open(file, "r") as fp:
if not self.name:
self.name = file
return self._load_json_file(fp)
except FileNotFoundError as e:
raise HedFileError(HedExceptions.FILE_NOT_FOUND, e.strerror, file)
except TypeError as e:
raise HedFileError(HedExceptions.FILE_NOT_FOUND, str(e), file)
except OSError as e:
raise HedFileError(HedExceptions.FILE_NOT_FOUND, e.strerror, file) from e
else:
return self._load_json_file(file)

Expand Down Expand Up @@ -189,12 +187,11 @@ def _load_json_file(self, fp):

:raises HedFileError:
- If the file cannot be parsed.

"""
try:
return json.load(fp)
except json.decoder.JSONDecodeError as e:
raise HedFileError(HedExceptions.CANNOT_PARSE_JSON, str(e), self.name)
except (json.decoder.JSONDecodeError, AttributeError) as e:
raise HedFileError(HedExceptions.CANNOT_PARSE_JSON, str(e), self.name) from e

def extract_definitions(self, hed_schema=None, error_handler=None):
""" Gather and validate definitions in metadata.
Expand Down
4 changes: 2 additions & 2 deletions hed/models/tabular_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ def __init__(self, file=None, sidecar=None, name=None):
""" Constructor for the TabularInput class.

Parameters:
file (str or file like): A tsv file to open.
sidecar (str or Sidecar): A Sidecar filename or Sidecar
file (str or FileLike): A tsv file to open.
sidecar (str or Sidecar or FileLike): A Sidecar or source file/filename.
name (str): The name to display for this file for error purposes.

:raises HedFileError:
Expand Down
68 changes: 45 additions & 23 deletions hed/schema/hed_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -509,7 +509,6 @@ def _find_tag_entry(self, tag, schema_namespace=""):
clean_tag = str(tag)
namespace = schema_namespace
clean_tag = clean_tag[len(namespace):]
prefix_tag_adj = len(namespace)
working_tag = clean_tag.lower()

# Most tags are in the schema directly, so test that first
Expand All @@ -523,9 +522,26 @@ def _find_tag_entry(self, tag, schema_namespace=""):

return found_entry, remainder, []

prefix_tag_adj = len(namespace)

try:
found_entry, current_slash_index = self._find_tag_subfunction(tag, working_tag, prefix_tag_adj)
except self._TagIdentifyError as e:
issue = e.issue
return None, None, issue

remainder = None
if current_slash_index != -1:
remainder = clean_tag[current_slash_index:]
if remainder and found_entry.takes_value_child_entry:
found_entry = found_entry.takes_value_child_entry

return found_entry, remainder, []

def _find_tag_subfunction(self, tag, working_tag, prefix_tag_adj):
"""Finds the base tag and remainder from the left, raising exception on issues"""
current_slash_index = -1
current_entry = None

# Loop left to right, checking each word. Once we find an invalid word, we stop.
while True:
next_index = working_tag.find("/", current_slash_index + 1)
Expand All @@ -541,36 +557,37 @@ def _find_tag_entry(self, tag, schema_namespace=""):
tag,
index_in_tag=prefix_tag_adj,
index_in_tag_end=prefix_tag_adj + next_index)
return None, None, error
raise self._TagIdentifyError(error)
# If this is not a takes value node, validate each term in the remainder.
if not current_entry.takes_value_child_entry:
child_names = working_tag[current_slash_index + 1:].split("/")
word_start_index = current_slash_index + 1 + prefix_tag_adj
for name in child_names:
if self._get_tag_entry(name):
error = ErrorHandler.format_error(ValidationErrors.INVALID_PARENT_NODE,
tag,
index_in_tag=word_start_index,
index_in_tag_end=word_start_index + len(name),
expected_parent_tag=self.all_tags[name].name)
return None, None, error
word_start_index += len(name) + 1
# This will raise _TagIdentifyError on any issues
self._validate_remaining_terms(tag, working_tag, prefix_tag_adj, current_slash_index)
break

current_entry = parent_entry
current_slash_index = next_index
if next_index == len(working_tag):
break
continue

remainder = None
if current_slash_index != -1:
remainder = clean_tag[current_slash_index:]
if remainder and current_entry.takes_value_child_entry:
current_entry = current_entry.takes_value_child_entry
found_entry = current_entry

return found_entry, remainder, []
return current_entry, current_slash_index

def _validate_remaining_terms(self, tag, working_tag, prefix_tag_adj, current_slash_index):
    """ Check that no term after current_slash_index is already a schema term.

    Parameters:
        tag: The original tag, passed through to the error formatter.
        working_tag (str): The tag text whose slash-separated terms are checked.
        prefix_tag_adj (int): Offset added to every reported index.
        current_slash_index (int): Index in working_tag of the slash preceding the terms to check.

    :raises _TagIdentifyError:
        - One of the extension terms already exists as a schema term.
    """
    first_term_index = current_slash_index + 1
    # Position of the current term, shifted by prefix_tag_adj for error reporting.
    term_start = first_term_index + prefix_tag_adj
    for term in working_tag[first_term_index:].split("/"):
        term_end = term_start + len(term)
        if self._get_tag_entry(term):
            issue = ErrorHandler.format_error(ValidationErrors.INVALID_PARENT_NODE,
                                              tag,
                                              index_in_tag=term_start,
                                              index_in_tag_end=term_end,
                                              expected_parent_tag=self.all_tags[term].name)
            raise self._TagIdentifyError(issue)
        # Step past this term and the slash that follows it.
        term_start = term_end + 1

# ===============================================
# Semi-private creation finalizing functions
Expand Down Expand Up @@ -801,3 +818,8 @@ def _add_tag_to_dict(self, long_tag_name, new_entry, key_class):
def _create_tag_entry(self, long_tag_name, key_class):
section = self._sections[key_class]
return section._create_tag_entry(long_tag_name)

class _TagIdentifyError(Exception):
    """Used internally to note when a tag cannot be identified.

    The value passed to the constructor is kept on the `issue` attribute so the
    code that catches this exception can hand it back to its own caller.
    """
    def __init__(self, issue):
        # Initialize the base Exception explicitly so args/str() carry the issue too.
        super().__init__(issue)
        self.issue = issue
81 changes: 81 additions & 0 deletions hed/schema/schema_attribute_validators.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
"""The built-in functions to validate known attributes.

Template for the functions:
attribute_checker_template(hed_schema, tag_entry, attribute_name):
hed_schema (HedSchema): The schema to use for validation.
tag_entry (HedSchemaEntry): The schema entry for this tag.
attribute_name (str): The name of this attribute.
Returns:
list: A list of issues. Each issue is a dictionary.
"""

from hed.errors.error_types import SchemaWarnings, ValidationErrors
from hed.errors.error_reporter import ErrorHandler
from hed.schema.hed_schema import HedSchema


def tag_is_placeholder_check(hed_schema, tag_entry, attribute_name):
    """ Check that the tag carrying this attribute is a placeholder (its name ends in "/#").

    Parameters:
        hed_schema (HedSchema): The schema to use for validation (not consulted by this check).
        tag_entry (HedSchemaEntry): The schema entry for this tag.
        attribute_name (str): The name of this attribute.

    Returns:
        list: A list of issues. Each issue is a dictionary.

    """
    # Placeholder entries are fine as they are; nothing to report.
    if tag_entry.name.endswith("/#"):
        return []

    placeholder_issues = []
    placeholder_issues.extend(
        ErrorHandler.format_error(SchemaWarnings.NON_PLACEHOLDER_HAS_CLASS, tag_entry.name,
                                  attribute_name))
    return placeholder_issues


def tag_exists_check(hed_schema, tag_entry, attribute_name):
    """ Check that every tag in the attribute's comma-separated value exists in the schema.

    Parameters:
        hed_schema (HedSchema): The schema to use for validation.
        tag_entry (HedSchemaEntry): The schema entry for this tag.
        attribute_name (str): The name of this attribute.

    Returns:
        list: A list of issues. Each issue is a dictionary.

    """
    found_issues = []
    attribute_value = tag_entry.attributes.get(attribute_name, "")
    for candidate in attribute_value.split(","):
        # Empty segments (missing attribute, doubled or trailing commas) are ignored.
        if not candidate:
            continue
        if candidate in hed_schema.all_tags:
            continue
        found_issues.extend(ErrorHandler.format_error(ValidationErrors.NO_VALID_TAG_FOUND,
                                                      candidate,
                                                      index_in_tag=0,
                                                      index_in_tag_end=len(candidate)))

    return found_issues


def tag_exists_base_schema_check(hed_schema, tag_entry, attribute_name):
    """ Check that the single tag named by the attribute exists in the schema's tags.

    Parameters:
        hed_schema (HedSchema): The schema to use for validation.
        tag_entry (HedSchemaEntry): The schema entry for this tag.
        attribute_name (str): The name of this attribute.

    Returns:
        list: A list of issues. Each issue is a dictionary.
    """
    target_tag = tag_entry.attributes.get(attribute_name, "")
    # A missing or empty attribute value, or a tag the schema knows, yields no issues.
    if not target_tag or target_tag in hed_schema.all_tags:
        return []

    missing_issues = []
    missing_issues.extend(ErrorHandler.format_error(ValidationErrors.NO_VALID_TAG_FOUND,
                                                    target_tag,
                                                    index_in_tag=0,
                                                    index_in_tag_end=len(target_tag)))
    return missing_issues
Loading