Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
9dd6cea
add remodeler validator
monique2208 Oct 10, 2023
b634d07
Merge remote-tracking branch 'upstream/develop' into dev-schema
monique2208 Oct 10, 2023
402b44b
fix incorrect function call
monique2208 Oct 14, 2023
a0f19d8
replace operation parameter definitions
monique2208 Oct 30, 2023
f4644a5
add operation name
monique2208 Oct 30, 2023
1fe63e3
remove original validation functions and tests
monique2208 Nov 14, 2023
fd09c2c
change base op to abstract and write tests
monique2208 Nov 21, 2023
1e088d2
remove original schema
monique2208 Nov 21, 2023
b4767c4
reorder base_op methods
monique2208 Nov 27, 2023
3242162
add to operation parameter specification
monique2208 Dec 5, 2023
9fd6d53
finish validator tests
monique2208 Dec 5, 2023
ffd74fc
update docstrings
monique2208 Dec 5, 2023
df018a3
Merge branch 'develop' of https://github.com/hed-standard/hed-python …
monique2208 Dec 5, 2023
e1055ea
update doc + specification
monique2208 Dec 5, 2023
ff79911
update docs and json schema specification of operations
monique2208 Dec 12, 2023
63a168b
make compiled schema accessible in validator
monique2208 Dec 12, 2023
07814d7
add dependency error message
monique2208 Dec 18, 2023
266e984
add dependency checks
monique2208 Dec 18, 2023
ad75733
correct some parameter specifications
monique2208 Dec 18, 2023
3df10ac
remodel schema additional validation of input data
monique2208 Jan 5, 2024
d4a19e5
correct base class instantiation and summaries
monique2208 Jan 5, 2024
cb53d44
update tests
monique2208 Jan 5, 2024
e0f90ad
Merge branch 'develop' of https://github.com/hed-standard/hed-python …
monique2208 Jan 5, 2024
e9aa9f7
Merge branch 'develop' of https://github.com/hed-standard/hed-python …
monique2208 Jan 9, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions hed/tools/remodeling/cli/run_remodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from hed.errors.exceptions import HedFileError
from hed.tools.util.io_util import get_file_list, get_task_from_file, get_task_dict
from hed.tools.bids.bids_dataset import BidsDataset
from hed.tools.remodeling.validator import RemodelerValidator
from hed.tools.remodeling.dispatcher import Dispatcher
from hed.tools.remodeling.backup_manager import BackupManager

Expand Down Expand Up @@ -109,10 +110,11 @@ def parse_arguments(arg_list=None):
print(f"Data directory: {args.data_dir}\nModel path: {args.model_path}")
with open(args.model_path, 'r') as fp:
operations = json.load(fp)
parsed_operations, errors = Dispatcher.parse_operations(operations)
validator = RemodelerValidator()
errors = validator.validate(operations)
if errors:
raise ValueError("UnableToFullyParseOperations",
f"Fatal operation error, cannot continue:\n{Dispatcher.errors_to_str(errors)}")
f"Fatal operation error, cannot continue:\n{errors}")
return args, operations


Expand Down
34 changes: 5 additions & 29 deletions hed/tools/remodeling/dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def __init__(self, operation_list, data_root=None,
""" Constructor for the dispatcher.

Parameters:
operation_list (list): List of unparsed operations.
operation_list (list): List of valid unparsed operations.
data_root (str or None): Root directory for the dataset. If None, then backups are not made.
hed_versions (str, list, HedSchema, or HedSchemaGroup): The HED schema.

Expand All @@ -42,11 +42,7 @@ def __init__(self, operation_list, data_root=None,
raise HedFileError("BackupDoesNotExist",
f"Remodeler cannot be run with a dataset without first creating the "
f"{self.backup_name} backup for {self.data_root}", "")
op_list, errors = self.parse_operations(operation_list)
if errors:
these_errors = self.errors_to_str(errors, 'Dispatcher failed due to invalid operations')
raise ValueError("InvalidOperationList", f"{these_errors}")
self.parsed_ops = op_list
self.parsed_ops = self.parse_operations(operation_list)
self.hed_schema = self.get_schema(hed_versions)
self.summary_dicts = {}

Expand Down Expand Up @@ -183,31 +179,11 @@ def save_summaries(self, save_formats=['.json', '.txt'], individual_summaries="s

@staticmethod
def parse_operations(operation_list):
errors = []
operations = []
for index, item in enumerate(operation_list):
try:
if not isinstance(item, dict):
raise TypeError("InvalidOperationFormat",
f"Each operations must be a dictionary but operation {str(item)} is {type(item)}")
if "operation" not in item:
raise KeyError("MissingOperation",
f"operation {str(item)} does not have a operation key")
if "parameters" not in item:
raise KeyError("MissingParameters",
f"Operation {str(item)} does not have a parameters key")
if item["operation"] not in valid_operations:
raise KeyError("OperationNotListedAsValid",
f"Operation {item['operation']} must be added to operations_list "
f"before it can be executed.")
new_operation = valid_operations[item["operation"]](item["parameters"])
operations.append(new_operation)
except Exception as ex:
errors.append({"index": index, "item": f"{item}", "error_type": type(ex),
"error_code": ex.args[0], "error_msg": ex.args[1]})
if errors:
return [], errors
return operations, []
new_operation = valid_operations[item["operation"]](item["parameters"])
operations.append(new_operation)
return operations

@staticmethod
def prep_data(df):
Expand Down
102 changes: 26 additions & 76 deletions hed/tools/remodeling/operations/base_op.py
Original file line number Diff line number Diff line change
@@ -1,71 +1,29 @@
""" Base class for remodeling operations. """

from abc import ABC, abstractmethod

class BaseOp:
""" Base class for operations. All remodeling operations should extend this class.

The base class holds the parameters and does basic parameter checking against the operation's specification.

"""

def __init__(self, op_spec, parameters):
""" Base class constructor for operations.
class BaseOp(ABC):
""" Base class for operations. All remodeling operations should extend this class."""

def __init__(self, parameters):
""" Constructor for the BaseOp class. Should be extended by operations.

Parameters:
op_spec (dict): Specification for required and optional parameters.
parameters (dict): Actual values of the parameters for the operation.

:raises KeyError:
- If a required parameter is missing.
- If an unexpected parameter is provided.

:raises TypeError:
- If a parameter has the wrong type.

:raises ValueError:
- If the specification is missing a valid operation.

parameters (dict): A dictionary specifying the appropriate parameters for the operation.
"""
self.operation = op_spec.get("operation", "")
if not self.operation:
raise ValueError("OpMustHaveOperation", "Op must have operation is empty")
self.required_params = op_spec.get("required_parameters", {})
self.optional_params = op_spec.get("optional_parameters", {})
self.check_parameters(parameters)

def check_parameters(self, parameters):
""" Verify that the parameters meet the operation specification.

Parameters:
parameters (dict): Dictionary of parameters for this operation.
self.parameters = parameters

:raises KeyError:
- If a required parameter is missing.
- If an unexpected parameter is provided.
@property
@abstractmethod
def NAME(self):
pass

:raises TypeError:
- If a parameter has the wrong type.

"""

required = set(self.required_params.keys())
required_missing = required.difference(set(parameters.keys()))
if required_missing:
raise KeyError("MissingRequiredParameters",
f"{self.operation} requires parameters {list(required_missing)}")
for param_name, param_value in parameters.items():
if param_name in self.required_params:
param_type = self.required_params[param_name]
elif param_name in self.optional_params:
param_type = self.optional_params[param_name]
else:
raise KeyError("BadParameter",
f"{param_name} not a required or optional parameter for {self.operation}")
if isinstance(param_type, list):
self._check_list_type(param_value, param_type)
elif not isinstance(param_value, param_type):
raise TypeError("BadType", f"{param_value} has type {type(param_value)} not {param_type}")
@property
@abstractmethod
def PARAMS(self):
pass

@abstractmethod
def do_op(self, dispatcher, df, name, sidecar=None):
""" Base class method to be overridden by each operation.

Expand All @@ -78,21 +36,13 @@ def do_op(self, dispatcher, df, name, sidecar=None):
"""

return df.copy()

@staticmethod
def _check_list_type(param_value, param_type):
""" Check a parameter value against its specified type.

Parameters:
param_value (any): The value to be checked.
param_type (any): Class to check the param_value against.

:raises TypeError:
- If param_value is not an instance of param_type.

"""

for this_type in param_type:
if isinstance(param_value, this_type):
return
raise TypeError("BadType", f"{param_value} has type {type(param_value)} which is not in {str(param_type)}")
@abstractmethod
def validate_input_data(parameters):
'''Validates whether operation parameter input data meets specific criteria beyond what can be captured in JSON schema.
For example, whether two input arrays are the same length. The minimum implementation should return an empty list
to indicate that no errors were found. If additional validation is necessary, the method should perform the validation
and return a list of user-friendly error strings.
'''
return []
68 changes: 42 additions & 26 deletions hed/tools/remodeling/operations/convert_columns_op.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,53 @@
""" Convert the type of the specified columns of a tabular file. """
#TODO finish implementation

from hed.tools.remodeling.operations.base_op import BaseOp


class ConvertColumnsOp(BaseOp):
""" Convert.
""" Convert the data type of the specified columns.

Required remodeling parameters:
- **column_names** (*list*): The list of columns to convert.
- **convert_to_** (*str*): Name of type to convert to. (One of 'str', 'int', 'float', 'fixed'.)
- **convert_to** (*str*): Name of type to convert to. (One of 'str', 'int', 'float', 'fixed'.)

Optional remodeling parameters:
- **decimal_places** (*int*): Number of decimal places to keep (for fixed only).



"""

NAME = "convert_columns"

PARAMS = {
"operation": "convert_columns",
"required_parameters": {
"column_names": list,
"convert_to": str
"type": "object",
"properties": {
"column_names": {
"type": "array",
"items": {
"type": "string"
},
"minItems": 1,
"uniqueItems": True
},
"convert_to": {
"type": "string",
"enum": ['str', 'int', 'float', 'fixed'],
},
"decimal_places": {
"type": "integer"
}
},
"required": [
"column_names",
"convert_to"
],
"additionalProperties": False,
"if": {
"properties": {
"convert_to": {"const": "fixed"}
}
},
"optional_parameters": {
"decimal_places": int
"then": {
"required": ["decimal_places"]
}
}

Expand All @@ -31,25 +57,11 @@ def __init__(self, parameters):
Parameters:
parameters (dict): Parameter values for required and optional parameters.

:raises KeyError:
- If a required parameter is missing.
- If an unexpected parameter is provided.

:raises TypeError:
- If a parameter has the wrong type.

:raises ValueError:
- If convert_to is not one of the allowed values.

"""
super().__init__(self.PARAMS, parameters)
super().__init__(parameters)
self.column_names = parameters['column_names']
self.convert_to = parameters['convert_to']
self.decimal_places = parameters.get('decimal_places', None)
self.allowed_types = ['str', 'int', 'float', 'fixed']
if self.convert_to not in self.allowed_types:
raise ValueError("CannotConvertToSpecifiedType",
f"The convert_to value {self.convert_to} must be one of {str(self.allowed_types)}")

def do_op(self, dispatcher, df, name, sidecar=None):
""" Convert the specified column to a specified type.
Expand All @@ -67,3 +79,7 @@ def do_op(self, dispatcher, df, name, sidecar=None):

df_new = df.copy()
return df_new

@staticmethod
def validate_input_data(operations):
return []
Loading