Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion hed/models/df_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ def get_assembled(tabular_file, sidecar, hed_schema, extra_def_dicts=None, join_
expand_defs: bool
Expand any def tags found
Returns:
A list of HedStrings, or a list of lists of HedStrings
tuple: A list of HedStrings, or a list of lists of HedStrings, DefinitionDict

"""
if isinstance(sidecar, str):
sidecar = Sidecar(sidecar)
Expand Down
7 changes: 4 additions & 3 deletions hed/models/sidecar.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,9 +156,10 @@ def validate(self, hed_schema, extra_def_dicts=None, name=None, error_handler=No

Parameters:
hed_schema (HedSchema): Input data to be validated.
extra_def_dicts(list or DefinitionDict): extra def dicts in addition to sidecar
name(str): The name to report this sidecar as
error_handler (ErrorHandler): Error context to use. Creates a new one if None
extra_def_dicts(list or DefinitionDict): Extra def dicts in addition to sidecar.
name(str): The name to report this sidecar as.
error_handler (ErrorHandler): Error context to use. Creates a new one if None.

Returns:
issues (list of dict): A list of issues associated with each level in the HED string.
"""
Expand Down
3 changes: 2 additions & 1 deletion hed/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@
from .analysis.annotation_util import \
check_df_columns, extract_tags, generate_sidecar_entry, hed_to_df, df_to_hed, merge_hed_dict
from .analysis import analysis_util
from .analysis.analysis_util import assemble_hed, search_tabular, get_assembled_strings
from .analysis.analysis_util import assemble_hed
# from .analysis.analysis_util import search_tabular, get_assembled_strings

from .remodeling.cli import run_remodel
from .remodeling.cli import run_remodel_backup
Expand Down
103 changes: 51 additions & 52 deletions hed/tools/analysis/analysis_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import pandas as pd
from hed.models.tabular_input import TabularInput
from hed.models.expression_parser import QueryParser
from hed.tools.util.data_util import separate_values
from hed.models.hed_tag import HedTag
from hed.models.hed_group import HedGroup
Expand Down Expand Up @@ -45,57 +44,57 @@ def assemble_hed(data_input, sidecar, schema, columns_included=None, expand_defs
return df, definitions


def get_assembled_strings(table, hed_schema=None, expand_defs=False):
    """ Assemble the HED annotations of a tabular file into HED string objects.

    Parameters:
        table (TabularInput): The input file whose rows are to be assembled.
        hed_schema (HedSchema or HedSchemaGroup): If provided, the HedStrings are converted to canonical form.
        expand_defs (bool): If True, definitions are expanded when the events are assembled.

    Returns:
        list: A list of HedString or HedStringGroup objects, one per row of the input.

    """
    # Definitions are always stripped out; only the assembled annotations are returned.
    row_iterator = table.iter_dataframe(hed_ops=[hed_schema], return_string_only=True,
                                        expand_defs=expand_defs, remove_definitions=True)
    return list(row_iterator)


def search_tabular(data_input, hed_schema, query, columns_included=None):
    """ Return a dataframe with results of query.

    Parameters:
        data_input (TabularInput): The tabular input file (e.g., events) to be searched.
        hed_schema (HedSchema or HedSchemaGroup): The schema(s) under which to make the query.
        query (str or list): The str query or list of string queries to make.
        columns_included (list or None): List of names of columns to include in the result.

    Returns:
        DataFrame or None: A DataFrame with the results of the query or None if no events satisfied the query.

    """
    eligible_columns, missing_columns = separate_values(list(data_input.dataframe.columns), columns_included)
    hed_list = get_assembled_strings(data_input, hed_schema=hed_schema, expand_defs=True)
    expression = QueryParser(query)
    hed_tags = []
    row_numbers = []
    # Collect the rows whose assembled HED string matches the query expression.
    for index, next_item in enumerate(hed_list):
        match = expression.search(next_item)
        if not match:
            continue
        hed_tags.append(next_item)
        row_numbers.append(index)

    if not row_numbers:
        df = None
    elif not eligible_columns:
        df = pd.DataFrame({'row_number': row_numbers, 'HED_assembled': hed_tags})
    else:
        df = data_input.dataframe.iloc[row_numbers][eligible_columns].reset_index()
        # BUG FIX: DataFrame.rename returns a new frame; the original call discarded
        # its result, so the 'index' column was never renamed to 'row_number'.
        df = df.rename(columns={'index': 'row_number'})
    return df
# def get_assembled_strings(table, hed_schema=None, expand_defs=False):
# """ Return HED string objects for a tabular file.
#
# Parameters:
# table (TabularInput): The input file to be searched.
# hed_schema (HedSchema or HedschemaGroup): If provided the HedStrings are converted to canonical form.
# expand_defs (bool): If True, definitions are expanded when the events are assembled.
#
# Returns:
# list: A list of HedString or HedStringGroup objects.
#
# """
# hed_list = list(table.iter_dataframe(hed_ops=[hed_schema], return_string_only=True,
# expand_defs=expand_defs, remove_definitions=True))
# return hed_list
#

# def search_tabular(data_input, hed_schema, query, columns_included=None):
# """ Return a dataframe with results of query.
#
# Parameters:
# data_input (TabularInput): The tabular input file (e.g., events) to be searched.
# hed_schema (HedSchema or HedSchemaGroup): The schema(s) under which to make the query.
# query (str or list): The str query or list of string queries to make.
# columns_included (list or None): List of names of columns to include
#
# Returns:
# DataFrame or None: A DataFrame with the results of the query or None if no events satisfied the query.
#
# """
#
# eligible_columns, missing_columns = separate_values(list(data_input.dataframe.columns), columns_included)
# hed_list = get_assembled_strings(data_input, hed_schema=hed_schema, expand_defs=True)
# expression = QueryParser(query)
# hed_tags = []
# row_numbers = []
# for index, next_item in enumerate(hed_list):
# match = expression.search(next_item)
# if not match:
# continue
# hed_tags.append(next_item)
# row_numbers.append(index)
#
# if not row_numbers:
# df = None
# elif not eligible_columns:
# df = pd.DataFrame({'row_number': row_numbers, 'HED_assembled': hed_tags})
# else:
# df = data_input.dataframe.iloc[row_numbers][eligible_columns].reset_index()
# df.rename(columns={'index': 'row_number'})
# return df


# def remove_defs(hed_strings):
Expand Down
15 changes: 8 additions & 7 deletions hed/tools/analysis/event_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,25 +3,26 @@
from hed.schema import HedSchema, HedSchemaGroup
from hed.tools.analysis.temporal_event import TemporalEvent
from hed.models.model_constants import DefTagNames
from hed.models.df_util import get_assembled


class EventManager:

def __init__(self, data, hed_schema):
def __init__(self, data, schema):
""" Create an event manager for an events file.

Parameters:
data (TabularInput): A tabular input file.
hed_schema (HedSchema): A HED schema
schema (HedSchema): A HED schema

Raises:
HedFileError: if there are any unmatched offsets.

"""

if not isinstance(hed_schema, HedSchema) and not isinstance(hed_schema, HedSchemaGroup):
if not isinstance(schema, HedSchema) and not isinstance(schema, HedSchemaGroup):
raise ValueError("ContextRequiresSchema", f"Context manager must have a valid HedSchema of HedSchemaGroup")
self.hed_schema = hed_schema
self.schema = schema
self.data = data
self.event_list = [[] for _ in range(len(self.data.dataframe))]
self.hed_strings = [None for _ in range(len(self.data.dataframe))]
Expand Down Expand Up @@ -56,10 +57,10 @@ def _create_event_list(self):

onset_dict = {}
event_index = 0
for hed in self.data.iter_dataframe(hed_ops=[self.hed_schema], return_string_only=True,
expand_defs=False, remove_definitions=True):
self.hed_strings, definitions = get_assembled(self.data, self.data._sidecar, self.schema, extra_def_dicts=None,
join_columns=True, shrink_defs=True, expand_defs=False)
for hed in self.hed_strings:
# to_remove = [] # tag_tuples = hed.find_tags(['Onset'], recursive=False, include_groups=1)
self.hed_strings[event_index] = hed
group_tuples = hed.find_top_level_tags(anchor_tags={DefTagNames.ONSET_KEY, DefTagNames.OFFSET_KEY},
include_groups=2)
for tup in group_tuples:
Expand Down
10 changes: 9 additions & 1 deletion hed/tools/analysis/hed_context_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from hed.schema import HedSchema, HedSchemaGroup
from hed.tools.analysis.analysis_util import hed_to_str

#TODO: [Refactor] clean up distinction between hed as strings versus objects -- maybe replace by event manager.

class OnsetGroup:
def __init__(self, name, contents, start_index, end_index=None):
Expand All @@ -23,7 +24,8 @@ def __init__(self, hed_strings, hed_schema):
""" Create an context manager for an events file.

Parameters:
hed_strings (list): A list of hed_strings to be managed.
hed_strings (list): A list of HedString objects to be managed.
hed_schema (HedSchema): A HedSchema

Raises:
HedFileError: if there are any unmatched offsets.
Expand All @@ -46,6 +48,12 @@ def __init__(self, hed_strings, hed_schema):
self._create_onset_list()
self._set_event_contexts()

# def _extract_hed_objs(self, assembled):
# hed_objs = [None for _ in range(len(assembled))]
# for index, value in assembled["HED_assembled"].items():
# hed_objs[index] = HedString(value, hed_schema=self.hed_schema)
# return hed_objs

def iter_context(self):
""" Iterate rows of context.

Expand Down
18 changes: 9 additions & 9 deletions hed/tools/remodeling/operations/factor_hed_tags_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from hed.models.tabular_input import TabularInput
from hed.models.sidecar import Sidecar
from hed.models.expression_parser import QueryParser
from hed.tools.analysis.analysis_util import get_assembled_strings
from hed.models.df_util import get_assembled


class FactorHedTagsOp(BaseOp):
Expand Down Expand Up @@ -101,16 +101,16 @@ def do_op(self, dispatcher, df, name, sidecar=None):
"""

if sidecar and not isinstance(sidecar, Sidecar):
sidecar = Sidecar(sidecar, hed_schema=dispatcher.hed_schema)
input_data = TabularInput(df, hed_schema=dispatcher.hed_schema, sidecar=sidecar)
sidecar = Sidecar(sidecar)
input_data = TabularInput(df.copy(), sidecar=sidecar, name=name)
column_names = list(df.columns)
for name in self.query_names:
if name in column_names:
for query_name in self.query_names:
if query_name in column_names:
raise ValueError("QueryNameAlreadyColumn",
f"Query [{name}]: is already a column name of the data frame")
df = input_data.dataframe.copy()
df_list = [df]
hed_strings = get_assembled_strings(input_data, hed_schema=dispatcher.hed_schema, expand_defs=True)
f"Query [{query_name}]: is already a column name of the data frame")
df_list = [input_data.dataframe]
hed_strings, _ = get_assembled(input_data, sidecar, dispatcher.hed_schema, extra_def_dicts=None,
join_columns=True, shrink_defs=False, expand_defs=True)
df_factors = pd.DataFrame(0, index=range(len(hed_strings)), columns=self.query_names)
for parse_ind, parser in enumerate(self.expression_parsers):
for index, next_item in enumerate(hed_strings):
Expand Down
14 changes: 7 additions & 7 deletions hed/tools/remodeling/operations/factor_hed_type_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from hed.tools.remodeling.operations.base_op import BaseOp
from hed.models.tabular_input import TabularInput
from hed.models.sidecar import Sidecar
from hed.tools.analysis.analysis_util import get_assembled_strings
from hed.models.df_util import get_assembled
from hed.tools.analysis.hed_type_manager import HedTypeManager

# TODO: restricted factor values are not implemented yet.
Expand Down Expand Up @@ -69,13 +69,13 @@ def do_op(self, dispatcher, df, name, sidecar=None):
"""

if sidecar and not isinstance(sidecar, Sidecar):
sidecar = Sidecar(sidecar, hed_schema=dispatcher.hed_schema)
input_data = TabularInput(df, hed_schema=dispatcher.hed_schema, sidecar=sidecar)
df = input_data.dataframe.copy()
df_list = [df]
hed_strings = get_assembled_strings(input_data, hed_schema=dispatcher.hed_schema, expand_defs=False)
sidecar = Sidecar(sidecar)
input_data = TabularInput(df, sidecar=sidecar, name=name)
df_list = [input_data.dataframe.copy()]
hed_strings, definitions = get_assembled(input_data, sidecar, dispatcher.hed_schema,
extra_def_dicts=None, join_columns=True,
shrink_defs=False, expand_defs=True)

definitions = input_data.get_definitions()
var_manager = HedTypeManager(hed_strings, dispatcher.hed_schema, definitions)
var_manager.add_type_variable(self.type_tag.lower())

Expand Down
13 changes: 8 additions & 5 deletions hed/tools/remodeling/operations/summarize_hed_tags_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from hed.tools.analysis.hed_tag_counts import HedTagCounts
from hed.tools.remodeling.operations.base_op import BaseOp
from hed.tools.remodeling.operations.base_context import BaseContext
from hed.models.df_util import get_assembled


class SummarizeHedTagsOp(BaseOp):
Expand Down Expand Up @@ -97,12 +98,14 @@ def update_context(self, new_context):
counts = HedTagCounts(new_context['name'], total_events=len(new_context['df']))
sidecar = new_context['sidecar']
if sidecar and not isinstance(sidecar, Sidecar):
sidecar = Sidecar(sidecar, hed_schema=new_context['schema'])
input_data = TabularInput(new_context['df'], hed_schema=new_context['schema'], sidecar=sidecar)
sidecar = Sidecar(sidecar)
input_data = TabularInput(new_context['df'], sidecar=sidecar, name=new_context['name'])
hed_strings, definitions = get_assembled(input_data, sidecar, new_context['schema'],
extra_def_dicts=None, join_columns=True,
shrink_defs=False, expand_defs=True)
# definitions = input_data.get_definitions().gathered_defs
for objs in input_data.iter_dataframe(hed_ops=[new_context['schema']], return_string_only=False,
expand_defs=True, remove_definitions=True):
counts.update_event_counts(objs['HED'], new_context['name'])
for hed in hed_strings:
counts.update_event_counts(hed, new_context['name'])
self.summary_dict[new_context["name"]] = counts

def _get_summary_details(self, merge_counts):
Expand Down
11 changes: 6 additions & 5 deletions hed/tools/remodeling/operations/summarize_hed_type_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from hed.models.tabular_input import TabularInput
from hed.models.sidecar import Sidecar
from hed.tools.analysis.analysis_util import get_assembled_strings
from hed.models.df_util import get_assembled
from hed.tools.analysis.hed_type_values import HedTypeValues
from hed.tools.analysis.hed_type_counts import HedTypeCounts
from hed.tools.analysis.hed_context_manager import HedContextManager
Expand Down Expand Up @@ -90,10 +90,11 @@ def __init__(self, sum_op):
def update_context(self, new_context):
sidecar = new_context['sidecar']
if sidecar and not isinstance(sidecar, Sidecar):
sidecar = Sidecar(sidecar, hed_schema=new_context['schema'])
input_data = TabularInput(new_context['df'], hed_schema=new_context['schema'], sidecar=sidecar)
hed_strings = get_assembled_strings(input_data, hed_schema=new_context['schema'], expand_defs=False)
definitions = input_data.get_definitions().gathered_defs
sidecar = Sidecar(sidecar)
input_data = TabularInput(new_context['df'], sidecar=sidecar, name=new_context['name'])
hed_strings, definitions = get_assembled(input_data, sidecar, new_context['schema'],
extra_def_dicts=None, join_columns=True,
shrink_defs=False, expand_defs=True)
context_manager = HedContextManager(hed_strings, new_context['schema'])
type_values = HedTypeValues(context_manager, definitions, new_context['name'], type_tag=self.type_tag)

Expand Down
10 changes: 4 additions & 6 deletions hed/tools/remodeling/operations/summarize_hed_validation_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,6 @@ def _get_result_string(self, name, result, indent=BaseContext.DISPLAY_INDENT):
return "\n".join(sum_list)

def update_context(self, new_context):
validator = HedValidator(hed_schema=new_context['schema'])
results = self.get_empty_results()
results["total_event_files"] = 1
results["event_issues"][new_context["name"]] = []
Expand All @@ -111,10 +110,9 @@ def update_context(self, new_context):
filtered_issues = []
if sidecar:
if not isinstance(sidecar, Sidecar):
sidecar = Sidecar(files=new_context['sidecar'], name=os.path.basename(sidecar),
hed_schema=new_context['schema'])
sidecar = Sidecar(files=new_context['sidecar'], name=os.path.basename(sidecar))
results["sidecar_issues"][sidecar.name] = []
sidecar_issues = sidecar.validate_entries(validator, check_for_warnings=self.check_for_warnings)
sidecar_issues = sidecar.validate(new_context['schema'])
filtered_issues = ErrorHandler.filter_issues_by_severity(sidecar_issues, ErrorSeverity.ERROR)
if not self.check_for_warnings:
sidecar_issues = filtered_issues
Expand All @@ -123,8 +121,8 @@ def update_context(self, new_context):
results['total_sidecar_files'] = 1
if not filtered_issues:
results['validation_completed'] = True
input_data = TabularInput(new_context['df'], hed_schema=new_context['schema'], sidecar=sidecar)
issues = input_data.validate_file(validator, check_for_warnings=self.check_for_warnings)
input_data = TabularInput(new_context['df'], sidecar=sidecar)
issues = input_data.validate(new_context['schema'])
if not self.check_for_warnings:
issues = ErrorHandler.filter_issues_by_severity(issues, ErrorSeverity.ERROR)
results['event_issues'][new_context["name"]] = issues
Expand Down
Loading