From fbb8fd81d1332d16f5da7bed61d89318b9667f6c Mon Sep 17 00:00:00 2001 From: IanCa Date: Wed, 15 Mar 2023 17:46:41 -0500 Subject: [PATCH 01/19] First pass refactor of models --- hed/__init__.py | 3 +- hed/errors/error_messages.py | 142 ++--- hed/errors/error_reporter.py | 49 +- hed/errors/error_types.py | 5 +- hed/errors/exceptions.py | 2 + hed/models/__init__.py | 3 - hed/models/base_input.py | 509 ++++++------------ hed/models/column_mapper.py | 221 +++----- hed/models/column_metadata.py | 107 +--- hed/models/def_mapper.py | 255 --------- hed/models/definition_dict.py | 154 +++++- hed/models/df_util.py | 125 +++++ hed/models/expression_parser.py | 4 +- hed/models/hed_group.py | 18 +- hed/models/hed_ops.py | 262 --------- hed/models/hed_string.py | 110 ++-- hed/models/hed_tag.py | 143 +++-- hed/models/sidecar.py | 254 ++++++--- hed/models/sidecar_base.py | 269 --------- hed/models/spreadsheet_input.py | 12 +- hed/models/tabular_input.py | 62 +-- hed/models/timeseries_input.py | 2 +- hed/schema/schema_compliance.py | 2 +- hed/validator/__init__.py | 4 + hed/validator/def_validator.py | 78 +++ hed/validator/hed_validator.py | 119 ++-- .../onset_validator.py} | 46 +- hed/validator/sidecar_validator.py | 147 +++++ hed/validator/spreadsheet_validator.py | 114 ++++ hed/validator/tag_validator.py | 100 ++-- spec_tests/test_errors.py | 182 ++++--- tests/data/model_tests/na_tag_column.tsv | 2 + tests/data/model_tests/na_value_column.json | 5 + tests/data/model_tests/na_value_column.tsv | 3 + .../no_column_header_definition.tsv | 4 +- .../no_column_header_definition_long.tsv | 4 +- .../data/validator_tests/bids_events_HED.json | 3 +- tests/models/test_base_file_input.py | 19 +- tests/models/test_column_mapper.py | 90 +--- tests/models/test_def_mapper.py | 292 ---------- tests/models/test_definition_dict.py | 36 +- tests/models/test_expression_parser.py | 11 + tests/models/test_hed_string.py | 27 + tests/models/test_hed_tag.py | 28 +- tests/models/test_sidecar.py | 38 +- 
tests/models/test_spreadsheet_input.py | 92 +--- tests/models/test_tabular_input.py | 55 +- tests/schema/test_convert_tags.py | 2 +- tests/validator/test_def_validator.py | 119 ++++ tests/validator/test_hed_validator.py | 92 +--- .../test_onset_validator.py} | 227 +++----- tests/validator/test_tag_validator.py | 48 +- tests/validator/test_tag_validator_base.py | 29 +- tests/validator/test_tag_validator_library.py | 33 +- 54 files changed, 1920 insertions(+), 2842 deletions(-) delete mode 100644 hed/models/def_mapper.py create mode 100644 hed/models/df_util.py delete mode 100644 hed/models/hed_ops.py delete mode 100644 hed/models/sidecar_base.py create mode 100644 hed/validator/def_validator.py rename hed/{models/onset_mapper.py => validator/onset_validator.py} (76%) create mode 100644 hed/validator/sidecar_validator.py create mode 100644 hed/validator/spreadsheet_validator.py create mode 100644 tests/data/model_tests/na_tag_column.tsv create mode 100644 tests/data/model_tests/na_value_column.json create mode 100644 tests/data/model_tests/na_value_column.tsv delete mode 100644 tests/models/test_def_mapper.py create mode 100644 tests/validator/test_def_validator.py rename tests/{models/test_onset_mapper.py => validator/test_onset_validator.py} (57%) diff --git a/hed/__init__.py b/hed/__init__.py index 40faff8ab..e2bdcd053 100644 --- a/hed/__init__.py +++ b/hed/__init__.py @@ -7,12 +7,13 @@ from hed.models.spreadsheet_input import SpreadsheetInput from hed.models.tabular_input import TabularInput from hed.models.sidecar import Sidecar +from hed.models.definition_dict import DefinitionDict + from hed.schema.hed_schema import HedSchema from hed.schema.hed_schema_group import HedSchemaGroup from hed.schema.hed_schema_io import get_schema, get_schema_versions, load_schema, load_schema_version -from hed.validator.hed_validator import HedValidator # from hed import errors, models, schema, tools, validator diff --git a/hed/errors/error_messages.py 
b/hed/errors/error_messages.py index 2d3647d9a..9ae9557f3 100644 --- a/hed/errors/error_messages.py +++ b/hed/errors/error_messages.py @@ -12,327 +12,333 @@ @hed_tag_error(ValidationErrors.HED_UNITS_INVALID) def val_error_invalid_unit(tag, units): units_string = ','.join(sorted(units)) - return f'Invalid unit - "{tag}" valid units are "{units_string}"', { - "units": sorted(units) - } + return f'Invalid unit - "{tag}" valid units are "{units_string}"' @hed_error(ValidationErrors.HED_TAG_EMPTY) def val_error_extra_comma(source_string, char_index): character = source_string[char_index] - return f"HED tags cannot be empty. Extra delimiter found: '{character}' at index {char_index}'", { - 'char_index': char_index - } + return f"HED tags cannot be empty. Extra delimiter found: '{character}' at index {char_index}'" @hed_tag_error(ValidationErrors.HED_GROUP_EMPTY, actual_code=ValidationErrors.HED_TAG_EMPTY) def val_error_empty_group(tag): - return f"HED tags cannot be empty. Extra delimiters found: '{tag}'", {} + return f"HED tags cannot be empty. Extra delimiters found: '{tag}'" @hed_tag_error(ValidationErrors.HED_TAG_EXTENDED, has_sub_tag=True, default_severity=ErrorSeverity.WARNING) def val_error_tag_extended(tag, problem_tag): - return f"Hed tag is extended. '{problem_tag}' in {tag}", {} + return f"Hed tag is extended. 
'{problem_tag}' in {tag}" @hed_error(ValidationErrors.HED_CHARACTER_INVALID) def val_error_invalid_char(source_string, char_index): character = source_string[char_index] - return f'Invalid character "{character}" at index {char_index}"', { - 'char_index': char_index - } + return f'Invalid character "{character}" at index {char_index}"' @hed_tag_error(ValidationErrors.INVALID_TAG_CHARACTER, has_sub_tag=True, actual_code=ValidationErrors.HED_CHARACTER_INVALID) def val_error_invalid_tag_character(tag, problem_tag): - return f"Invalid character '{problem_tag}' in {tag}", {} + return f"Invalid character '{problem_tag}' in {tag}" @hed_error(ValidationErrors.HED_TILDES_UNSUPPORTED) def val_error_tildes_not_supported(source_string, char_index): character = source_string[char_index] - return f"Tildes not supported. Replace (a ~ b ~ c) with (a, (b, c)). '{character}' at index {char_index}'", { - 'char_index': char_index - } + return f"Tildes not supported. Replace (a ~ b ~ c) with (a, (b, c)). '{character}' at index {char_index}'" @hed_error(ValidationErrors.HED_COMMA_MISSING) def val_error_comma_missing(tag): - return f"Comma missing after - '{tag}'", {} + return f"Comma missing after - '{tag}'" @hed_tag_error(ValidationErrors.HED_TAG_REPEATED) def val_error_duplicate_tag(tag): - return f'Repeated tag - "{tag}"', {} + return f'Repeated tag - "{tag}"' @hed_error(ValidationErrors.HED_TAG_REPEATED_GROUP) def val_error_duplicate_group(group): - return f'Repeated group - "{group}"', {} + return f'Repeated group - "{group}"' @hed_error(ValidationErrors.HED_PARENTHESES_MISMATCH) def val_error_parentheses(opening_parentheses_count, closing_parentheses_count): return f'Number of opening and closing parentheses are unequal. '\ f'{opening_parentheses_count} opening parentheses. 
{closing_parentheses_count} '\ - 'closing parentheses', {} + 'closing parentheses' @hed_tag_error(ValidationErrors.HED_TAG_REQUIRES_CHILD) def val_error_require_child(tag): - return f"Descendant tag required - '{tag}'", {} + return f"Descendant tag required - '{tag}'" @hed_error(ValidationErrors.HED_TAG_NOT_UNIQUE) def val_error_multiple_unique(tag_prefix): - return f"Multiple unique tags with prefix - '{tag_prefix}'", {} + return f"Multiple unique tags with prefix - '{tag_prefix}'" + + +@hed_tag_error(ValidationErrors.TAG_PREFIX_INVALID) +def val_error_prefix_invalid(tag, tag_prefix): + return f"Prefixes can only contain alpha characters. - '{tag_prefix}'" @hed_tag_error(ValidationErrors.INVALID_EXTENSION, actual_code=ValidationErrors.HED_TAG_INVALID) def val_error_invalid_extension(tag): - return f'Invalid extension on tag - "{tag}"', {} + return f'Invalid extension on tag - "{tag}"' @hed_tag_error(ValidationErrors.INVALID_PARENT_NODE, has_sub_tag=True, actual_code=ValidationErrors.HED_TAG_INVALID) def val_error_invalid_parent(tag, problem_tag, expected_parent_tag): return f"In '{tag}', '{problem_tag}' appears as '{str(expected_parent_tag)}' and cannot be used " \ - f"as an extension.", {"expected_parent_tag": expected_parent_tag} + f"as an extension." @hed_tag_error(ValidationErrors.NO_VALID_TAG_FOUND, has_sub_tag=True, actual_code=ValidationErrors.HED_TAG_INVALID) def val_error_no_valid_tag(tag, problem_tag): - return f"'{problem_tag}' in {tag} is not a valid base hed tag.", {} + return f"'{problem_tag}' in {tag} is not a valid base hed tag." @hed_tag_error(ValidationErrors.HED_VALUE_INVALID) def val_error_no_value(tag): - return f"''{tag}' has an invalid value portion.", {} + return f"''{tag}' has an invalid value portion." 
@hed_error(ValidationErrors.HED_MISSING_REQUIRED_COLUMN, default_severity=ErrorSeverity.WARNING) def val_error_missing_column(column_name): - return f"Required column '{column_name}' not specified or found in file.", {} + return f"Required column '{column_name}' not specified or found in file." @hed_error(ValidationErrors.HED_UNKNOWN_COLUMN, default_severity=ErrorSeverity.WARNING) def val_error_extra_column(column_name): return f"Column named '{column_name}' found in file, but not specified as a tag column " + \ - "or identified in sidecars.", {} + "or identified in sidecars." @hed_error(ValidationErrors.HED_BLANK_COLUMN, default_severity=ErrorSeverity.WARNING) def val_error_hed_blank_column(column_number): - return f"Column number {column_number} has no column name", {} + return f"Column number {column_number} has no column name" @hed_error(ValidationErrors.HED_DUPLICATE_COLUMN, default_severity=ErrorSeverity.WARNING) def val_error_hed_duplicate_column(column_name): - return f"Multiple columns have name {column_name}. This is not a fatal error, but discouraged.", {} + return f"Multiple columns have name {column_name}. This is not a fatal error, but discouraged." @hed_tag_error(ValidationErrors.HED_LIBRARY_UNMATCHED) def val_error_unknown_prefix(tag, unknown_prefix, known_prefixes): - return f"Tag '{tag} has unknown prefix '{unknown_prefix}'. Valid prefixes: {known_prefixes}", {} + return f"Tag '{tag} has unknown prefix '{unknown_prefix}'. Valid prefixes: {known_prefixes}" @hed_tag_error(ValidationErrors.HED_NODE_NAME_EMPTY, has_sub_tag=True) def val_error_extra_slashes_spaces(tag, problem_tag): - return f"Extra slashes or spaces '{problem_tag}' in tag '{tag}'", {} + return f"Extra slashes or spaces '{problem_tag}' in tag '{tag}'" @hed_error(ValidationErrors.HED_SIDECAR_KEY_MISSING, default_severity=ErrorSeverity.WARNING) def val_error_sidecar_key_missing(invalid_key, category_keys): - return f"Category key '{invalid_key}' does not exist in column. 
Valid keys are: {category_keys}", {} + return f"Category key '{invalid_key}' does not exist in column. Valid keys are: {category_keys}" @hed_tag_error(ValidationErrors.HED_DEF_UNMATCHED) def val_error_def_unmatched(tag): - return f"A data-recording’s Def tag cannot be matched to definition. Tag: '{tag}'", {} + return f"A data-recording’s Def tag cannot be matched to definition. Tag: '{tag}'" @hed_tag_error(ValidationErrors.HED_DEF_EXPAND_INVALID) def val_error_bad_def_expand(tag, actual_def, found_def): return f"A data-recording’s Def-expand tag does not match the given definition." + \ - f"Tag: '{tag}'. Actual Def: {actual_def}. Found Def: {found_def}", {} + f"Tag: '{tag}'. Actual Def: {actual_def}. Found Def: {found_def}" @hed_tag_error(ValidationErrors.HED_DEF_VALUE_MISSING, actual_code=ValidationErrors.HED_DEF_VALUE_INVALID) def val_error_def_value_missing(tag): - return f"A def tag requires a placeholder value, but was not given one. Definition: '{tag}'", {} + return f"A def tag requires a placeholder value, but was not given one. Definition: '{tag}'" @hed_tag_error(ValidationErrors.HED_DEF_VALUE_EXTRA, actual_code=ValidationErrors.HED_DEF_VALUE_INVALID) def val_error_def_value_extra(tag): - return f"A def tag does not take a placeholder value, but was given one. Definition: '{tag}", {} + return f"A def tag does not take a placeholder value, but was given one. Definition: '{tag}" @hed_tag_error(ValidationErrors.HED_TOP_LEVEL_TAG, actual_code=ValidationErrors.HED_TAG_GROUP_ERROR) def val_error_top_level_tag(tag): - return f"A tag that must be in a top level group was found in another location. {str(tag)}", {} + return f"A tag that must be in a top level group was found in another location. {str(tag)}" @hed_tag_error(ValidationErrors.HED_TAG_GROUP_TAG, actual_code=ValidationErrors.HED_TAG_GROUP_ERROR) def val_error_tag_group_tag(tag): - return f"A tag that must be in a group was found in another location. 
{str(tag)}", {} + return f"A tag that must be in a group was found in another location. {str(tag)}" @hed_tag_error(ValidationErrors.HED_MULTIPLE_TOP_TAGS, actual_code=ValidationErrors.HED_TAG_GROUP_ERROR) def val_error_top_level_tags(tag, multiple_tags): tags_as_string = [str(tag) for tag in multiple_tags] return f"Multiple top level tags found in a single group. First one found: {str(tag)}. " + \ - f"Remainder:{str(tags_as_string)}", {} + f"Remainder:{str(tags_as_string)}" @hed_error(ValidationErrors.HED_REQUIRED_TAG_MISSING) def val_warning_required_prefix_missing(tag_prefix): - return f"Tag with prefix '{tag_prefix}' is required", {} + return f"Tag with prefix '{tag_prefix}' is required" @hed_tag_error(ValidationErrors.HED_STYLE_WARNING, default_severity=ErrorSeverity.WARNING) def val_warning_capitalization(tag): - return f"First word not capitalized or camel case - '{tag}'", {} + return f"First word not capitalized or camel case - '{tag}'" @hed_tag_error(ValidationErrors.HED_UNITS_DEFAULT_USED, default_severity=ErrorSeverity.WARNING) def val_warning_default_units_used(tag, default_unit): - return f"No unit specified. Using '{default_unit}' as the default - '{tag}'", {} + return f"No unit specified. Using '{default_unit}' as the default - '{tag}'" @hed_error(SchemaErrors.HED_SCHEMA_DUPLICATE_NODE) def schema_error_hed_duplicate_node(tag, duplicate_tag_list, section): tag_join_delimiter = "\n\t" return f"Duplicate term '{str(tag)}' used {len(duplicate_tag_list)} places in '{section}' section schema as:" + \ - f"{tag_join_delimiter}{tag_join_delimiter.join(duplicate_tag_list)}", {} + f"{tag_join_delimiter}{tag_join_delimiter.join(duplicate_tag_list)}" @hed_error(SchemaErrors.HED_SCHEMA_ATTRIBUTE_INVALID) def schema_error_unknown_attribute(attribute_name, source_tag): return f"Attribute '{attribute_name}' used by '{source_tag}' was not defined in the schema, " \ - f"or was used outside of it's defined class.", {} + f"or was used outside of it's defined class." 
@hed_error(SchemaWarnings.INVALID_CHARACTERS_IN_DESC, default_severity=ErrorSeverity.WARNING, actual_code=SchemaWarnings.HED_SCHEMA_CHARACTER_INVALID) def schema_warning_invalid_chars_desc(desc_string, tag_name, problem_char, char_index): - return f"Invalid character '{problem_char}' in desc for '{tag_name}' at position {char_index}. '{desc_string}", {} + return f"Invalid character '{problem_char}' in desc for '{tag_name}' at position {char_index}. '{desc_string}" @hed_error(SchemaWarnings.INVALID_CHARACTERS_IN_TAG, default_severity=ErrorSeverity.WARNING, actual_code=SchemaWarnings.HED_SCHEMA_CHARACTER_INVALID) def schema_warning_invalid_chars_tag(tag_name, problem_char, char_index): - return f"Invalid character '{problem_char}' in tag '{tag_name}' at position {char_index}.", {} + return f"Invalid character '{problem_char}' in tag '{tag_name}' at position {char_index}." @hed_error(SchemaWarnings.INVALID_CAPITALIZATION, default_severity=ErrorSeverity.WARNING) def schema_warning_invalid_capitalization(tag_name, problem_char, char_index): return "First character must be a capital letter or number. " + \ - f"Found character '{problem_char}' in tag '{tag_name}' at position {char_index}.", \ - {'problem_char': problem_char} + f"Found character '{problem_char}' in tag '{tag_name}' at position {char_index}." @hed_error(SchemaWarnings.NON_PLACEHOLDER_HAS_CLASS, default_severity=ErrorSeverity.WARNING) def schema_warning_non_placeholder_class(tag_name, invalid_attribute_name): return "Only placeholder nodes('#') can have a unit or value class." + \ - f"Found {invalid_attribute_name} on {tag_name}", {} + f"Found {invalid_attribute_name} on {tag_name}" @hed_error(SidecarErrors.BLANK_HED_STRING) def sidecar_error_blank_hed_string(): - return "No HED string found for Value or Category column.", {} + return "No HED string found for Value or Category column." 
@hed_error(SidecarErrors.WRONG_HED_DATA_TYPE) def sidecar_error_hed_data_type(expected_type, given_type): - return f"Invalid HED string datatype sidecar. Should be '{expected_type}', but got '{given_type}'", {} + return f"Invalid HED string datatype sidecar. Should be '{expected_type}', but got '{given_type}'" @hed_error(SidecarErrors.INVALID_POUND_SIGNS_VALUE, actual_code=ValidationErrors.HED_PLACEHOLDER_INVALID) def sidecar_error_invalid_pound_sign_count(pound_sign_count): - return f"There should be exactly one # character in a sidecar string. Found {pound_sign_count}", {} + return f"There should be exactly one # character in a sidecar string. Found {pound_sign_count}" @hed_error(SidecarErrors.INVALID_POUND_SIGNS_CATEGORY, actual_code=ValidationErrors.HED_PLACEHOLDER_INVALID) def sidecar_error_too_many_pound_signs(pound_sign_count): - return f"There should be no # characters in a category sidecar string. Found {pound_sign_count}", {} + return f"There should be no # characters in a category sidecar string. Found {pound_sign_count}" @hed_error(SidecarErrors.UNKNOWN_COLUMN_TYPE) def sidecar_error_unknown_column(column_name): return f"Could not automatically identify column '{column_name}' type from file. "\ - "Most likely the column definition in question needs a # sign to replace a number somewhere.", {} + "Most likely the column definition in question needs a # sign to replace a number somewhere." + + +@hed_error(SidecarErrors.SIDECAR_HED_USED, actual_code=SidecarErrors.SIDECAR_INVALID) +def sidecar_hed_used(): + return "'HED' is a reserved name and cannot be used as a sidecar column name" + + +@hed_error(SidecarErrors.SIDECAR_NA_USED, actual_code=SidecarErrors.SIDECAR_INVALID) +def sidecar_na_used(column_name): + return f"Invalid category key 'n/a' found in column {column_name}." 
@hed_tag_error(DefinitionErrors.DEF_TAG_IN_DEFINITION, actual_code=ValidationErrors.HED_DEFINITION_INVALID) def def_error_def_tag_in_definition(tag, def_name): return f"Invalid tag {tag} found in definition for {def_name}. " +\ - f"Def and Def-expand tags cannot be in definitions.", {} + f"Def and Def-expand tags cannot be in definitions." @hed_error(DefinitionErrors.WRONG_NUMBER_GROUP_TAGS, actual_code=ValidationErrors.HED_DEFINITION_INVALID) def def_error_wrong_group_tags(def_name, tag_list): tag_list_strings = [str(tag) for tag in tag_list] - return f"Too many group tags found in definition for {def_name}. Expected 1, found: {tag_list_strings}", {} + return f"Too many group tags found in definition for {def_name}. Expected 1, found: {tag_list_strings}" @hed_error(DefinitionErrors.WRONG_NUMBER_PLACEHOLDER_TAGS, actual_code=ValidationErrors.HED_DEFINITION_INVALID) def def_error_wrong_placeholder_count(def_name, expected_count, tag_list): tag_list_strings = [str(tag) for tag in tag_list] return f"Incorrect number placeholder tags found in definition for {def_name}. " + \ - f"Expected {expected_count}, found: {tag_list_strings}", {} + f"Expected {expected_count}, found: {tag_list_strings}" @hed_error(DefinitionErrors.DUPLICATE_DEFINITION, actual_code=ValidationErrors.HED_DEFINITION_INVALID) def def_error_duplicate_definition(def_name): - return f"Duplicate definition found for '{def_name}'.", {} + return f"Duplicate definition found for '{def_name}'." @hed_error(DefinitionErrors.TAG_IN_SCHEMA, actual_code=ValidationErrors.HED_DEFINITION_INVALID) def def_error_tag_already_in_schema(def_name): - return f"Term '{def_name}' already used as term in schema and cannot be re-used as a definition.", {} + return f"Term '{def_name}' already used as term in schema and cannot be re-used as a definition." 
@hed_error(DefinitionErrors.INVALID_DEFINITION_EXTENSION, actual_code=ValidationErrors.HED_DEFINITION_INVALID) def def_error_invalid_def_extension(def_name): - return f"Term '{def_name}' has an invalid extension. Definitions can only have one term.", {} + return f"Term '{def_name}' has an invalid extension. Definitions can only have one term." @hed_tag_error(OnsetErrors.ONSET_DEF_UNMATCHED, actual_code=ValidationErrors.HED_ONSET_OFFSET_ERROR) def onset_error_def_unmatched(tag): - return f"The def tag in an onset/offset tag is unmatched. Def tag: '{tag}'", {} + return f"The def tag in an onset/offset tag is unmatched. Def tag: '{tag}'" @hed_tag_error(OnsetErrors.OFFSET_BEFORE_ONSET, actual_code=ValidationErrors.HED_ONSET_OFFSET_ERROR) def onset_error_offset_before_onset(tag): - return f"Offset tag '{tag}' does not have a matching onset.", {} + return f"Offset tag '{tag}' does not have a matching onset." @hed_tag_error(OnsetErrors.ONSET_NO_DEF_TAG_FOUND, actual_code=ValidationErrors.HED_ONSET_OFFSET_ERROR) def onset_no_def_found(tag): - return f"'{tag}' tag has no def or def-expand tag in string.", {} + return f"'{tag}' tag has no def or def-expand tag in string." @hed_tag_error(OnsetErrors.ONSET_TOO_MANY_DEFS, actual_code=ValidationErrors.HED_ONSET_OFFSET_ERROR) def onset_too_many_defs(tag, tag_list): tag_list_strings = [str(tag) for tag in tag_list] - return f"Too many def tags found in onset for {tag}. Expected 1, also found: {tag_list_strings}", {} + return f"Too many def tags found in onset for {tag}. Expected 1, also found: {tag_list_strings}" @hed_tag_error(OnsetErrors.ONSET_WRONG_NUMBER_GROUPS, actual_code=ValidationErrors.HED_ONSET_OFFSET_ERROR) def onset_too_many_groups(tag, tag_list): tag_list_strings = [str(a_tag) for a_tag in tag_list] return f"An onset tag should have at most 2 sibling nodes, an offset tag should have 1. 
" +\ - f"Found {len(tag_list_strings)}: {tag_list_strings}", {} + f"Found {len(tag_list_strings)}: {tag_list_strings}" @hed_tag_error(OnsetErrors.ONSET_TAG_OUTSIDE_OF_GROUP, actual_code=ValidationErrors.HED_ONSET_OFFSET_ERROR) def onset_wrong_type_tag(tag, def_tag): return f"Onset def tag '{def_tag}' has an improper sibling tag '{tag}'. All onset context tags must be " + \ - f"in a single group together.", {} + f"in a single group together." @hed_tag_error(OnsetErrors.ONSET_PLACEHOLDER_WRONG, actual_code=ValidationErrors.HED_ONSET_OFFSET_ERROR) def onset_wrong_placeholder(tag, has_placeholder): if has_placeholder: - return f"Onset/offset def tag {tag} expects a placeholder value, but does not have one.", {} - return f"Onset/offset def tag {tag} should not have a placeholder, but has one.", {} + return f"Onset/offset def tag {tag} expects a placeholder value, but does not have one." + return f"Onset/offset def tag {tag} should not have a placeholder, but has one." diff --git a/hed/errors/error_reporter.py b/hed/errors/error_reporter.py index 8f8b1e368..4a7fd91a9 100644 --- a/hed/errors/error_reporter.py +++ b/hed/errors/error_reporter.py @@ -43,8 +43,8 @@ def wrapper(*args, severity=default_severity, **kwargs): Returns: list: A list of dict with the errors.= """ - base_message, error_vars = func(*args, **kwargs) - error_object = ErrorHandler._create_error_object(actual_code, base_message, severity, **error_vars) + base_message = func(*args, **kwargs) + error_object = ErrorHandler._create_error_object(actual_code, base_message, severity) return error_object _register_error_function(error_type, wrapper_func=wrapper) @@ -97,8 +97,8 @@ def wrapper(tag, index_in_tag, index_in_tag_end, *args, severity=default_severit except AttributeError: org_tag_text = str(tag) - base_message, error_vars = func(org_tag_text, problem_sub_tag, *args, **kwargs) - error_object = ErrorHandler._create_error_object(actual_code, base_message, severity, **error_vars, + base_message = 
func(org_tag_text, problem_sub_tag, *args, **kwargs) + error_object = ErrorHandler._create_error_object(actual_code, base_message, severity, index_in_tag=index_in_tag, index_in_tag_end=index_in_tag_end, source_tag=tag) @@ -129,8 +129,8 @@ def wrapper(tag, *args, severity=default_severity, **kwargs): org_tag_text = tag.get_original_hed_string() else: org_tag_text = str(tag) - base_message, error_vars = func(org_tag_text, *args, **kwargs) - error_object = ErrorHandler._create_error_object(actual_code, base_message, severity, **error_vars, + base_message = func(org_tag_text, *args, **kwargs) + error_object = ErrorHandler._create_error_object(actual_code, base_message, severity, source_tag=tag) return error_object @@ -148,9 +148,10 @@ def wrapper(tag, *args, severity=default_severity, **kwargs): class ErrorHandler: - def __init__(self): + def __init__(self, check_for_warnings=True): # The current (ordered) dictionary of contexts. self.error_context = [] + self._check_for_warnings = check_for_warnings def push_error_context(self, context_type, context, increment_depth_after=True): """ Push a new error context to narrow down error scope. 
@@ -191,8 +192,12 @@ def get_error_context_copy(self): def format_error_with_context(self, *args, **kwargs): error_object = ErrorHandler.format_error(*args, **kwargs) if self is not None: - self._add_context_to_errors(error_object[0], self.error_context) - self._update_error_with_char_pos(error_object[0]) + actual_error = error_object[0] + # # Filter out warning errors + if not self._check_for_warnings and actual_error['severity'] >= ErrorSeverity.WARNING: + return [] + self._add_context_to_errors(actual_error, self.error_context) + self._update_error_with_char_pos(actual_error) return error_object @@ -225,26 +230,19 @@ def format_error(error_type, *args, actual_error=None, **kwargs): return [error_object] - def add_context_to_issues(self, issues): + def add_context_and_filter(self, issues): + """ Filter out warnings if requested, while adding context to issues. + + issues(list): + list: A list containing a single dictionary representing a single error. + """ + if not self._check_for_warnings: + issues[:] = self.filter_issues_by_severity(issues, ErrorSeverity.ERROR) + for error_object in issues: self._add_context_to_errors(error_object, self.error_context) self._update_error_with_char_pos(error_object) - def format_error_list(self, issue_params): - """ Convert an issue params list to an issues list. This means adding the error context primarily. - - Parameters: - issue_params (list): A list of dict containing the unformatted issues list. - - Returns: - list: A list of dict containing unformatted errors. - - """ - formatted_issues = [] - for issue in issue_params: - formatted_issues += self.format_error(**issue) - return formatted_issues - @staticmethod def format_error_from_context(error_type, error_context, *args, actual_error=None, **kwargs): """ Format an error based on the error type. @@ -262,6 +260,7 @@ def format_error_from_context(error_type, error_context, *args, actual_error=Non Notes: - Generally the error_context is returned from _add_context_to_errors. 
- The actual_error is useful for errors that are shared like invalid character. + - This can't filter out warnings like the other ones. """ error_func = error_functions.get(error_type) diff --git a/hed/errors/error_types.py b/hed/errors/error_types.py index feb21bef6..ac76f6992 100644 --- a/hed/errors/error_types.py +++ b/hed/errors/error_types.py @@ -47,6 +47,7 @@ class ValidationErrors: HED_UNITS_DEFAULT_USED = 'HED_UNITS_DEFAULT_USED' HED_VALUE_INVALID = 'HED_VALUE_INVALID' HED_LIBRARY_UNMATCHED = "HED_LIBRARY_UNMATCHED" + TAG_PREFIX_INVALID = "TAG_PREFIX_INVALID" # HED_VERSION_WARNING HED_MISSING_REQUIRED_COLUMN = "HED_MISSING_REQUIRED_COLUMN" @@ -75,12 +76,14 @@ class ValidationErrors: class SidecarErrors: # These are for json sidecar validation errors(sidecars can also produce most normal validation errors) + SIDECAR_INVALID = "SIDECAR_INVALID" # this is the generic error reported for several later ones BLANK_HED_STRING = 'blankValueString' WRONG_HED_DATA_TYPE = 'wrongHedDataType' INVALID_POUND_SIGNS_VALUE = 'invalidNumberPoundSigns' INVALID_POUND_SIGNS_CATEGORY = 'tooManyPoundSigns' UNKNOWN_COLUMN_TYPE = 'sidecarUnknownColumn' - + SIDECAR_HED_USED = 'SIDECAR_HED_USED' + SIDECAR_NA_USED = 'SIDECAR_NA_USED' class SchemaErrors: HED_SCHEMA_DUPLICATE_NODE = 'HED_SCHEMA_DUPLICATE_NODE' diff --git a/hed/errors/exceptions.py b/hed/errors/exceptions.py index 4b90f9b66..72ab0eead 100644 --- a/hed/errors/exceptions.py +++ b/hed/errors/exceptions.py @@ -8,6 +8,8 @@ class HedExceptions: CANNOT_PARSE_JSON = 'cannotParseJson' INVALID_EXTENSION = 'invalidExtension' + INVALID_DATAFRAME = 'INVALID_DATAFRAME' + # These are actual schema issues, not that the file cannot be found or parsed SCHEMA_HEADER_MISSING = 'HED_SCHEMA_HEADER_INVALID' HED_SCHEMA_HEADER_INVALID = 'HED_SCHEMA_HEADER_INVALID' diff --git a/hed/models/__init__.py b/hed/models/__init__.py index 07c044319..3f6d50d56 100644 --- a/hed/models/__init__.py +++ b/hed/models/__init__.py @@ -5,15 +5,12 @@ from 
.column_metadata import ColumnMetadata, ColumnType from .definition_dict import DefinitionDict from .definition_entry import DefinitionEntry -from .def_mapper import DefMapper from .expression_parser import QueryParser from .hed_group import HedGroup from .spreadsheet_input import SpreadsheetInput -from .hed_ops import HedOps from .hed_string import HedString from .hed_string_group import HedStringGroup from .hed_tag import HedTag -from .onset_mapper import OnsetMapper from .sidecar import Sidecar from .tabular_input import TabularInput from .timeseries_input import TimeseriesInput diff --git a/hed/models/base_input.py b/hed/models/base_input.py index 33a35a96a..869bc4ea6 100644 --- a/hed/models/base_input.py +++ b/hed/models/base_input.py @@ -1,19 +1,12 @@ +import re import os + import openpyxl import pandas -import copy -from hed.models.definition_dict import DefinitionDict from hed.models.column_mapper import ColumnMapper from hed.errors.exceptions import HedFileError, HedExceptions -from hed.errors.error_types import ErrorContext, ErrorSeverity from hed.errors.error_reporter import ErrorHandler -from hed.models import model_constants -from hed.models.hed_ops import translate_ops -from hed.models.onset_mapper import OnsetMapper -from hed.models.hed_string import HedString -from hed.models.hed_string_group import HedStringGroup -from hed.models.def_mapper import DefMapper class BaseInput: @@ -27,8 +20,8 @@ class BaseInput: TAB_DELIMITER = '\t' COMMA_DELIMITER = ',' - def __init__(self, file, file_type=None, worksheet_name=None, has_column_names=True, mapper=None, def_mapper=None, - definition_columns=None, name=None, allow_blank_names=True, hed_schema=None): + def __init__(self, file, file_type=None, worksheet_name=None, has_column_names=True, mapper=None, name=None, + allow_blank_names=True): """ Constructor for the BaseInput class. 
Parameters: @@ -40,10 +33,8 @@ def __init__(self, file, file_type=None, worksheet_name=None, has_column_names=T has_column_names (bool): True if file has column names. This value is ignored if you pass in a pandas dataframe. mapper (ColumnMapper or None): Indicates which columns have HED tags. - definition_columns(list or None): A list of columns to check for definitions. Explicit 'None' means all. name (str or None): Optional field for how this file will report errors. allow_blank_names(bool): If True, column names can be blank - hed_schema(HedSchema or None): The schema to use by default in identifying tags Notes: - See SpreadsheetInput or TabularInput for examples of how to use built-in a ColumnMapper. @@ -51,17 +42,11 @@ def __init__(self, file, file_type=None, worksheet_name=None, has_column_names=T if mapper is None: mapper = ColumnMapper() self._mapper = mapper - if def_mapper is None: - def_mapper = DefMapper(mapper.get_def_dicts()) - self._def_mapper = def_mapper self._has_column_names = has_column_names self._name = name - # This is the loaded workbook if we loaded originally from an excel file. + # This is the loaded workbook if we loaded originally from an Excel file. 
self._loaded_workbook = None self._worksheet_name = worksheet_name - self._def_columns = definition_columns - self._schema = hed_schema - self.file_def_dict = None pandas_header = 0 if not self._has_column_names: pandas_header = None @@ -82,7 +67,9 @@ def __init__(self, file, file_type=None, worksheet_name=None, has_column_names=T raise HedFileError(HedExceptions.FILE_NOT_FOUND, "Empty file passed to BaseInput.", file) elif input_type in self.TEXT_EXTENSION: self._dataframe = pandas.read_csv(file, delimiter='\t', header=pandas_header, - dtype=str, keep_default_na=False, na_values=None) + dtype=str, keep_default_na=True, na_values=None) + # Convert nan values to a known value + self._dataframe = self._dataframe.fillna("n/a") elif input_type in self.EXCEL_EXTENSION: self._loaded_workbook = openpyxl.load_workbook(file) loaded_worksheet = self.get_worksheet(self._worksheet_name) @@ -90,8 +77,11 @@ def __init__(self, file, file_type=None, worksheet_name=None, has_column_names=T else: raise HedFileError(HedExceptions.INVALID_EXTENSION, "", file) - column_issues = ColumnMapper.validate_column_map(self.columns, - allow_blank_names=allow_blank_names) + if self._dataframe.size == 0: + raise HedFileError(HedExceptions.INVALID_DATAFRAME, "Invalid dataframe(malformed datafile, etc)", file) + + # todo: Can we get rid of this behavior now that we're using pandas? + column_issues = ColumnMapper.validate_column_map(self.columns, allow_blank_names=allow_blank_names) if column_issues: raise HedFileError(HedExceptions.BAD_COLUMN_NAMES, "Duplicate or blank columns found. See issues.", self.name, issues=column_issues) @@ -113,15 +103,29 @@ def reset_mapper(self, new_mapper): columns = self._dataframe.columns self._mapper.set_column_map(columns) - self.file_def_dict = self.extract_definitions() - - self.update_definition_mapper(self.file_def_dict) - @property def dataframe(self): """ The underlying dataframe. 
""" return self._dataframe + @property + def dataframe_a(self): + """Return the assembled dataframe + Probably a placeholder name. + + Returns: + Dataframe: the assembled dataframe""" + return self.assemble() + + @property + def series_a(self): + """Return the assembled dataframe as a series + Probably a placeholder name. + + Returns: + Series: the assembled dataframe with columns merged""" + return self.combine_dataframe(self.assemble()) + @property def name(self): """ Name of the data. """ @@ -142,125 +146,101 @@ def worksheet_name(self): """ The worksheet name. """ return self._worksheet_name - def get_definitions(self, as_strings=False): - if as_strings: - return DefinitionDict.get_as_strings(self._def_mapper.gathered_defs) - else: - return self._def_mapper - - def _convert_to_form(self, hed_schema, tag_form, error_handler): - """ Convert all tags to the specified form. + def convert_to_form(self, hed_schema, tag_form): + """ Convert all tags in underlying dataframe to the specified form. Parameters: - hed_schema (HedSchema or None): The schema to use to convert tags. - If None, uses the one used to open the file. - tag_form (str): The form to convert the tags to (short_tag, long_tag, base_tag, etc). - error_handler (ErrorHandler or None): The error handler to use for context or default if none. + hed_schema (HedSchema): The schema to use to convert tags. + tag_form(str): HedTag property to convert tags to. + Most cases should use convert_to_short or convert_to_long below. + """ + from hed.models.df_util import convert_to_form + convert_to_form(self._dataframe, hed_schema, tag_form, self._mapper.get_tag_columns()) - Returns: - dict: A list of issue dictionaries corresponding to issues found during conversion. + def convert_to_short(self, hed_schema): + """ Convert all tags in underlying dataframe to short form. + Parameters: + hed_schema (HedSchema): The schema to use to convert tags. 
""" - error_list = [] - if hed_schema is None: - hed_schema = self._schema - if hed_schema is None: - raise ValueError("Cannot convert between tag forms without a schema.") - for row_number, row_dict in enumerate(self.iter_dataframe(hed_ops=hed_schema, - return_string_only=False, - remove_definitions=False, - requested_columns=self._mapper.get_tag_columns(), - error_handler=error_handler)): - column_to_hed_tags_dictionary = row_dict[model_constants.COLUMN_TO_HED_TAGS] - error_list += row_dict[model_constants.ROW_ISSUES] - for column_number in column_to_hed_tags_dictionary: - column_hed_string = column_to_hed_tags_dictionary[column_number] - self.set_cell(row_number, column_number, column_hed_string, - include_column_prefix_if_exist=False, tag_form=tag_form) - - return error_list - - def convert_to_short(self, hed_schema=None, error_handler=None): - """ Convert all tags to short form. + return self.convert_to_form(hed_schema, "short_tag") + + def convert_to_long(self, hed_schema): + """ Convert all tags in underlying dataframe to long form. Parameters: hed_schema (HedSchema or None): The schema to use to convert tags. - If None, uses the one used to open the file. - error_handler (ErrorHandler): The error handler to use for context, uses a default if none. - - Returns: - dict: A list of issue dictionaries corresponding to issues found during conversion. - """ - return self._convert_to_form(hed_schema, "short_tag", error_handler) + return self.convert_to_form(hed_schema, "long_tag") - def convert_to_long(self, hed_schema=None, error_handler=None): - """ Convert all tags to long form. + def shrink_defs(self, hed_schema): + """ Shrinks any def-expand found in the underlying dataframe. Parameters: - hed_schema (HedSchema or None): The schema to use to convert tags. - If None, uses the one used to open the file. - error_handler (ErrorHandler): The error handler to use for context, uses a default if none. 
+            hed_schema (HedSchema or None): The schema to use to identify defs
+        """
+        from hed.models.df_util import shrink_defs
+        shrink_defs(self._dataframe, hed_schema=hed_schema, columns=self._mapper.get_tag_columns())
 
-        Returns:
-            dict: A list of issue dictionaries corresponding to issues found during conversion.
+    def expand_defs(self, hed_schema, def_dict):
+        """ Expands any def tags found in the underlying dataframe.
+        Parameters:
+            hed_schema (HedSchema or None): The schema to use to identify defs
+            def_dict (DefinitionDict): The definitions to expand
         """
-        return self._convert_to_form(hed_schema, "long_tag", error_handler)
+        from hed.models.df_util import expand_defs
+        expand_defs(self._dataframe, hed_schema=hed_schema, def_dict=def_dict, columns=self._mapper.get_tag_columns())
 
-    def to_excel(self, file, output_processed_file=False):
+    def to_excel(self, file, output_assembled=False):
         """ Output to an Excel file.
 
         Parameters:
             file (str or file-like): Location to save this base input.
-            output_processed_file (bool): If True, replace definitions and labels in HED columns.
-                Also fills in things like categories.
+            output_assembled (bool): Plug in categories and values from the sidecar directly.
 
         Raises:
-            HedFileError if empty file object or file cannot be opened.
+            ValueError: if empty file object or file cannot be opened.
 
         """
         if not file:
             raise ValueError("Empty file name or object passed in to BaseInput.save.")
 
-        # For now just make a copy if we want to save a formatted copy. Could optimize this further.
- if output_processed_file: - output_file = self._get_processed_copy() - else: - output_file = self + dataframe = self._dataframe + + if output_assembled: + dataframe = self.dataframe_a if self._loaded_workbook: old_worksheet = self.get_worksheet(self._worksheet_name) - # excel spreadsheets are 1 based, then add another 1 for column names if present + # Excel spreadsheets are 1 based, then add another 1 for column names if present adj_row_for_col_names = 1 if self._has_column_names: adj_row_for_col_names += 1 adj_for_one_based_cols = 1 - for row_number, text_file_row in output_file._dataframe.iterrows(): + for row_number, text_file_row in dataframe.iterrows(): for column_number, column_text in enumerate(text_file_row): old_worksheet.cell(row_number + adj_row_for_col_names, column_number + adj_for_one_based_cols).value = \ - output_file._dataframe.iloc[row_number, column_number] + dataframe.iloc[row_number, column_number] self._loaded_workbook.save(file) else: - output_file._dataframe.to_excel(file, header=self._has_column_names) + dataframe.to_excel(file, header=self._has_column_names) - def to_csv(self, file=None, output_processed_file=False): + def to_csv(self, file=None, output_assembled=False): """ Write to file or return as a string. Parameters: file (str, file-like, or None): Location to save this file. If None, return as string. - output_processed_file (bool): Replace all definitions and labels in HED columns as appropriate. - Also fills in things like categories. + output_assembled (bool): Plug in categories and values from the sidecar directly. Returns: None or str: None if file is given or the contents as a str if file is None. """ - # For now just make a copy if we want to save a formatted copy. Could optimize this further. 
- if output_processed_file: - output_file = self._get_processed_copy() - else: - output_file = self - csv_string_if_filename_none = output_file._dataframe.to_csv(file, '\t', index=False, - header=output_file._has_column_names) + dataframe = self._dataframe + + if output_assembled: + dataframe = self.dataframe_a + + csv_string_if_filename_none = dataframe.to_csv(file, '\t', index=False, header=self._has_column_names) return csv_string_if_filename_none @property @@ -277,118 +257,32 @@ def columns(self): columns = list(self._dataframe.columns) return columns - @property - def def_dict(self): - """ Returns a dict of all the definitions found in this and sidecars + def column_metadata(self): + """Get the metadata for each column Returns: - def_dict(dict): {str: DefinitionEntry} pairs for each found definition + dict: number/ColumnMeta pairs """ - if self._def_mapper: - return self._def_mapper.gathered_defs + if self._mapper: + return self._mapper._final_column_map return {} - def __iter__(self): - """ Iterate over the underlying dataframe. """ - return self.iter_dataframe() - - def iter_dataframe(self, hed_ops=None, mapper=None, requested_columns=None, return_string_only=True, - run_string_ops_on_columns=False, error_handler=None, expand_defs=False, remove_definitions=True, - **kwargs): - """ Iterate rows based on the given column mapper. - - Parameters: - hed_ops (list, func, HedOps, or None): A func, a HedOps or a list of these to apply to the - hed strings before returning. - mapper (ColumnMapper or None): The column name to column number mapper (or internal mapper if None). - requested_columns(list or None): If this is not None, return ONLY these columns. Names or numbers allowed. - return_string_only (bool): If True, do not return issues list, individual columns, attribute columns, etc. - run_string_ops_on_columns (bool): If true, run all tag and string ops on columns, - rather than columns then rows. 
- error_handler (ErrorHandler or None): The error handler to use for context or a default if None. - expand_defs (bool): If True, expand def tags into def-expand groups. - remove_definitions (bool): If true, remove all definition tags found. - kwargs (kwargs): See models.hed_ops.translate_ops or the specific hed_ops for additional options. - - Yields: - dict: A dict with parsed row, including keys: "HED", "column_to_hed_tags", and possibly "column_issues". - - """ - if error_handler is None: - error_handler = ErrorHandler() - - if mapper is None: - mapper = self._mapper - - if requested_columns: - # Make a copy to ensure we don't alter the actual mapper - mapper = copy.deepcopy(mapper) - mapper.set_requested_columns(requested_columns) - - tag_funcs, string_funcs = self._translate_ops(hed_ops, run_string_ops_on_columns=run_string_ops_on_columns, - expand_defs=expand_defs, remove_definitions=remove_definitions, - error_handler=error_handler, **kwargs) - - # Iter tuples is ~ 25% faster compared to iterrows in our use case - for row_number, text_file_row in enumerate(self._dataframe.itertuples(index=False)): - error_handler.push_error_context(ErrorContext.ROW, row_number) - yield self._expand_row_internal(text_file_row, tag_funcs, string_funcs, - error_handler=error_handler, - mapper=mapper, return_string_only=return_string_only) - error_handler.pop_error_context() - - def _expand_row_internal(self, text_file_row, tag_funcs, string_funcs, error_handler, - mapper=None, return_string_only=False): - row_dict = mapper.expand_row_tags(text_file_row) - column_to_hed_tags = row_dict[model_constants.COLUMN_TO_HED_TAGS] - expansion_column_issues = row_dict.get(model_constants.COLUMN_ISSUES, {}) - - row_issues = [] - if tag_funcs: - row_issues += self._run_column_ops(column_to_hed_tags, tag_funcs, - expansion_column_issues, - error_handler) - - # Return a combined string if we're also returning columns. 
- if not return_string_only: - final_hed_string = HedStringGroup(column_to_hed_tags.values()) - else: - final_hed_string = HedString.from_hed_strings(contents=column_to_hed_tags.values()) - - if string_funcs: - row_issues += self._run_row_ops(final_hed_string, string_funcs, error_handler) - - if not return_string_only: - row_dict[model_constants.ROW_ISSUES] = row_issues - row_dict[model_constants.ROW_HED_STRING] = final_hed_string - return row_dict - # Return a HedString rather than a HedStringGroup - return final_hed_string - - def set_cell(self, row_number, column_number, new_string_obj, include_column_prefix_if_exist=False, - tag_form="short_tag"): + def set_cell(self, row_number, column_number, new_string_obj, tag_form="short_tag"): """ Replace the specified cell with transformed text. Parameters: row_number (int): The row number of the spreadsheet to set. column_number (int): The column number of the spreadsheet to set. new_string_obj (HedString): Object with text to put in the given cell. - include_column_prefix_if_exist (bool): If True and the column matches one from mapper - _column_prefix_dictionary, remove the prefix. tag_form (str): Version of the tags (short_tag, long_tag, base_tag, etc) Notes: Any attribute of a HedTag that returns a string is a valid value of tag_form. - """ if self._dataframe is None: raise ValueError("No data frame loaded") - transform_func = None - if not include_column_prefix_if_exist: - transform_func = self._mapper.get_prefix_remove_func(column_number) - - new_text = new_string_obj.get_as_form(tag_form, transform_func) + new_text = new_string_obj.get_as_form(tag_form) self._dataframe.iloc[row_number, column_number] = new_text def get_worksheet(self, worksheet_name=None): @@ -412,47 +306,6 @@ def get_worksheet(self, worksheet_name=None): else: return None - def get_def_and_mapper_issues(self, error_handler, check_for_warnings=False): - """ Return definition and column issues. 
- - Parameters: - error_handler (ErrorHandler): The error handler to use. - check_for_warnings (bool): If True check for and return warnings as well as errors. - - Returns: - dict: A list of definition and mapping issues. Each issue is a dictionary. - - """ - issues = [] - issues += self.file_def_dict.get_definition_issues() - - # Gather any issues from the mapper for things like missing columns. - mapper_issues = self._mapper.get_column_mapping_issues() - error_handler.add_context_to_issues(mapper_issues) - issues += mapper_issues - if not check_for_warnings: - issues = ErrorHandler.filter_issues_by_severity(issues, ErrorSeverity.ERROR) - return issues - - def _get_processed_copy(self): - """ Return a processed copy of this file. - - Returns: - BaseInput: The copy. - - Notes: - Processing includes definitions replaced, columns expanded, etc. - - """ - output_file = copy.deepcopy(self) - for row_number, row_dict in enumerate(self.iter_dataframe(return_string_only=False)): - column_to_hed_tags_dictionary = row_dict[model_constants.COLUMN_TO_HED_TAGS] - for column_number in column_to_hed_tags_dictionary: - new_text = column_to_hed_tags_dictionary[column_number] - output_file.set_cell(row_number, column_number, new_text, tag_form="short_tag") - - return output_file - @staticmethod def _get_dataframe_from_worksheet(worksheet, has_headers): """ Create a dataframe from the worksheet. 
@@ -474,139 +327,91 @@ def _get_dataframe_from_worksheet(worksheet, has_headers): else: return pandas.DataFrame(worksheet.values, dtype=str) - def _run_validators(self, hed_ops, error_handler, expand_defs=False, **kwargs): - validation_issues = [] - for row_dict in self.iter_dataframe(hed_ops=hed_ops, - return_string_only=False, - error_handler=error_handler, expand_defs=expand_defs, - **kwargs): - validation_issues += row_dict[model_constants.ROW_ISSUES] - - return validation_issues - - def _run_column_ops(self, column_to_hed_tags_dictionary, column_ops, expansion_column_issues, error_handler): - validation_issues = [] - if column_to_hed_tags_dictionary: - for column_number, column_hed_string in column_to_hed_tags_dictionary.items(): - new_column_issues = [] - error_handler.push_error_context(ErrorContext.COLUMN, column_number) - if column_hed_string is not None: - error_handler.push_error_context(ErrorContext.HED_STRING, column_hed_string, - increment_depth_after=False) - if column_number in expansion_column_issues: - new_column_issues += expansion_column_issues[column_number] - - if column_hed_string is not None: - new_column_issues += column_hed_string.apply_funcs(column_ops) - error_handler.add_context_to_issues(new_column_issues) - if column_hed_string is not None: - error_handler.pop_error_context() - error_handler.pop_error_context() - validation_issues += new_column_issues - - return validation_issues - - def _run_row_ops(self, row_hed_string, row_ops, error_handler): - error_handler.push_error_context(ErrorContext.HED_STRING, row_hed_string, increment_depth_after=False) - row_issues = row_hed_string.apply_funcs(row_ops) - error_handler.add_context_to_issues(row_issues) - error_handler.pop_error_context() - return row_issues - - def validate_file(self, hed_ops, name=None, error_handler=None, check_for_warnings=True, **kwargs): - """ Run the hed_ops on columns and rows. 
+    def validate(self, hed_schema, extra_def_dicts=None, name=None, error_handler=None):
+        """Creates a SpreadsheetValidator and returns all issues with this file
 
         Parameters:
-            hed_ops (func, HedOps, or list of func and/or HedOps): The HedOps of funcs to apply.
-            name (str): If present, use this as the filename for context, rather than using the actual filename
-                Useful for temp filenames.
-            error_handler (ErrorHandler or None): Used to report errors a default one if None.
-            check_for_warnings (bool): If True check for and return warnings as well as errors.
-            kwargs: See models.hed_ops.translate_ops or the specific hed_ops for additional options.
-
+            hed_schema(HedSchema): The schema to use for validation
+            extra_def_dicts(list of DefDict or DefDict): all definitions to use for validation
+            name(str): The name to report errors from this file as
+            error_handler (ErrorHandler): Error context to use. Creates a new one if None
         Returns:
-            list: The list of validation issues found. The list elements are dictionaries.
-
+            issues (list of dict): A list of issues for hed string
         """
+        from hed.validator.spreadsheet_validator import SpreadsheetValidator
         if not name:
             name = self.name
-        if not isinstance(hed_ops, list):
-            hed_ops = [hed_ops]
-
-        if error_handler is None:
-            error_handler = ErrorHandler()
-
-        error_handler.push_error_context(ErrorContext.FILE_NAME, name)
-        validation_issues = self.get_def_and_mapper_issues(error_handler, check_for_warnings=check_for_warnings)
-        validation_issues += self._run_validators(hed_ops, error_handler=error_handler,
-                                                  check_for_warnings=check_for_warnings, **kwargs)
-        error_handler.pop_error_context()
-
+        tab_validator = SpreadsheetValidator(hed_schema)
+        validation_issues = tab_validator.validate(self, self._mapper.get_def_dict(hed_schema, extra_def_dicts), name,
+                                                   error_handler=error_handler)
         return validation_issues
 
-    def extract_definitions(self, error_handler=None):
-        """ Gather and validate all definitions.
+ @staticmethod + def _dataframe_has_names(dataframe): + for column in dataframe.columns: + if isinstance(column, str): + return True + return False + + def assemble(self, mapper=None): + """ Assembles the hed strings Parameters: - error_handler (ErrorHandler): The error handler to use for context or a default if None. + mapper(ColumnMapper or None): Generally pass none here unless you want special behavior. Returns: - DefinitionDict: Contains all the definitions located in the file. - + Dataframe: the assembled dataframe """ - if error_handler is None: - error_handler = ErrorHandler() - new_def_dict = DefinitionDict() - hed_ops = [self._schema, new_def_dict] - for _ in self.iter_dataframe(hed_ops=hed_ops, - return_string_only=False, - requested_columns=self._def_columns, - run_string_ops_on_columns=True, - remove_definitions=False, - error_handler=error_handler): - pass - - return new_def_dict - - def update_definition_mapper(self, def_dict): - """ Add definitions from dict(s) if mapper exists. 
+ if mapper is None: + mapper = self._mapper + import pandas as pd + transformers, need_categorical = mapper.get_transformers() + if not transformers: + return None + all_columns = self._dataframe + if need_categorical: + all_columns[need_categorical] = all_columns[need_categorical].astype('category') + + all_columns = all_columns.transform(transformers) + + possible_column_references = [f"{column_name}" for column_name in self.columns if + column_name.lower() != "hed"] + found_column_references = [] + for column_name in all_columns: + df = all_columns[column_name].str.findall("\[([a-z_\-0-9]+)\]", re.IGNORECASE) + u_vals = pd.Series([j for i in df for j in i], dtype=str) + u_vals = u_vals.unique() + for val in u_vals: + if val not in found_column_references: + found_column_references.append(val) + + valid_replacements = [col for col in found_column_references if col in possible_column_references] + + column_names = list(transformers.keys()) + for column_name in valid_replacements: + column_names.remove(column_name) + saved_columns = all_columns[valid_replacements] + for column_name in column_names: + for replacing_name in valid_replacements: + column_name_brackets = f"[{replacing_name}]" + all_columns[column_name] = pd.Series(x.replace(column_name_brackets, y) for x, y + in zip(all_columns[column_name], saved_columns[replacing_name])) + all_columns = all_columns[column_names] + + return all_columns + + @staticmethod + def combine_dataframe(dataframe): + """ Combines all columns in the given dataframe into a single hed string series. Parameters: - def_dict (list or DefinitionDict): Add the DefDict or list of DefDict to the internal definition mapper. 
+ dataframe(Dataframe): The dataframe to combine + Returns: + Series: the assembled series """ - if self._def_mapper is not None: - self._def_mapper.add_definitions(def_dict) - - def _translate_ops(self, hed_ops, run_string_ops_on_columns, expand_defs, remove_definitions, **kwargs): - - tag_funcs = [] - string_funcs = [] - if hed_ops or expand_defs or remove_definitions: - if not isinstance(hed_ops, list): - hed_ops = [hed_ops] - hed_ops = hed_ops.copy() - if not run_string_ops_on_columns: - self._add_def_onset_mapper(hed_ops) - tag_funcs, string_funcs = translate_ops(hed_ops, split_ops=True, hed_schema=self._schema, - expand_defs=expand_defs, - remove_definitions=remove_definitions, - **kwargs) - else: - tag_funcs = translate_ops(hed_ops, hed_schema=self._schema, expand_defs=expand_defs, **kwargs) - - return tag_funcs, string_funcs - - def _add_def_onset_mapper(self, hed_ops): - if not any(isinstance(hed_op, DefMapper) for hed_op in hed_ops): - if self._def_mapper: - hed_ops.append(self._def_mapper) - hed_ops.append(OnsetMapper(self._def_mapper)) - return hed_ops + dataframe = dataframe.agg(', '.join, axis=1) - @staticmethod - def _dataframe_has_names(dataframe): - for column in dataframe.columns: - if isinstance(column, str): - return True - return False + # Potentially better ways to handle removing n/a by never inserting them to begin with. 
+ dataframe = dataframe.replace("(, n/a|n/a,)", "", regex=True) + return dataframe diff --git a/hed/models/column_mapper.py b/hed/models/column_mapper.py index f6fd12edb..3c4c87a63 100644 --- a/hed/models/column_mapper.py +++ b/hed/models/column_mapper.py @@ -1,13 +1,10 @@ from hed.models.column_metadata import ColumnMetadata, ColumnType from hed.models.sidecar import Sidecar -from hed.models.hed_string import HedString -from hed.models import model_constants from hed.errors.error_reporter import ErrorHandler from hed.errors.error_types import ValidationErrors import copy - PANDAS_COLUMN_PREFIX_TO_IGNORE = "Unnamed: " @@ -27,6 +24,9 @@ def __init__(self, sidecar=None, tag_columns=None, column_prefix_dictionary=None Sidecar column definitions will take precedent if there is a conflict with tag_columns. column_prefix_dictionary (dict): Dictionary with keys that are column numbers and values are HED tag prefixes to prepend to the tags in that column before processing. + May be deprecated. These are no longer prefixes, but rather converted to value columns. + eg. {"key": "Description"} will turn into a value column as {"key": "Description/#"} + This means it no longer accepts anything but the value portion only in the columns. optional_tag_columns (list): A list of ints or strings containing the columns that contain the HED tags. If the column is otherwise unspecified, convert this column type to HEDTags. requested_columns (list or None): A list of columns you wish to retrieve. 
@@ -64,11 +64,41 @@ def __init__(self, sidecar=None, tag_columns=None, column_prefix_dictionary=None self.set_requested_columns(requested_columns, False) self.set_tag_columns(tag_columns, optional_tag_columns, False) - self.set_column_prefix_dict(column_prefix_dictionary, False) + self._add_value_columns(column_prefix_dictionary) # finalize the column map based on initial settings with no header self._finalize_mapping() + def get_transformers(self): + """ Return the transformers to use on a dataframe + + """ + final_transformers = {} + need_categorical = [] + for column in self._final_column_map.values(): + assign_to_column = column.column_name + if isinstance(assign_to_column, int): + if self._column_map: + assign_to_column = self._column_map[assign_to_column - 1] + else: + assign_to_column = assign_to_column - 1 + if column.column_type == ColumnType.Ignore: + continue + elif column.column_type == ColumnType.Value: + value_str = column._hed_dict + from functools import partial + final_transformers[assign_to_column] = partial(self._value_handler, value_str) + elif column.column_type == ColumnType.Categorical: + need_categorical.append(column.column_name) + category_values = column._hed_dict + from functools import partial + final_transformers[assign_to_column] = partial(self._category_handler, category_values) + else: + final_transformers[assign_to_column] = lambda x: x + # print(column.column_type) + + return final_transformers, need_categorical + @staticmethod def validate_column_map(column_map, allow_blank_names): """ Validate there are no issues with column names. 
@@ -89,10 +119,10 @@ def validate_column_map(column_map, allow_blank_names): if name is None or name.startswith(PANDAS_COLUMN_PREFIX_TO_IGNORE): issues += ErrorHandler.format_error(ValidationErrors.HED_BLANK_COLUMN, column_number) continue - if name in used_names: - # todo: Add this check once it's more fleshed out - # issues += ErrorHandler.format_error(ValidationErrors.HED_DUPLICATE_COLUMN, name) - continue + # if name in used_names: + # # todo: Add this check once it's more fleshed out + # issues += ErrorHandler.format_error(ValidationErrors.HED_DUPLICATE_COLUMN, name) + # continue used_names.add(name) return issues @@ -116,34 +146,18 @@ def _set_sidecar(self, sidecar): self._sidecar = sidecar def get_tag_columns(self): - """ Returns the column numbers that are mapped to be HedTags + """ Returns the column numbers or names that are mapped to be HedTags Note: This is NOT the tag_columns or optional_tag_columns parameter, though they set it. Returns: - column_numbers(list): A list of column numbers that are ColumnType.HedTags + column_identifiers(list): A list of column numbers or names that are ColumnType.HedTags. + 0-based if integer-based, otherwise column name. """ - return [number for number, column_entry in self._final_column_map.items() + return [column_entry.column_name - 1 if isinstance(column_entry.column_name, int) else column_entry.column_name + for number, column_entry in self._final_column_map.items() if column_entry.column_type == ColumnType.HEDTags] - def set_column_prefix_dict(self, column_prefix_dictionary, finalize_mapping=True): - """ Replace the column prefix dictionary - - Parameters: - column_prefix_dictionary (dict): Dictionary with keys that are column numbers and values are HED tag - prefixes to prepend to the tags in that column before processing. - finalize_mapping (bool): Re-generate the internal mapping if True, otherwise no effect until finalize. - - Returns: - list: List of issues that occurred during this process. 
Each issue is a dictionary. - - """ - if column_prefix_dictionary: - self._column_prefix_dictionary = column_prefix_dictionary - if finalize_mapping: - return self._finalize_mapping() - return [] - def set_tag_columns(self, tag_columns=None, optional_tag_columns=None, finalize_mapping=True): """ Set tag columns and optional tag columns @@ -222,88 +236,15 @@ def add_columns(self, column_names_or_numbers, column_type=ColumnType.HEDTags): new_def = ColumnMetadata(column_type, column_name) self._add_column_data(new_def) - def _expand_column(self, column_number, input_text): - """ Expand the specified text based on the rules for expanding the specified column. - - Parameters: - column_number (int): The column number this text should be treated as from. - input_text (str): The text to expand, generally from a single cell of a spreadsheet. - - Returns: - str or None: The text after expansion or None if this column is undefined or the given text is null. - False or str: Depends on the value of first return value. If None, this is an error message. - If string, this is an attribute name that should be stored separately. - - """ - - # Default 1-1 mapping if we don't have specific behavior. - if self._no_mapping_info: - return HedString(input_text), False - - # If no entry, ignore this column. - if column_number not in self._final_column_map: - return None, False - - if not input_text or input_text in self._na_patterns: - return None, False - - column_entry = self._final_column_map[column_number] - return column_entry.expand(input_text) - - def expand_row_tags(self, row_text): - """ Expand all mapped columns for row. - - Parameters: - row_text (list): The text for the given row, one list entry per column number. - - Returns: - dict: A dictionary containing the keys COLUMN_TO_HED_TAGS, COLUMN_ISSUES. - - Notes: - - The "column_to_hed_tags" is each expanded column given separately as a list of HedStrings. - - Attributes are any column identified as an attribute. 
- They will appear in the return value as {attribute_name: value_of_column} - - """ - result_dict = {} - column_to_hed_tags_dictionary = {} - column_issues_dict = {} - for column_number, cell_text in enumerate(row_text): - translated_column, translation_errors = self._expand_column(column_number, str(cell_text)) - if translated_column is None: - if translation_errors: - if column_number not in column_issues_dict: - column_issues_dict[column_number] = [] - column_issues_dict[column_number] += translation_errors - column_to_hed_tags_dictionary[column_number] = translated_column - continue - - column_to_hed_tags_dictionary[column_number] = translated_column - - result_dict[model_constants.COLUMN_TO_HED_TAGS] = column_to_hed_tags_dictionary - if column_issues_dict: - result_dict[model_constants.COLUMN_ISSUES] = column_issues_dict - - return result_dict - - def get_prefix_remove_func(self, column_number): - """ Return a function to removes name prefixes for column - - Parameters: - column_number (int): Column number to look up in the prefix dictionary. - - Returns: - func: A function taking a tag and string, returning a string. - - """ - if column_number not in self._final_column_map: - return None - - entry = self._final_column_map[column_number] - if not entry.column_prefix: - return None - - return entry.remove_prefix + def _add_value_columns(self, column_prefix_dictionary): + if column_prefix_dictionary: + for col, prefix in column_prefix_dictionary.items(): + if prefix.endswith("/"): + prefix = prefix + "#" + else: + prefix = prefix + "/#" + new_def = ColumnMetadata(ColumnType.Value, col, hed_dict=prefix) + self._add_column_data(new_def) def _add_column_data(self, new_column_entry): """ Add the metadata of a column to this column mapper. 
@@ -318,34 +259,6 @@ def _add_column_data(self, new_column_entry): column_name = new_column_entry.column_name self.column_data[column_name] = copy.deepcopy(new_column_entry) - @staticmethod - def _set_column_prefix(final_map, column_number, new_required_prefix): - """ Internal function to add this as a required name_prefix to a column - - Parameters: - final_map (dict): {column_number:prefix} Dict of column numbers with prefixes - column_number (int): The column number with this name_prefix. - new_required_prefix (str): The name_prefix to add to the column when loading from a spreadsheet. - - Raises: - TypeError if column number is passed as a str rather an int. - - Notes: - If the column is not known to the mapper, it will be added as a HEDTags column. - - """ - if isinstance(column_number, str): - raise TypeError("Must pass in a column number not column_name to _set_column_prefix") - if column_number not in final_map: - column_entry = ColumnMetadata(ColumnType.HEDTags) - final_map[column_number] = column_entry - else: - column_entry = final_map[column_number] - - column_entry.column_prefix = new_required_prefix - if column_entry.column_type is None or column_entry.column_type == ColumnType.Ignore: - column_entry.column_type = ColumnType.HEDTags - @staticmethod def _get_basic_final_map(column_map, column_data): basic_final_map = {} @@ -456,15 +369,14 @@ def _finalize_mapping(self): issues += self._add_tag_columns(final_map, unhandled_names, all_tag_columns, required_tag_columns, self._warn_on_missing_column) - # Add prefixes - for column_number, prefix in self._column_prefix_dictionary.items(): - self._set_column_prefix(final_map, column_number, prefix) - issues += ColumnMapper.validate_column_map(self._column_map.values(), allow_blank_names=False) self._final_column_map = self._filter_by_requested(final_map, self._requested_columns) + # Make sure this new dict is sorted + self._final_column_map = dict(sorted(final_map.items())) self._no_mapping_info = not 
self._check_if_mapping_info() + self._finalize_mapping_issues = issues return issues @@ -479,15 +391,19 @@ def _column_name_requested(self, column_name): return True return column_name in self._requested_columns - def get_def_dicts(self): + def get_def_dict(self, hed_schema=None, extra_def_dicts=None): """ Return def dicts from every column description. - Returns: - list: A list of DefinitionDict objects corresponding to each column entry. + Parameters: + hed_schema (Schema or None): A HED schema object to use for extracting definitions. + extra_def_dicts (list, DefinitionDict, or None): Extra dicts to add to the list. + Returns: + DefinitionDict: A single definition dict representing all the data(and extra def dicts) """ if self._sidecar: - return self._sidecar.get_def_dicts() + return self._sidecar.get_def_dict(hed_schema=hed_schema, extra_def_dicts=extra_def_dicts) + return [] def get_column_mapping_issues(self): @@ -498,3 +414,14 @@ def get_column_mapping_issues(self): """ return self._finalize_mapping_issues + + @staticmethod + def _category_handler(category_values, x): + return category_values.get(x, "") + + @staticmethod + def _value_handler(value_str, x): + if x == "n/a": + return "n/a" + + return value_str.replace("#", str(x)) diff --git a/hed/models/column_metadata.py b/hed/models/column_metadata.py index 3921b5b82..ecdc76f08 100644 --- a/hed/models/column_metadata.py +++ b/hed/models/column_metadata.py @@ -1,11 +1,9 @@ from enum import Enum -from hed.models.hed_string import HedString -from hed.errors.error_types import SidecarErrors, ValidationErrors -from hed.errors.error_reporter import ErrorHandler +from hed.errors.error_types import SidecarErrors class ColumnType(Enum): - """ The overall column_type of a column in column mapper, eg treat it as HED tags. + """ The overall column_type of a column in column mapper, e.g. treat it as HED tags. 
Mostly internal to column mapper related code """ @@ -14,7 +12,7 @@ class ColumnType(Enum): Ignore = "ignore" # This column is a category with a list of possible values to replace with hed strings. Categorical = "categorical" - # This column has a value(eg filename) that is added to a hed tag in place of a # sign. + # This column has a value(e.g. filename) that is added to a hed tag in place of a # sign. Value = "value" # Return this column exactly as given, it is HED tags. HEDTags = "hed_tags" @@ -58,105 +56,6 @@ def hed_dict(self): """ return self._hed_dict - def _get_category_hed_string(self, category): - """ Fetch the hed string for a category key. - - Parameters: - category (str): The category key to retrieve the string from. - - Returns: - str: The hed string for a given category entry in a category column. - - """ - if self.column_type != ColumnType.Categorical: - return None - - return self._hed_dict.get(category, None) - - def _get_value_hed_string(self): - """ Fetch the hed string in a value column. - - Returns: - str: The hed string for a given value column. - - """ - if self.column_type != ColumnType.Value: - return None - - return self._hed_dict - - def expand(self, input_text): - """ Expand text using the rules for this column. - - Parameters: - input_text (str): Text to expand (generally from a single cell in a spreadsheet). - - Returns: - str or None: The expanded column as a hed_string. - str or dict: If this is a string, contains the name of this column - as an attribute. If the first return value is None, this is an error message dictionary. - - Notes: - - Examples are adding name_prefix, inserting a column hed_string from a category key, etc. 
- - """ - column_type = self.column_type - - if column_type == ColumnType.Categorical: - final_text = self._get_category_hed_string(input_text) - if final_text: - return HedString(final_text), False - else: - return None, ErrorHandler.format_error(ValidationErrors.HED_SIDECAR_KEY_MISSING, invalid_key=input_text, - category_keys=list(self._hed_dict.keys())) - elif column_type == ColumnType.Value: - prelim_text = self._get_value_hed_string() - final_text = prelim_text.replace("#", input_text) - return HedString(final_text), False - elif column_type == ColumnType.HEDTags: - hed_string_obj = HedString(input_text) - self._prepend_required_prefix(hed_string_obj, self.column_prefix) - return hed_string_obj, False - elif column_type == ColumnType.Ignore: - return None, False - - return None, {"error_type": "INTERNAL_ERROR"} - - @staticmethod - def _prepend_required_prefix(required_tag_column_tags, required_tag_prefix): - """ Prepend the tag paths to the required tag column tags that need them. - - Parameters: - required_tag_column_tags (HedString): A string containing HED tags associated with a - required tag column that may need a tag name_prefix prepended to its tags. - required_tag_prefix (str): A string that will be added if missing to any given tag. - """ - if not required_tag_prefix: - return required_tag_column_tags - - for tag in required_tag_column_tags.get_all_tags(): - tag.add_prefix_if_needed(required_tag_prefix) - - return required_tag_column_tags - - def remove_prefix(self, original_tag, current_tag_text): - """ Remove column_prefix if present from tag. - - Parameters: - original_tag (HedTag): The original hed tag being written. - current_tag_text (str): A single tag as a string, in any form. 
- - Returns: - str: current_tag_text with required prefixes removed - """ - prefix_to_remove = self.column_prefix - if not prefix_to_remove: - return current_tag_text - - if current_tag_text.lower().startswith(prefix_to_remove.lower()): - current_tag_text = current_tag_text[len(prefix_to_remove):] - return current_tag_text - @staticmethod def expected_pound_sign_count(column_type): """ Return how many pound signs a column string should have. diff --git a/hed/models/def_mapper.py b/hed/models/def_mapper.py deleted file mode 100644 index 98b8bbb43..000000000 --- a/hed/models/def_mapper.py +++ /dev/null @@ -1,255 +0,0 @@ -from hed.models.hed_string import HedString -from hed.models.hed_tag import HedTag -from hed.models.definition_dict import DefinitionDict -from hed.models.model_constants import DefTagNames -from hed.errors.error_types import ValidationErrors, DefinitionErrors -from hed.errors.error_reporter import ErrorHandler -from hed.models.hed_ops import HedOps - -# TODO: should not have print statement when error - - -class DefMapper(HedOps): - """ Handles converting Def/ and Def-expand/. - - Notes: - - The class provides string funcs but no tag funcs when extending HedOps. - - The class can expand or shrink definitions in hed strings via - Def/XXX and (Def-expand/XXX ...). - - """ - - def __init__(self, def_dicts=None): - """ Initialize mapper for definitions in hed strings. - - Parameters: - def_dicts (list or DefinitionDict): DefinitionDicts containing the definitions this mapper - should initialize with. - - Notes: - - More definitions can be added later. - - """ - super().__init__() - self._gathered_defs = {} - # List of def names we want to be able to quickly purge. 
- self._temporary_def_names = set() - self._def_tag_name = DefTagNames.DEFINITION_KEY - self._label_tag_name = DefTagNames.DEF_KEY - # this only gathers issues with duplicate definitions - self._issues = [] - if def_dicts: - self.add_definitions(def_dicts) - - @property - def issues(self): - return self._issues - - @property - def gathered_defs(self): - return self._gathered_defs - - def get_def_entry(self, def_name): - """ Get the definition entry for the definition name. - - Parameters: - def_name (str): Name of the definition to retrieve. - - Returns: - DefinitionEntry: Definition entry for the requested definition. - - """ - - return self._gathered_defs.get(def_name.lower()) - - def clear_temporary_definitions(self): - """ Remove any previously added temporary definitions. """ - for def_name in self._temporary_def_names: - del self._gathered_defs[def_name] - self._temporary_def_names = set() - - def add_definitions_from_string_as_temp(self, hed_string_obj): - """ Add definitions from hed string as temporary. - - Parameters: - hed_string_obj (HedString): Hed string object to search for definitions - - Returns: - list: List of issues due to invalid definitions found in this string. Each issue is a dictionary. - - """ - this_string_def_dict = DefinitionDict() - validation_issues = this_string_def_dict.check_for_definitions(hed_string_obj) - self.add_definitions(this_string_def_dict, add_as_temp=True) - return validation_issues - - def add_definitions(self, def_dicts, add_as_temp=False): - """ Add definitions from dict(s) to mapper - - Parameters: - def_dicts (list or DefinitionDict): DefDict or list of DefDicts whose definitions should be added. - add_as_temp (bool): If true, mark these new definitions as temporary (easily purged). 
- - """ - if not isinstance(def_dicts, list): - def_dicts = [def_dicts] - for def_dict in def_dicts: - if isinstance(def_dict, DefinitionDict): - self._add_definitions_from_dict(def_dict, add_as_temp) - else: - print(f"Invalid input type '{type(def_dict)} passed to DefMapper. Skipping.") - - def _add_definitions_from_dict(self, def_dict, add_as_temp=False): - """ Add the definitions found in the given definition dictionary to this mapper. - - Parameters: - def_dict (DefinitionDict): DefDict whose definitions should be added. - add_as_temp (bool): If true, mark these new definitions as temporary (easily purged). - - """ - for def_tag, def_value in def_dict: - if def_tag in self._gathered_defs: - error_context = self._gathered_defs[def_tag].source_context - self._issues += ErrorHandler.format_error_from_context(DefinitionErrors.DUPLICATE_DEFINITION, - error_context=error_context, - def_name=def_tag) - continue - self._gathered_defs[def_tag] = def_value - if add_as_temp: - self._temporary_def_names.add(def_tag) - - def expand_def_tags(self, hed_string_obj, expand_defs=True, shrink_defs=False): - """ Validate and expand Def/Def-Expand tags. - - Parameters: - hed_string_obj (HedString): The hed string to process. - expand_defs (bool): If true, convert def tags to def-expand tag groups that include definition content. - shrink_defs (bool): If True, replace all def-expand groups with corresponding def tags. - - Returns: - list: Issues found related to validating defs. Each issue is a dictionary. - - Notes: - - This function can optionally expand or shrink Def/ and Def-expand, respectively. - - Usually issues are mismatched placeholders or a missing definition. - - The expand_defs and shrink_defs cannot both be True. - - """ - # First see if the "def" is found at all. This covers def and def-expand. 
- hed_string_lower = hed_string_obj.lower() - if self._label_tag_name not in hed_string_lower: - return [] - - def_issues = [] - # We need to check for labels to expand in ALL groups - for def_tag, def_expand_group, def_group in hed_string_obj.find_def_tags(recursive=True): - def_contents = self._get_definition_contents(def_tag, def_expand_group, def_issues) - if def_expand_group is def_tag: - if def_contents is not None and expand_defs: - def_tag.short_base_tag = DefTagNames.DEF_EXPAND_ORG_KEY - def_group.replace(def_tag, def_contents) - else: - if def_contents is not None and shrink_defs: - def_tag.short_base_tag = DefTagNames.DEF_ORG_KEY - def_group.replace(def_expand_group, def_tag) - - return def_issues - - def expand_and_remove_definitions(self, hed_string_obj, check_for_definitions=False, expand_defs=True, - shrink_defs=False, remove_definitions=True): - """ Validate and expand Def/Def-Expand tags. - - Also removes definitions - - Parameters: - hed_string_obj (HedString): The string to search for definitions. - check_for_definitions (bool): If True, this will first check the hed string for any definitions. - expand_defs (bool): If True, replace Def tags to Def-expand tag groups. - shrink_defs (bool): If True, replace Def-expand groups with Def tags. - remove_definitions (bool): If true, this will remove all Definition tag groups. - - Returns: - def_issues (list): A list of issues for definition-related tags in this string. Each issue is a dictionary. - - Notes: - - The check_for_definitions is mainly used for individual HedStrings in isolation. - - The defs can be expanded or shrunk, while definitions can be removed. - - This does not validate definitions, it will blindly remove invalid definitions as well. 
- - """ - def_issues = [] - if check_for_definitions: - def_issues += self.add_definitions_from_string_as_temp(hed_string_obj) - def_issues += self.expand_def_tags(hed_string_obj, expand_defs=expand_defs, shrink_defs=shrink_defs) - if remove_definitions: - def_issues += hed_string_obj.remove_definitions() - if check_for_definitions: - self.clear_temporary_definitions() - - return def_issues - - def _get_definition_contents(self, def_tag, def_expand_group, def_issues): - """ Check for issues with expanding a tag from Def to a Def-expand tag group and return the expanded tag group. - - Parameters: - def_tag (HedTag): Source hed tag that may be a Def or Def-expand tag. - def_expand_group (HedGroup or HedTag): - Source group for this def-expand tag. Same as def_tag if this is not a def-expand tag. - def_issues : [{}] - List of issues to append any new issues to - - Returns: - def_contents: [HedTag or HedGroup] - The contents to replace the previous def-tag with. - """ - # todo: This check could be removed for optimizing - if def_tag.short_base_tag.lower() != DefTagNames.DEF_EXPAND_KEY and \ - def_tag.short_base_tag.lower() != DefTagNames.DEF_KEY: - raise ValueError("Internal error in DefMapper") - - is_label_tag = def_tag.extension_or_value_portion - placeholder = None - found_slash = is_label_tag.find("/") - if found_slash != -1: - placeholder = is_label_tag[found_slash + 1:] - is_label_tag = is_label_tag[:found_slash] - - label_tag_lower = is_label_tag.lower() - def_entry = self._gathered_defs.get(label_tag_lower) - if def_entry is None: - def_issues += ErrorHandler.format_error(ValidationErrors.HED_DEF_UNMATCHED, tag=def_tag) - else: - def_tag_name, def_contents = def_entry.get_definition(def_tag, placeholder_value=placeholder) - if def_tag_name: - if def_expand_group is not def_tag and def_expand_group != def_contents: - def_issues += ErrorHandler.format_error(ValidationErrors.HED_DEF_EXPAND_INVALID, - tag=def_tag, actual_def=def_contents, - 
found_def=def_expand_group) - return None - return def_contents - elif def_entry.takes_value: - def_issues += ErrorHandler.format_error(ValidationErrors.HED_DEF_VALUE_MISSING, tag=def_tag) - else: - def_issues += ErrorHandler.format_error(ValidationErrors.HED_DEF_VALUE_EXTRA, tag=def_tag) - - return None - - def __get_string_funcs__(self, **kwargs): - """ String funcs for processing definitions. """ - string_funcs = [] - expand_defs = kwargs.get("expand_defs") - shrink_defs = kwargs.get("shrink_defs") - remove_definitions = kwargs.get("remove_definitions") - check_for_definitions = kwargs.get("check_for_definitions") - if shrink_defs and expand_defs: - raise ValueError("Cannot pass both shrink_defs and expand_defs to DefMapper") - from functools import partial - string_funcs.append(partial(self.expand_and_remove_definitions, - check_for_definitions=check_for_definitions, - expand_defs=expand_defs, - shrink_defs=shrink_defs, - remove_definitions=remove_definitions)) - return string_funcs - - def __get_tag_funcs__(self, **kwargs): - return [] diff --git a/hed/models/definition_dict.py b/hed/models/definition_dict.py index 13d0f083b..ca3b06b34 100644 --- a/hed/models/definition_dict.py +++ b/hed/models/definition_dict.py @@ -2,36 +2,60 @@ from hed.models.hed_string import HedString from hed.errors.error_types import DefinitionErrors from hed.errors.error_reporter import ErrorHandler -from functools import partial - from hed.models.model_constants import DefTagNames -from hed.models.hed_ops import HedOps -class DefinitionDict(HedOps): +class DefinitionDict: """ Gathers definitions from a single source. - This class extends HedOps because it has string_funcs to check for definitions. It has no tag_funcs. - """ - def __init__(self): + def __init__(self, def_dicts=None, hed_schema=None): """ Definitions to be considered a single source. 
""" - super().__init__() self.defs = {} + self._label_tag_name = DefTagNames.DEF_KEY + self._issues = [] + if def_dicts: + self.add_definitions(def_dicts, hed_schema) + + def add_definitions(self, def_dicts, hed_schema=None): + """ Add definitions from dict(s) to this dict. + + Parameters: + def_dicts (list or DefinitionDict): DefDict or list of DefDicts/strings whose definitions should be added. + hed_schema(HedSchema or None): Required if passing strings or lists of strings, unused otherwise. + """ + if not isinstance(def_dicts, list): + def_dicts = [def_dicts] + for def_dict in def_dicts: + if isinstance(def_dict, DefinitionDict): + self._add_definitions_from_dict(def_dict) + elif isinstance(def_dict, str) and hed_schema: + self.check_for_definitions(HedString(def_dict, hed_schema)) + elif isinstance(def_dict, list) and hed_schema: + for definition in def_dict: + self.check_for_definitions(HedString(definition, hed_schema)) + else: + print(f"Invalid input type '{type(def_dict)} passed to DefDict. Skipping.") - # Definition related issues - self._extract_def_issues = [] + def _add_definition(self, def_tag, def_value): + if def_tag in self.defs: + error_context = self.defs[def_tag].source_context + self._issues += ErrorHandler.format_error_from_context(DefinitionErrors.DUPLICATE_DEFINITION, + error_context=error_context, def_name=def_tag) + else: + self.defs[def_tag] = def_value - def get_definition_issues(self): - """ Return definition errors found during extraction. + def _add_definitions_from_dict(self, def_dict): + """ Add the definitions found in the given definition dictionary to this mapper. - Returns: - list: List of DefinitionErrors issues found. Each issue is a dictionary. + Parameters: + def_dict (DefinitionDict): DefDict whose definitions should be added. 
""" - return self._extract_def_issues + for def_tag, def_value in def_dict: + self._add_definition(def_tag, def_value) def get(self, def_name): return self.defs.get(def_name.lower()) @@ -39,12 +63,23 @@ def get(self, def_name): def __iter__(self): return iter(self.defs.items()) - def __get_string_funcs__(self, **kwargs): - error_handler = kwargs.get("error_handler") - return [partial(self.check_for_definitions, error_handler=error_handler)] + @property + def issues(self): + """Returns issues about duplicate definitions.""" + return self._issues + + def get_def_entry(self, def_name): + """ Get the definition entry for the definition name. + + Parameters: + def_name (str): Name of the definition to retrieve. + + Returns: + DefinitionEntry: Definition entry for the requested definition. + + """ - def __get_tag_funcs__(self, **kwargs): - return [] + return self.defs.get(def_name.lower()) def check_for_definitions(self, hed_string_obj, error_handler=None): """ Check string for definition tags, adding them to self. @@ -128,9 +163,84 @@ def check_for_definitions(self, hed_string_obj, error_handler=None): takes_value=def_takes_value, source_context=context) - self._extract_def_issues += new_def_issues return new_def_issues + def construct_def_tags(self, hed_string_obj): + """ Identify def/def-expand tag contents in the given string. + + Parameters: + hed_string_obj(HedString): The hed string to identify definition contents in + """ + for def_tag, def_expand_group, def_group in hed_string_obj.find_def_tags(recursive=True): + def_contents = self._get_definition_contents(def_tag) + if def_contents is not None: + def_tag._expandable = def_contents + def_tag._expanded = def_tag != def_expand_group + + def construct_def_tag(self, hed_tag): + """ Identify def/def-expand tag contents in the given HedTag. 
+ + Parameters: + hed_tag(HedTag): The hed tag to identify definition contents in + """ + if hed_tag.short_base_tag in {DefTagNames.DEF_ORG_KEY, DefTagNames.DEF_EXPAND_ORG_KEY}: + def_contents = self._get_definition_contents(hed_tag) + if def_contents is not None: + hed_tag._expandable = def_contents + hed_tag._expanded = hed_tag.short_base_tag == DefTagNames.DEF_EXPAND_ORG_KEY + + def expand_def_tags(self, hed_string_obj): + """ Expands def tags to def-expand tags. + + Parameters: + hed_string_obj (HedString): The hed string to process. + """ + # First see if the "def" is found at all. This covers def and def-expand. + hed_string_lower = hed_string_obj.lower() + if self._label_tag_name not in hed_string_lower: + return [] + + def_issues = [] + # We need to check for labels to expand in ALL groups + for def_tag, def_group in hed_string_obj.find_tags(DefTagNames.DEF_KEY, recursive=True): + def_contents = self._get_definition_contents(def_tag) + if def_contents is not None: + def_tag.short_base_tag = DefTagNames.DEF_EXPAND_ORG_KEY + def_group.replace(def_tag, def_contents) + + return def_issues + + def _get_definition_contents(self, def_tag): + """ Get the contents for a given def tag. + + Does not validate at all. + + Parameters: + def_tag (HedTag): Source hed tag that may be a Def or Def-expand tag. + + Returns: + def_contents: HedGroup + The contents to replace the previous def-tag with. + """ + is_label_tag = def_tag.extension_or_value_portion + placeholder = None + found_slash = is_label_tag.find("/") + if found_slash != -1: + placeholder = is_label_tag[found_slash + 1:] + is_label_tag = is_label_tag[:found_slash] + + label_tag_lower = is_label_tag.lower() + def_entry = self.defs.get(label_tag_lower) + if def_entry is None: + # Could raise an error here? 
+ return None + else: + def_tag_name, def_contents = def_entry.get_definition(def_tag, placeholder_value=placeholder) + if def_tag_name: + return def_contents + + return None + @staticmethod def get_as_strings(def_dict): """ Convert the entries to strings of the contents @@ -145,5 +255,3 @@ def get_as_strings(def_dict): def_dict = def_dict.defs return {key: str(value.contents) for key, value in def_dict.items()} - - diff --git a/hed/models/df_util.py b/hed/models/df_util.py new file mode 100644 index 000000000..b7e73a282 --- /dev/null +++ b/hed/models/df_util.py @@ -0,0 +1,125 @@ +from functools import partial + +from hed.models.sidecar import Sidecar +from hed.models.tabular_input import TabularInput +from hed import HedString + + +def get_assembled(tabular_file, sidecar, hed_schema, extra_def_dicts=None, join_columns=True, + shrink_defs=False, expand_defs=True): + """Load a tabular file and its associated HED sidecar file. + + Args: + tabular_file: str or TabularInput + The path to the tabular file, or a TabularInput object representing it. + sidecar: str or Sidecar + The path to the sidecar file, or a Sidecar object representing it. + hed_schema: str or HedSchema + If str, will attempt to load as a version if it doesn't have a valid extension. + extra_def_dicts: list of DefinitionDict, optional + Any extra DefinitionDict objects to use when parsing the HED tags. + join_columns: bool + If true, join all hed columns into one. 
+ shrink_defs: bool + Shrink any def-expand tags found + expand_defs: bool + Expand any def tags found + Returns: + A list of HedStrings, or a list of lists of HedStrings + """ + if isinstance(sidecar, str): + sidecar = Sidecar(sidecar) + + if isinstance(tabular_file, str): + tabular_file = TabularInput(tabular_file, sidecar) + + def_dict = None + if sidecar: + def_dict = sidecar.get_def_dict(hed_schema=hed_schema, extra_def_dicts=extra_def_dicts) + + if join_columns: + if expand_defs: + return [HedString(x, hed_schema, def_dict).expand_defs() for x in tabular_file.series_a], def_dict + elif shrink_defs: + return [HedString(x, hed_schema, def_dict).shrink_defs() for x in tabular_file.series_a], def_dict + else: + return [HedString(x, hed_schema, def_dict) for x in tabular_file.series_a], def_dict + else: + return [[HedString(x, hed_schema, def_dict).expand_defs() if expand_defs + else HedString(x, hed_schema, def_dict).shrink_defs() if shrink_defs + else HedString(x, hed_schema, def_dict) + for x in text_file_row] for text_file_row in tabular_file.dataframe_a.itertuples(index=False)], def_dict + + +def convert_to_form(df, hed_schema, tag_form, columns): + """ Convert all tags in underlying dataframe to the specified form. + + Converts in place + Parameters: + df (pd.Dataframe): The dataframe to modify + hed_schema (HedSchema): The schema to use to convert tags. + tag_form(str): HedTag property to convert tags to. + columns (list): The columns to modify on the dataframe + """ + if columns is None: + columns = df.columns + + for column in columns: + df[column] = df[column].apply(partial(_convert_to_form, hed_schema=hed_schema, tag_form=tag_form)) + + return df + + +def shrink_defs(df, hed_schema, columns): + """ Shrinks any def-expand tags found in the dataframe. + + Converts in place + Parameters: + df (pd.Dataframe): The dataframe to modify + hed_schema (HedSchema or None): The schema to use to identify defs. 
+ columns (list): The columns to modify on the dataframe + """ + if columns is None: + columns = df.columns + + for column in columns: + mask = df[column].str.contains('Def-expand/', case=False) + df[column][mask] = df[column][mask].apply(partial(_shrink_defs, hed_schema=hed_schema)) + + return df + + +def expand_defs(df, hed_schema, def_dict, columns): + """ Expands any def tags found in the dataframe. + + Converts in place + + Parameters: + df (pd.Dataframe): The dataframe to modify + hed_schema (HedSchema or None): The schema to use to identify defs + def_dict (DefinitionDict): The definitions to expand + columns (list): The columns to modify on the dataframe + """ + if columns is None: + columns = df.columns + + for column in columns: + mask = df[column].str.contains('Def/', case=False) + df[column][mask] = df[column][mask].apply(partial(_expand_defs, hed_schema=hed_schema, def_dict=def_dict)) + + return df + + +def _convert_to_form(hed_string, hed_schema, tag_form): + from hed import HedString + return str(HedString(hed_string, hed_schema).get_as_form(tag_form)) + + +def _shrink_defs(hed_string, hed_schema): + from hed import HedString + return str(HedString(hed_string, hed_schema).shrink_defs()) + + +def _expand_defs(hed_string, hed_schema, def_dict): + from hed import HedString + return str(HedString(hed_string, hed_schema, def_dict).expand_defs()) diff --git a/hed/models/expression_parser.py b/hed/models/expression_parser.py index 68c4e7f59..8a9806d42 100644 --- a/hed/models/expression_parser.py +++ b/hed/models/expression_parser.py @@ -1,7 +1,6 @@ import re -# todo: Add support for early outs with and(only try to match groups we already matched instead of all groups) class search_result: def __init__(self, group, tag): self.group = group @@ -179,8 +178,6 @@ def handle_expr(self, hed_group, exact=False): continue return_list.append(merged_result) - # finally simplify the list and remove duplicates. 
- return return_list def __str__(self): @@ -193,6 +190,7 @@ def __str__(self): output_str += ")" return output_str + class ExpressionWildcardNew(Expression): def handle_expr(self, hed_group, exact=False): groups_found = [] diff --git a/hed/models/hed_group.py b/hed/models/hed_group.py index e61a3d3b3..6df911801 100644 --- a/hed/models/hed_group.py +++ b/hed/models/hed_group.py @@ -312,12 +312,11 @@ def get_as_long(self): """ return self.get_as_form("long_tag") - def get_as_form(self, tag_attribute, tag_transformer=None): + def get_as_form(self, tag_attribute): """ Get the string corresponding to the specified form. Parameters: tag_attribute (str): The hed_tag property to use to construct the string (usually short_tag or long_tag). - tag_transformer (func or None): A function that is applied to each tag string before returning. Returns: str: The constructed string after transformation @@ -326,13 +325,8 @@ def get_as_form(self, tag_attribute, tag_transformer=None): - The signature of a tag_transformer is str def(HedTag, str). 
""" - if tag_transformer: - result = ",".join([tag_transformer(child, child.__getattribute__(tag_attribute)) - if isinstance(child, HedTag) else child.get_as_form(tag_attribute, tag_transformer) - for child in self.children]) - else: - result = ",".join([child.__getattribute__(tag_attribute) if isinstance(child, HedTag) else - child.get_as_form(tag_attribute) for child in self.children]) + result = ",".join([child.__getattribute__(tag_attribute) if isinstance(child, HedTag) else + child.get_as_form(tag_attribute) for child in self.children]) if self.is_group: return f"({result})" return result @@ -365,6 +359,8 @@ def __eq__(self, other): if self is other: return True + if isinstance(other, str): + return str(self) == other if not isinstance(other, HedGroup) or self.children != other.children or self.is_group != other.is_group: return False return True @@ -484,9 +480,9 @@ def find_def_tags(self, recursive=False, include_groups=3): """ Find def and def-expand tags Parameters: recursive (bool): If true, also check subgroups. - include_groups (int, 0, 1, 2, 3): options for how to expand or include groups + include_groups (int, 0, 1, 2, 3): options for return values Returns: - list: A list of tuples. The contents depends on the values of the include group. + list: A list of tuples. The contents depend on the values of the include_group. Notes: - The include_groups option controls the tag expansion as follows: - If 0: Return only def and def expand tags/. diff --git a/hed/models/hed_ops.py b/hed/models/hed_ops.py deleted file mode 100644 index c56c93c78..000000000 --- a/hed/models/hed_ops.py +++ /dev/null @@ -1,262 +0,0 @@ -""" Infrastructure for processing HED operations. """ - -from functools import partial -from hed.schema import HedSchema, HedSchemaGroup -from hed.errors.error_types import ErrorContext, SidecarErrors -from hed.errors import ErrorHandler - - -# These are the defaults if you pass in nothing. Most built in routes will have other default values. 
-default_arguments = { - 'allow_placeholders': False, - 'check_for_definitions': False, - 'expand_defs': False, - 'shrink_defs': False, - 'error_handler': None, - 'check_for_warnings': False, - 'remove_definitions': True -} - - -def translate_ops(hed_ops, split_ops=False, hed_schema=None, **kwargs): - """ Return functions to apply to a hed string object. - - Parameters: - hed_ops (list): A list of func or HedOps or HedSchema to apply to hed strings. - split_ops (bool): If true, will split the operations into separate lists of tag and string operations. - hed_schema(HedSchema or None): The schema to use by default in identifying tags - kwargs (kwargs): An optional dictionary of name-value pairs representing parameters passed to each HedOps - - Returns: - list or tuple: A list of functions to apply or a tuple containing separate lists of tag and string ops. - - Notes: - - The distinction between tag and string ops primarily applies to spreadsheets. - - Splitting the ops into two lists is mainly used for parsing spreadsheets where any given - column isn't an entire hed string, but additional detail is needed on which column an - issue original came from. - - The currently accepted values of kwargs are: - - allow_placeholders - - check_for_definitions - - expand_defs - - shrink_defs - - error_handler - - check_for_warnings - - remove_definitions - - """ - if not isinstance(hed_ops, list): - hed_ops = [hed_ops] - - from hed.models.hed_string import HedString - - settings = default_arguments.copy() - settings.update(kwargs) - - tag_funcs = [] - string_funcs = [] - for hed_op in hed_ops: - if hed_op: - # Handle the special case of a hed schema. 
- if isinstance(hed_op, (HedSchema, HedSchemaGroup)): - tag_funcs.append(partial(HedString.convert_to_canonical_forms, hed_schema=hed_op)) - else: - try: - tag_funcs += hed_op.__get_tag_funcs__(**settings) - string_funcs += hed_op.__get_string_funcs__(**settings) - except AttributeError: - string_funcs.append(hed_op) - - # Make sure the first column operation is a convert to forms, if we don't have one. - if not _func_in_list(HedString.convert_to_canonical_forms, tag_funcs): - tag_funcs.insert(0, partial(HedString.convert_to_canonical_forms, hed_schema=hed_schema)) - - if split_ops: - return tag_funcs, string_funcs - return tag_funcs + string_funcs - - -def apply_ops(hed_strings, hed_ops, **kwargs): - """ Convenience function to update a list/dict of hed strings - - Parameters: - hed_strings(str, dict, list): A list/dict/str to update - hed_ops (list or HedOps or func): A list of func or HedOps or HedSchema to apply to hed strings. - kwargs (kwargs): An optional dictionary of name-value pairs representing parameters passed to each HedOps - - Returns: - tuple: - hed_strings(str, dict, list): Same type as input - issues(list): A list of issues found applying the hed_ops - """ - from hed.models.hed_string import HedString - - if not hed_strings: - return hed_strings, [] - issues = [] - tag_funcs = translate_ops(hed_ops, **kwargs) - if isinstance(hed_strings, str): - hed_string_obj = HedString(hed_strings) - issues += hed_string_obj.apply_funcs(tag_funcs) - return str(hed_string_obj), issues - elif isinstance(hed_strings, dict): - return_dict = {} - for key, hed_string in hed_strings.items(): - hed_string_obj = HedString(hed_string) - issues += hed_string_obj.apply_funcs(tag_funcs) - return_dict[key] = str(hed_string_obj) - return return_dict, issues - elif isinstance(hed_strings, list): - return_list = [] - for hed_string in hed_strings: - hed_string_obj = HedString(hed_string) - issues += hed_string_obj.apply_funcs(tag_funcs) - return_list.append(str(hed_string_obj)) 
- return return_list, issues - - raise ValueError("Unaccounted for type in apply_ops") - - -def hed_string_iter(hed_strings, tag_funcs, error_handler): - """ Iterate over the given dict of strings, returning HedStrings - - Also gives issues for blank strings - - Parameters: - hed_strings(dict or str): A hed_string or dict of hed strings - tag_funcs (list of funcs): The functions to apply before returning - error_handler (ErrorHandler): The error handler to use for context, uses a default one if none. - - Yields: - tuple: - - HedString: The hed string at a given column and key position. - - str: Indication of the where hed string was loaded from so it can be later set by the user. - - list: Issues found applying hed_ops. Each issue is a dictionary. - - """ - for hed_string_obj, key_name in _hed_iter_low(hed_strings): - new_col_issues = [] - error_handler.push_error_context(ErrorContext.SIDECAR_KEY_NAME, key_name) - if not hed_string_obj: - new_col_issues += ErrorHandler.format_error(SidecarErrors.BLANK_HED_STRING) - error_handler.add_context_to_issues(new_col_issues) - yield hed_string_obj, key_name, new_col_issues - else: - error_handler.push_error_context(ErrorContext.HED_STRING, hed_string_obj, - increment_depth_after=False) - if tag_funcs: - new_col_issues += hed_string_obj.apply_funcs(tag_funcs) - - error_handler.add_context_to_issues(new_col_issues) - yield hed_string_obj, key_name, new_col_issues - error_handler.pop_error_context() - error_handler.pop_error_context() - - -def _hed_iter_low(hed_strings): - """ Iterate over the hed string entries. - - Used by hed_string_iter - - Parameters: - hed_strings(dict or str): A hed_string or dict of hed strings - - Yields: - tuple: - - HedString: Individual hed strings for different entries. - - str: The position to pass back to set this string. 
- - """ - from hed.models.hed_string import HedString - - if isinstance(hed_strings, dict): - for key, hed_string in hed_strings.items(): - if isinstance(hed_string, str): - hed_string = HedString(hed_string) - else: - continue - yield hed_string, key - elif isinstance(hed_strings, str): - hed_string = HedString(hed_strings) - yield hed_string, None - - -def set_hed_string(new_hed_string, hed_strings, position=None): - """ Set a hed string for a category key/etc. - - Parameters: - new_hed_string (str or HedString): The new hed_string to replace the value at position. - hed_strings(dict or str or HedString): The hed strings we want to update - position (str, optional): This should only be a value returned from hed_string_iter. - - Returns: - updated_string (str or dict): The newly updated string/dict. - Raises: - TypeError: If the mapping cannot occur. - - """ - from hed.models.hed_string import HedString - - if isinstance(hed_strings, dict): - if position is None: - raise TypeError("Error: Trying to set a category HED string with no category") - if position not in hed_strings: - raise TypeError("Error: Not allowed to add new categories to a column") - hed_strings[position] = str(new_hed_string) - elif isinstance(hed_strings, (str, HedString)): - if position is not None: - raise TypeError("Error: Trying to set a value HED string with a category") - hed_strings = str(new_hed_string) - else: - raise TypeError("Error: Trying to set a HED string on a column_type that doesn't support it.") - - return hed_strings - - -class HedOps: - """ Base class to support HedOps. - - Notes: - - HED ops are operations that apply to HedStrings in a sequence. - - """ - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - def __get_string_funcs__(self, **kwargs): - """ Return the operations that should be done on the full string at once. - - Parameters: - kwargs See above. 
- - Returns: - list: A list of functions that take a single hed string as a parameter, and return a list of issues. - - """ - return [] - - def __get_tag_funcs__(self, **kwargs): - """ Return the operations that should be done on the individual tags in the string. - - Parameters: - kwargs: See above. - - Returns: - list: A list of functions that take a single hed string as a parameter, and return a list of issues. - - """ - return [] - - # Todo: possibly add parameter validation - # def __get_valid_parameters__(self): - # return [] - - -def _func_in_list(find_func, func_list): - for func in func_list: - if func == find_func: - return True - if isinstance(func, partial) and getattr(func, 'func') == find_func: - return True - return False diff --git a/hed/models/hed_string.py b/hed/models/hed_string.py index fee47ea12..fe864b28e 100644 --- a/hed/models/hed_string.py +++ b/hed/models/hed_string.py @@ -3,9 +3,6 @@ """ from hed.models.hed_group import HedGroup from hed.models.hed_tag import HedTag -from hed.errors.error_reporter import ErrorHandler, check_for_any_errors -from hed.errors.error_types import ErrorContext -from hed.models.hed_ops import translate_ops from hed.models.model_constants import DefTagNames @@ -15,7 +12,7 @@ class HedString(HedGroup): OPENING_GROUP_CHARACTER = '(' CLOSING_GROUP_CHARACTER = ')' - def __init__(self, hed_string, hed_schema=None, _contents=None): + def __init__(self, hed_string, hed_schema=None, def_dict=None, _contents=None): """ Constructor for the HedString class. 
Parameters: @@ -32,7 +29,7 @@ def __init__(self, hed_string, hed_schema=None, _contents=None): contents = _contents else: try: - contents = self.split_into_groups(hed_string, hed_schema) + contents = self.split_into_groups(hed_string, hed_schema, def_dict) except ValueError: contents = [] super().__init__(hed_string, contents=contents, startpos=0, endpos=len(hed_string)) @@ -59,10 +56,8 @@ def is_group(self): def convert_to_canonical_forms(self, hed_schema): """ Identify all tags using the given schema. - If schema is None, still identify "key" tags such as definitions. - Parameters: - hed_schema (HedSchema, HedSchemaGroup, None): The schema to use to validate/convert tags. + hed_schema (HedSchema, HedSchemaGroup): The schema to use to validate/convert tags. Returns: list: A list of issues found while converting the string. Each issue is a dictionary. @@ -89,6 +84,43 @@ def remove_definitions(self): return [] + def shrink_defs(self): + """ Replace def-expand tags with def tags + + This does not validate them and will blindly shrink invalid ones as well. + + Returns: + self + """ + for def_expand_tag, def_expand_group in self.find_tags({DefTagNames.DEF_EXPAND_KEY}, recursive=True): + expanded_parent = def_expand_group._parent + if expanded_parent: + def_expand_tag.short_base_tag = DefTagNames.DEF_ORG_KEY + expanded_parent.replace(def_expand_group, def_expand_tag) + + return self + + def expand_defs(self): + """ Replace def tags with def-expand tags + + This does very minimal validation + + Returns: + self + """ + def_tags = self.find_def_tags(recursive=True, include_groups=0) + + replacements = [] + for tag in def_tags: + if not tag._expanded: + replacements.append((tag, tag._expandable)) + + for tag, group in replacements: + self.replace(tag, group) + tag.short_base_tag = DefTagNames.DEF_EXPAND_KEY + + return self + def convert_to_short(self, hed_schema): """ Compute canonical forms and return the short form. 
@@ -140,13 +172,13 @@ def convert_to_original(self): return self.get_as_form("org_tag") @staticmethod - def split_into_groups(hed_string, hed_schema=None): + def split_into_groups(hed_string, hed_schema=None, def_dict=None): """ Split the HED string into a parse tree. Parameters: hed_string (str): A hed string consisting of tags and tag groups to be processed. - hed_schema (HedSchema or None): Hed schema to use to identify tags. - + hed_schema (HedSchema or None): HED schema to use to identify tags. + def_dict(DefinitionDict): The definitions to identify Returns: list: A list of HedTag and/or HedGroup. @@ -162,7 +194,7 @@ def split_into_groups(hed_string, hed_schema=None): input_tags = HedString.split_hed_string(hed_string) for is_hed_tag, (startpos, endpos) in input_tags: if is_hed_tag: - new_tag = HedTag(hed_string, (startpos, endpos), hed_schema) + new_tag = HedTag(hed_string, (startpos, endpos), hed_schema, def_dict) current_tag_group[-1].append(new_tag) else: string_portion = hed_string[startpos:endpos] @@ -178,6 +210,8 @@ def split_into_groups(hed_string, hed_schema=None): current_tag_group.append(HedGroup(hed_string, startpos + delimiter_index)) if delimiter_char is HedString.CLOSING_GROUP_CHARACTER: + # if prev_delimiter == ",": + # raise ValueError(f"Closing parentheses in hed string {hed_string}") # Terminate existing group, and save it off. paren_end = startpos + delimiter_index + 1 @@ -282,54 +316,21 @@ def split_hed_string(hed_string): return result_positions - def apply_funcs(self, string_funcs): - """ Run functions on this string. - - Parameters: - string_funcs (list): A list of functions that take a hed string object and return a list of issues. - - Returns: - list: A list of issues found by these operations. Each issue is a dictionary. - - Notes: - - This method potentially modifies the hed string object. 
- + def validate(self, hed_schema, allow_placeholders=True, error_handler=None): """ - string_issues = [] - for string_func in string_funcs: - string_issues += string_func(self) - if string_issues: - if check_for_any_errors(string_issues): - break - - return string_issues - - def validate(self, hed_ops=None, error_handler=None, **kwargs): - """ Run the given hed_ops on this string. + Validate the string using the schema Parameters: - hed_ops: (func, HedOps, or list): Operations to apply to this object. - error_handler (ErrorHandler or None): Used to report errors in context. Uses a default if None. - kwargs: - See models.hed_ops.translate_ops or the specific hed_ops for additional options - + hed_schema(HedSchema): The schema to use to validate + allow_placeholders(bool): allow placeholders in the string + error_handler(ErrorHandler or None): the error handler to use, creates a default one if none passed Returns: - list: A list of issues encountered in applying these operations. Each issue is a dictionary. - - Notes: - - Although this function is called validation, the HedOps can represent other transformations. - + issues (list of dict): A list of issues for hed string """ - if error_handler is None: - error_handler = ErrorHandler() - tag_funcs = translate_ops(hed_ops, **kwargs) + from hed.validator import HedValidator - error_handler.push_error_context(ErrorContext.HED_STRING, self, increment_depth_after=False) - issues = self.apply_funcs(tag_funcs) - error_handler.add_context_to_issues(issues) - error_handler.pop_error_context() - - return issues + validator = HedValidator(hed_schema) + return validator.validate(self, allow_placeholders=allow_placeholders) def find_top_level_tags(self, anchor_tags, include_groups=2): """ Find top level groups with an anchor tag. 
@@ -359,4 +360,3 @@ def find_top_level_tags(self, anchor_tags, include_groups=2): if include_groups == 0 or include_groups == 1: return [tag[include_groups] for tag in top_level_tags] return top_level_tags - diff --git a/hed/models/hed_tag.py b/hed/models/hed_tag.py index c059d8850..29bcf8cf6 100644 --- a/hed/models/hed_tag.py +++ b/hed/models/hed_tag.py @@ -1,5 +1,5 @@ from hed.schema.hed_schema_constants import HedKey -from hed.schema.hed_schema_entry import HedTagEntry +import copy class HedTag: @@ -11,7 +11,7 @@ class HedTag: """ - def __init__(self, hed_string, span=None, hed_schema=None): + def __init__(self, hed_string, span=None, hed_schema=None, def_dict=None): """ Creates a HedTag. Parameters: @@ -23,14 +23,16 @@ def __init__(self, hed_string, span=None, hed_schema=None): - This does not produce issues and is used primarily for testing. """ + if def_dict and not hed_schema: + raise ValueError("Passing a def_dict without also passing a schema is invalid.") self._hed_string = hed_string if span is None: span = (0, len(hed_string)) # This is the span into the original hed string for this tag self.span = span - # If this is present, use this as the org tag for most purposes. This is generally only filled out - # if the tag has a name_prefix added, or is an expanded def. + # If this is present, use this as the org tag for most purposes. + # This is not generally used anymore, but you can use it to replace a tag in place. self._tag = None self._schema_prefix = self._get_schema_prefix(self.org_tag) @@ -42,8 +44,15 @@ def __init__(self, hed_string, span=None, hed_schema=None): self._extension_value = "" self._parent = None + # Downsides: two new parameters + # Have to check for this value, slowing everything down potentially. 
+ self._expandable = None + self._expanded = False + if hed_schema: self.convert_to_canonical_forms(hed_schema) + if def_dict: + def_dict.construct_def_tag(self) @property def schema_prefix(self): @@ -115,10 +124,11 @@ def short_base_tag(self, new_tag_val): - Generally this is used to swap def to def-expand. """ if self._schema_entry: + tag_entry = None if self._schema: + if self.is_takes_value_tag(): + new_tag_val = new_tag_val + "/#" tag_entry = self._schema.get_tag_entry(new_tag_val, schema_prefix=self.schema_prefix) - else: - tag_entry, remainder = HedTagEntry.get_fake_tag_entry(new_tag_val, [new_tag_val.lower()]) self._schema_entry = tag_entry else: @@ -185,15 +195,11 @@ def tag(self, new_tag_val): new_tag_val (str): New (implicitly long form) of tag to set. Notes: - - Primarily used to add prefixes from column metadata to tags. - - Only valid before calling convert_to_canonical_forms. - + - You probably don't actually want to call this. """ - - if self._schema_entry: - raise ValueError("Can only edit tags before calculating canonical forms. " + - "This could be updated to instead remove computed forms.") self._tag = new_tag_val + self._schema_entry = None + self.convert_to_canonical_forms(self._schema) @property def extension_or_value_portion(self): @@ -250,9 +256,29 @@ def tag_terms(self): if self._schema_entry: return self._schema_entry.tag_terms - # TODO: Potentially remove this. It's just a quick hack for testing - return tuple(str(self).lower()) - #return tuple() + return tuple() + + @property + def expanded(self): + """Returns if this is currently expanded or not. + + Will always be false unless expandable is set. This is primarily used for Def/Def-expand tags at present. + + Returns: + bool: Returns true if this is currently expanded + """ + return self._expanded + + @property + def expandable(self): + """Returns if this is expandable + + This is primarily used for Def/Def-expand tags at present. 
+ + Returns: + HedGroup or HedTag or None: Returns the expanded form of this tag + """ + return self._expandable def __str__(self): """ Convert this HedTag to a string. @@ -269,39 +295,6 @@ def __str__(self): return self._hed_string[self.span[0]:self.span[1]] - def add_prefix_if_needed(self, required_prefix): - """ Add a prefix to this tag *unless* already formatted. - - Parameters: - required_prefix (str): The full name_prefix to add if not present. - - Notes: - - This means we verify the tag does not have the required name_prefix, or any partial name_prefix. - - Examples: - Required: KnownTag1/KnownTag2 - - Case 1: KnownTag1/KnownTag2/ColumnValue - Will not be changed, has name_prefix already. - - Case 2: KnownTag2/ColumnValue - Will not be changed, has partial name_prefix already. - - Case 3: ColumnValue - Prefix will be added. - - """ - - checking_prefix = required_prefix - while checking_prefix: - if self.lower().startswith(checking_prefix.lower()): - return - slash_index = checking_prefix.find("/") + 1 - if slash_index == 0: - break - checking_prefix = checking_prefix[slash_index:] - self.tag = required_prefix + self.org_tag - def lower(self): """ Convenience function, equivalent to str(self).lower(). """ return str(self).lower() @@ -316,9 +309,6 @@ def convert_to_canonical_forms(self, hed_schema): list: A list of issues found during conversion. Each element is a dictionary. """ - if not hed_schema: - return self._convert_key_tags_to_canonical_form() - tag_entry, remainder, tag_issues = hed_schema.find_tag_entry(self, self.schema_prefix) self._schema_entry = tag_entry self._schema = hed_schema @@ -433,7 +423,7 @@ def is_value_class_tag(self): """ Return true if this is a value class tag. Returns: - bool: True if this is a a tag with a value class. + bool: True if this is a tag with a value class. 
""" if self._schema_entry: @@ -536,26 +526,8 @@ def any_parent_has_attribute(self, attribute): if self._schema_entry: return self._schema_entry.any_parent_has_attribute(attribute=attribute) - def _convert_key_tags_to_canonical_form(self): - """ Find the canonical form for basic known tags. - - Returns: - list: Always return an empty list. - - Notes: - - This is used for such as definition and def when no schema present - - """ - tags_to_identify = ["onset", "definition", "offset", "def-expand", "def"] - tag_entry, remainder = HedTagEntry.get_fake_tag_entry(str(self), tags_to_identify) - if tag_entry: - self._schema_entry = tag_entry - self._schema = None - self._extension_value = remainder - - return [] - - def _get_schema_prefix(self, org_tag): + @staticmethod + def _get_schema_prefix(org_tag): """ Finds the library prefix for the tag. Parameters: @@ -649,3 +621,28 @@ def __eq__(self, other): if self.org_tag.lower() == other.org_tag.lower(): return True return False + + def __deepcopy__(self, memo): + # check if the object has already been copied + if id(self) in memo: + return memo[id(self)] + + # create a new instance of HedTag class + new_tag = HedTag(self._hed_string, self.span) + + # add the new object to the memo dictionary + memo[id(self)] = new_tag + + # copy all other attributes except schema and schema_entry + new_tag._tag = copy.deepcopy(self._tag, memo) + new_tag._schema_prefix = copy.deepcopy(self._schema_prefix, memo) + new_tag._extension_value = copy.deepcopy(self._extension_value, memo) + new_tag._parent = copy.deepcopy(self._parent, memo) + new_tag._expandable = copy.deepcopy(self._expandable, memo) + new_tag._expanded = copy.deepcopy(self._expanded, memo) + + # reference the schema and schema_entry from the original object + new_tag._schema = self._schema + new_tag._schema_entry = self._schema_entry + + return new_tag diff --git a/hed/models/sidecar.py b/hed/models/sidecar.py index 59052b0b1..8b808c6d1 100644 --- a/hed/models/sidecar.py +++ 
b/hed/models/sidecar.py @@ -1,30 +1,50 @@ import json from hed.models.column_metadata import ColumnMetadata -from hed.errors.error_types import ErrorContext, SidecarErrors +from hed.errors.error_types import ErrorContext from hed.errors import ErrorHandler from hed.errors.exceptions import HedFileError, HedExceptions from hed.models.hed_string import HedString from hed.models.column_metadata import ColumnType -from hed.models.hed_ops import apply_ops, hed_string_iter, set_hed_string -from hed.models.sidecar_base import SidecarBase +from hed.models.definition_dict import DefinitionDict -class Sidecar(SidecarBase): +# todo: Add/improve validation for definitions being in known columns(right now it just assumes they aren't) +class Sidecar: """ Contents of a JSON file or merged file. """ - def __init__(self, files, name=None, hed_schema=None): + def __init__(self, files, name=None): """ Construct a Sidecar object representing a JSON file. Parameters: files (str or FileLike or list): A string or file-like object representing a JSON file, or a list of such. name (str or None): Optional name identifying this sidecar, generally a filename. - hed_schema(HedSchema or None): The schema to use by default in identifying tags """ - super().__init__(name, hed_schema=hed_schema) + self.name = name self.loaded_dict = self.load_sidecar_files(files) - self.def_dict = self.extract_definitions(hed_schema) + self._def_dict = None + self._extract_definition_issues = [] + + def __iter__(self): + """ An iterator to go over the individual column metadata. + + Returns: + iterator: An iterator over the column metadata values. + + """ + return iter(self.column_data) + + @property + def def_dict(self): + """This is the definitions from this sidecar. 
+ + Generally you should instead call get_def_dict to get the relevant definitions + + Returns: + DefinitionDict: The definitions for this sidecar + """ + return self._def_dict @property def column_data(self): @@ -36,53 +56,38 @@ def column_data(self): for col_name, col_dict in self.loaded_dict.items(): yield self._generate_single_column(col_name, col_dict) - def _hed_string_iter(self, tag_funcs, error_handler): - """ Low level function to retrieve hed string in sidecar - - Parameters: - tag_funcs(list): A list of functions to apply to returned strings - error_handler(ErrorHandler): Error handler to use for context - - Yields: - tuple: - string(HedString): The retrieved and modified string - position(tuple): The location of this hed string. Black box. - issues(list): A list of issues running the tag_funcs. - """ - for column_name, dict_for_entry in self.loaded_dict.items(): - error_handler.push_error_context(ErrorContext.SIDECAR_COLUMN_NAME, column_name) - hed_dict = dict_for_entry.get("HED", {}) - for (hed_string_obj, position, issues) in hed_string_iter(hed_dict, tag_funcs, error_handler): - yield hed_string_obj, (column_name, position), issues - - error_handler.pop_error_context() - - def _set_hed_string(self, new_hed_string, position): - """ Low level function to update hed string in sidecar + def set_hed_string(self, new_hed_string, position): + """ Set a provided column/category key/etc. Parameters: new_hed_string (str or HedString): The new hed_string to replace the value at position. - position (tuple): The value returned from hed_string_iter. + position (tuple): The (HedString, str, list) tuple returned from hed_string_iter. 
+ """ column_name, position = position hed_dict = self.loaded_dict[column_name] - hed_dict["HED"] = set_hed_string(new_hed_string, hed_dict["HED"], position) + hed_dict["HED"] = self._set_hed_string_low(new_hed_string, hed_dict["HED"], position) - def validate_structure(self, error_handler): - """ Validate the raw structure of this sidecar. + def get_def_dict(self, hed_schema=None, extra_def_dicts=None): + """ Returns the definition dict for this sidecar. Parameters: - error_handler(ErrorHandler): The error handler to use for error context + hed_schema(HedSchema): used to identify tags to find definitions + extra_def_dicts (list, DefinitionDict, or None): Extra dicts to add to the list. Returns: - issues(list): A list of issues found with the structure + DefinitionDict: A single definition dict representing all the data(and extra def dicts) """ - all_validation_issues = [] - for column_name, dict_for_entry in self.loaded_dict.items(): - error_handler.push_error_context(ErrorContext.SIDECAR_COLUMN_NAME, column_name) - all_validation_issues += self._validate_column_structure(column_name, dict_for_entry, error_handler) - error_handler.pop_error_context() - return all_validation_issues + if self._def_dict is None and hed_schema: + self._def_dict = self.extract_definitions(hed_schema) + def_dicts = [] + if self.def_dict: + def_dicts.append(self.def_dict) + if extra_def_dicts: + if not isinstance(extra_def_dicts, list): + extra_def_dicts = [extra_def_dicts] + def_dicts += extra_def_dicts + return DefinitionDict(def_dicts) def save_as_json(self, save_filename): """ Save column metadata to a JSON file. @@ -146,6 +151,26 @@ def load_sidecar_files(self, files): merged_dict.update(loaded_json) return merged_dict + def validate(self, hed_schema, extra_def_dicts=None, name=None, error_handler=None): + """Create a SidecarValidator and validate this sidecar with the schema. + + Parameters: + hed_schema (HedSchema): Input data to be validated. 
+ extra_def_dicts(list or DefinitionDict): extra def dicts in addition to sidecar + name(str): The name to report this sidecar as + error_handler (ErrorHandler): Error context to use. Creates a new one if None + Returns: + issues (list of dict): A list of issues associated with each level in the HED string. + """ + from hed.validator.sidecar_validator import SidecarValidator + + if error_handler is None: + error_handler = ErrorHandler() + + validator = SidecarValidator(hed_schema) + issues = validator.validate(self, extra_def_dicts, name, error_handler=error_handler) + return issues + def _load_json_file(self, fp): """ Load the raw json of a given file @@ -176,8 +201,7 @@ def _generate_single_column(self, column_name, dict_for_entry, column_type=None) hed_dict = dict_for_entry.get("HED") else: hed_dict = None - def_removed_dict, _ = apply_ops(hed_dict, HedString.remove_definitions) - column_entry = ColumnMetadata(column_type, column_name, def_removed_dict) + column_entry = ColumnMetadata(column_type, column_name, hed_dict) return column_entry @staticmethod @@ -211,36 +235,124 @@ def _detect_column_type(dict_for_entry): return ColumnType.Value - def _validate_column_structure(self, column_name, dict_for_entry, error_handler): - """ Checks primarily for type errors such as expecting a string and getting a list in a json sidecar. + def extract_definitions(self, hed_schema=None, error_handler=None): + """ Gather and validate definitions in metadata. Parameters: - error_handler (ErrorHandler) Sets the context for the error reporting. Cannot be None. + error_handler (ErrorHandler): The error handler to use for context, uses a default one if None. + hed_schema (HedSchema or None): The schema to used to identify tags. Returns: - list: Issues in performing the operations. Each issue is a dictionary. + DefinitionDict: Contains all the definitions located in the sidecar. 
""" - val_issues = [] - column_type = self._detect_column_type(dict_for_entry=dict_for_entry) - if column_type is None: - val_issues += ErrorHandler.format_error(SidecarErrors.UNKNOWN_COLUMN_TYPE, - column_name=column_name) - elif column_type == ColumnType.Categorical: - raw_hed_dict = dict_for_entry["HED"] - if not raw_hed_dict: - val_issues += ErrorHandler.format_error(SidecarErrors.BLANK_HED_STRING) - if not isinstance(raw_hed_dict, dict): - val_issues += ErrorHandler.format_error(SidecarErrors.WRONG_HED_DATA_TYPE, - given_type=type(raw_hed_dict), - expected_type="dict") - for key_name, hed_string in raw_hed_dict.items(): + if error_handler is None: + error_handler = ErrorHandler() + def_dict = DefinitionDict() + + self._extract_definition_issues = [] + if hed_schema: + for hed_string, column_data, _ in self.hed_string_iter(error_handler): + hed_string_obj = HedString(hed_string, hed_schema) + error_handler.push_error_context(ErrorContext.HED_STRING, hed_string_obj, + increment_depth_after=False) + self._extract_definition_issues += def_dict.check_for_definitions(hed_string_obj, error_handler) + error_handler.pop_error_context() + + return def_dict + + def hed_string_iter(self, error_handler=None): + """ Gather and validate definitions in metadata. + + Parameters: + error_handler (ErrorHandler): The error handler to use for context, uses a default one if None. + + Yields: + str: The hed string at a given column and key position. + column_data: the column data for the given string. 
+ position: blackbox(pass back to set this string to a new value) + + """ + if error_handler is None: + error_handler = ErrorHandler() + + for column_data in self.column_data: + error_handler.push_error_context(ErrorContext.SIDECAR_COLUMN_NAME, column_data.column_name) + hed_dict = column_data.hed_dict + for (hed_string, position) in self._hed_string_iter(hed_dict, error_handler): + yield hed_string, column_data, position + error_handler.pop_error_context() + + @staticmethod + def _hed_string_iter(hed_strings, error_handler): + """ Iterate over the given dict of strings + + Parameters: + hed_strings(dict or str): A hed_string or dict of hed strings + error_handler (ErrorHandler): The error handler to use for context, uses a default one if none. + + Yields: + tuple: + - str: The hed string at a given column and key position. + - str: Indication of the where hed string was loaded from, so it can be later set by the user. + + """ + for hed_string, key_name in Sidecar._hed_iter_low(hed_strings): + if key_name: + error_handler.push_error_context(ErrorContext.SIDECAR_KEY_NAME, key_name) + yield hed_string, key_name + if key_name: + error_handler.pop_error_context() + + @staticmethod + def _hed_iter_low(hed_strings): + """ Iterate over the hed string entries. + + Used by hed_string_iter + + Parameters: + hed_strings(dict or str): A hed_string or dict of hed strings + + Yields: + tuple: + - str: Individual hed strings for different entries. + - str: The position to pass back to set this string. 
+ + """ + if isinstance(hed_strings, dict): + for key, hed_string in hed_strings.items(): if not isinstance(hed_string, str): - error_handler.push_error_context(ErrorContext.SIDECAR_KEY_NAME, key_name) - val_issues += ErrorHandler.format_error(SidecarErrors.WRONG_HED_DATA_TYPE, - given_type=type(hed_string), - expected_type="str") - error_handler.pop_error_context() - error_handler.add_context_to_issues(val_issues) - - return val_issues + continue + yield hed_string, key + elif isinstance(hed_strings, str): + yield hed_strings, None + + @staticmethod + def _set_hed_string_low(new_hed_string, hed_strings, position=None): + """ Set a hed string for a category key/etc. + + Parameters: + new_hed_string (str or HedString): The new hed_string to replace the value at position. + hed_strings(dict or str or HedString): The hed strings we want to update + position (str, optional): This should only be a value returned from hed_string_iter. + + Returns: + updated_string (str or dict): The newly updated string/dict. + Raises: + TypeError: If the mapping cannot occur. 
+ + """ + if isinstance(hed_strings, dict): + if position is None: + raise TypeError("Error: Trying to set a category HED string with no category") + if position not in hed_strings: + raise TypeError("Error: Not allowed to add new categories to a column") + hed_strings[position] = str(new_hed_string) + elif isinstance(hed_strings, (str, HedString)): + if position is not None: + raise TypeError("Error: Trying to set a value HED string with a category") + hed_strings = str(new_hed_string) + else: + raise TypeError("Error: Trying to set a HED string on a column_type that doesn't support it.") + + return hed_strings diff --git a/hed/models/sidecar_base.py b/hed/models/sidecar_base.py deleted file mode 100644 index 8b82d3ea3..000000000 --- a/hed/models/sidecar_base.py +++ /dev/null @@ -1,269 +0,0 @@ -import copy -from hed.models.column_metadata import ColumnMetadata -from hed.errors.error_types import ErrorContext -from hed.errors import error_reporter -from hed.errors import ErrorHandler -from hed.models.hed_string import HedString -from hed.models.def_mapper import DefMapper -from hed.models.hed_ops import translate_ops, apply_ops -from hed.models.definition_dict import DefinitionDict -from functools import partial - - -class SidecarBase: - """ Baseclass for specialized spreadsheet sidecars - - To subclass this class, you'll want to override at the minimum: - _hed_string_iter - _set_hed_string - validate_structure - column_data property <- This is the only truly mandatory one - - """ - def __init__(self, name=None, hed_schema=None): - """ Initialize a sidecar baseclass - - Parameters: - name (str or None): Optional name identifying this sidecar, generally a filename. 
- hed_schema(HedSchema or None): The schema to use by default in identifying tags - """ - self.name = name - self._schema = hed_schema - # Expected to be called in subclass after data is loaded - # self.def_dict = self.extract_definitions() - - @property - def column_data(self): - """ Generates the list of ColumnMetadata for this sidecar - - Returns: - list(ColumnMetadata): the list of column metadata defined by this sidecar - """ - return [] - - def _hed_string_iter(self, tag_funcs, error_handler): - """ Low level function to retrieve hed string in sidecar - - Parameters: - tag_funcs(list): A list of functions to apply to returned strings - error_handler(ErrorHandler): Error handler to use for context - - Yields: - tuple: - string(HedString): The retrieved and modified string - position(tuple): The location of this hed string. Black box. - issues(list): A list of issues running the tag_funcs. - """ - yield - - def _set_hed_string(self, new_hed_string, position): - """ Low level function to update hed string in sidecar - - Parameters: - new_hed_string (str or HedString): The new hed_string to replace the value at position. - position (tuple): The value returned from hed_string_iter. - """ - return - - def validate_structure(self, error_handler): - """ Validate the raw structure of this sidecar. - - Parameters: - error_handler(ErrorHandler): The error handler to use for error context - - Returns: - issues(list): A list of issues found with the structure - """ - return [] - - def __iter__(self): - """ An iterator to go over the individual column metadata. - - Returns: - iterator: An iterator over the column metadata values. - - """ - return iter(self.column_data) - - def hed_string_iter(self, hed_ops=None, error_handler=None, expand_defs=False, remove_definitions=False, - allow_placeholders=True, extra_def_dicts=None, **kwargs): - """ Iterator over hed strings in columns. 
- - Parameters: - hed_ops (func, HedOps, list): A HedOps, funcs or list of these to apply to the hed strings - before returning - error_handler (ErrorHandler): The error handler to use for context, uses a default one if none. - expand_defs (bool): If True, expand all def tags located in the strings. - remove_definitions (bool): If True, remove all definitions found in the string. - allow_placeholders (bool): If False, placeholders will be marked as validation warnings. - extra_def_dicts (DefinitionDict, list, None): Extra dicts to add to the list. - kwargs: See models.hed_ops.translate_ops or the specific hed_ops for additional options. - - Yields: - tuple: - - HedString: A HedString at a given column and key position. - - tuple: Indicates where hed_string was loaded from so it can be later set by the user - - list: A list of issues found performing ops. Each issue is a dictionary. - - """ - if error_handler is None: - error_handler = ErrorHandler() - hed_ops = self._standardize_ops(hed_ops) - if expand_defs or remove_definitions: - self._add_definition_mapper(hed_ops, extra_def_dicts) - tag_funcs = translate_ops(hed_ops, hed_schema=self._schema, error_handler=error_handler, - expand_defs=expand_defs, allow_placeholders=allow_placeholders, - remove_definitions=remove_definitions, **kwargs) - - return self._hed_string_iter(tag_funcs, error_handler) - - def set_hed_string(self, new_hed_string, position): - """ Set a provided column/category key/etc. - - Parameters: - new_hed_string (str or HedString): The new hed_string to replace the value at position. - position (tuple): The (HedString, str, list) tuple returned from hed_string_iter. - - """ - return self._set_hed_string(new_hed_string, position) - - def _add_definition_mapper(self, hed_ops, extra_def_dicts=None): - """ Add a DefMapper if the hed_ops list doesn't have one. - - Parameters: - hed_ops (list): A list of HedOps - extra_def_dicts (list): DefDicts from outside. 
- - Returns: - DefMapper: A shallow copy of the hed_ops list with a DefMapper added if there wasn't one. - - """ - def_mapper_list = [hed_op for hed_op in hed_ops if isinstance(hed_op, DefMapper)] - - if not def_mapper_list: - def_dicts = self.get_def_dicts(extra_def_dicts) - def_mapper = DefMapper(def_dicts) - hed_ops.append(def_mapper) - return def_mapper - return def_mapper_list[0] - - @staticmethod - def _standardize_ops(hed_ops): - if not isinstance(hed_ops, list): - hed_ops = [hed_ops] - return hed_ops.copy() - - def get_def_dicts(self, extra_def_dicts=None): - """ Returns the definition dict for this sidecar. - - Parameters: - extra_def_dicts (list, DefinitionDict, or None): Extra dicts to add to the list. - - Returns: - list: A list with the sidecar def_dict plus any found in extra_def_dicts. - - """ - def_dicts = [self.def_dict] - if extra_def_dicts: - if not isinstance(extra_def_dicts, list): - extra_def_dicts = [extra_def_dicts] - def_dicts += extra_def_dicts - return def_dicts - - def validate_entries(self, hed_ops=None, name=None, extra_def_dicts=None, - error_handler=None, **kwargs): - """ Run the given hed_ops on all columns in this sidecar. - - Parameters: - hed_ops (list, func, or HedOps): A HedOps, func or list of these to apply to hed strings in this sidecar. - name (str): If present, will use this as the filename for context, rather than using the actual filename - Useful for temp filenames. - extra_def_dicts (DefinitionDict, list, or None): If present use these in addition to sidecar's def dicts. - error_handler (ErrorHandler or None): Used to report errors. Uses a default one if none passed in. - kwargs: See models.hed_ops.translate_ops or the specific hed_ops for additional options. - - Returns: - list: The list of validation issues found. Individual issues are in the form of a dict. 
- - """ - if error_handler is None: - error_handler = error_reporter.ErrorHandler() - if not name: - name = self.name - if name: - error_handler.push_error_context(ErrorContext.FILE_NAME, name, False) - - all_validation_issues = self.validate_structure(error_handler) - - # Early out major errors so the rest of our code can assume they won't happen. - if all_validation_issues: - return all_validation_issues - - hed_ops = self._standardize_ops(hed_ops) - def_mapper = self._add_definition_mapper(hed_ops, extra_def_dicts) - all_validation_issues += def_mapper.issues - - for hed_string, key_name, issues in self.hed_string_iter(hed_ops=hed_ops, allow_placeholders=True, - error_handler=error_handler, **kwargs): - self.set_hed_string(hed_string, key_name) - all_validation_issues += issues - - # Finally check what requires the final mapped data to check - for column_data in self.column_data: - validate_pound_func = partial(self._validate_pound_sign_count, column_type=column_data.column_type) - _, issues = apply_ops(column_data.hed_dict, validate_pound_func) - all_validation_issues += issues - all_validation_issues += self.def_dict.get_definition_issues() - if name: - error_handler.pop_error_context() - return all_validation_issues - - def extract_definitions(self, hed_schema=None, error_handler=None): - """ Gather and validate definitions in metadata. - - Parameters: - error_handler (ErrorHandler): The error handler to use for context, uses a default one if None. - hed_schema (HedSchema or None): The schema to used to identify tags. - - Returns: - DefinitionDict: Contains all the definitions located in the column. - issues: List of issues encountered in extracting the definitions. Each issue is a dictionary. 
- - """ - if error_handler is None: - error_handler = ErrorHandler() - new_def_dict = DefinitionDict() - hed_ops = [] - hed_ops.append(hed_schema) - hed_ops.append(new_def_dict) - - all_issues = [] - for hed_string, key_name, issues in self.hed_string_iter(hed_ops=hed_ops, allow_placeholders=True, - error_handler=error_handler): - all_issues += issues - - return new_def_dict - - def _validate_pound_sign_count(self, hed_string, column_type): - """ Check if a given hed string in the column has the correct number of pound signs. - - Parameters: - hed_string (str or HedString): HED string to be checked. - - Returns: - list: Issues due to pound sign errors. Each issue is a dictionary. - - Notes: - Normally the number of # should be either 0 or 1, but sometimes will be higher due to the - presence of definition tags. - - """ - # Make a copy without definitions to check placeholder count. - expected_count, error_type = ColumnMetadata.expected_pound_sign_count(column_type) - hed_string_copy = copy.deepcopy(hed_string) - hed_string_copy.remove_definitions() - - if hed_string_copy.lower().count("#") != expected_count: - return ErrorHandler.format_error(error_type, pound_sign_count=str(hed_string_copy).count("#")) - - return [] diff --git a/hed/models/spreadsheet_input.py b/hed/models/spreadsheet_input.py index 77a497449..b48f6985f 100644 --- a/hed/models/spreadsheet_input.py +++ b/hed/models/spreadsheet_input.py @@ -1,6 +1,5 @@ from hed.models.column_mapper import ColumnMapper from hed.models.base_input import BaseInput -from hed.models.def_mapper import DefMapper class SpreadsheetInput(BaseInput): @@ -8,7 +7,7 @@ class SpreadsheetInput(BaseInput): def __init__(self, file=None, file_type=None, worksheet_name=None, tag_columns=None, has_column_names=True, column_prefix_dictionary=None, - def_dicts=None, name=None, hed_schema=None): + name=None): """Constructor for the SpreadsheetInput class. 
Parameters: @@ -21,9 +20,7 @@ def __init__(self, file=None, file_type=None, worksheet_name=None, tag_columns=N has_column_names (bool): True if file has column names. Validation will skip over the first line of the file if the spreadsheet as column names. column_prefix_dictionary (dict): A dictionary with column number keys and prefix values. - def_dicts (DefinitionDict or list): A DefinitionDict or list of DefDicts containing definitions for this - object other than the ones extracted from the SpreadsheetInput object itself. - hed_schema(HedSchema or None): The schema to use by default in identifying tags + This is partially deprecated - what this now turns the given columns into Value columns. Examples: A prefix dictionary {3: 'Label/', 5: 'Description/'} indicates that column 3 and 5 have HED tags that need to be prefixed by Label/ and Description/ respectively. @@ -38,7 +35,4 @@ def __init__(self, file=None, file_type=None, worksheet_name=None, tag_columns=N new_mapper = ColumnMapper(tag_columns=tag_columns, column_prefix_dictionary=column_prefix_dictionary, warn_on_missing_column=False) - def_mapper = DefMapper(def_dicts) - - super().__init__(file, file_type, worksheet_name, has_column_names, new_mapper, def_mapper=def_mapper, - name=name, hed_schema=hed_schema) + super().__init__(file, file_type, worksheet_name, has_column_names, new_mapper, name=name) diff --git a/hed/models/tabular_input.py b/hed/models/tabular_input.py index 2b9c2089a..388718fb9 100644 --- a/hed/models/tabular_input.py +++ b/hed/models/tabular_input.py @@ -1,7 +1,6 @@ from hed.models.column_mapper import ColumnMapper from hed.models.base_input import BaseInput from hed.models.sidecar import Sidecar -from hed.models.def_mapper import DefMapper class TabularInput(BaseInput): @@ -9,64 +8,30 @@ class TabularInput(BaseInput): HED_COLUMN_NAME = "HED" - def __init__(self, file=None, sidecar=None, extra_def_dicts=None, also_gather_defs=True, name=None, - hed_schema=None): + def __init__(self, 
file=None, sidecar=None, name=None): """ Constructor for the TabularInput class. Parameters: file (str or file like): A tsv file to open. sidecar (str or Sidecar): A Sidecar filename or Sidecar - extra_def_dicts ([DefinitionDict], DefinitionDict, or None): DefinitionDict objects containing all - the definitions this file should use other than the ones coming from the file - itself and from the sidecar. These are added as the last entries, so names will override - earlier ones. + Note: If this is a string you MUST also pass hed_schema. name (str): The name to display for this file for error purposes. - hed_schema(HedSchema or None): The schema to use by default in identifying tags """ if sidecar and not isinstance(sidecar, Sidecar): sidecar = Sidecar(sidecar) new_mapper = ColumnMapper(sidecar=sidecar, optional_tag_columns=[self.HED_COLUMN_NAME], warn_on_missing_column=True) - definition_columns = [self.HED_COLUMN_NAME] self._sidecar = sidecar - self._also_gather_defs = also_gather_defs - if extra_def_dicts and not isinstance(extra_def_dicts, list): - extra_def_dicts = [extra_def_dicts] - self._extra_def_dicts = extra_def_dicts - def_mapper = self.create_def_mapper(new_mapper) super().__init__(file, file_type=".tsv", worksheet_name=None, has_column_names=True, mapper=new_mapper, - def_mapper=def_mapper, name=name, definition_columns=definition_columns, - allow_blank_names=False, hed_schema=hed_schema) + name=name, allow_blank_names=False, ) if not self._has_column_names: raise ValueError("You are attempting to open a bids_old style file with no column headers provided.\n" "This is probably not intended.") - def create_def_mapper(self, column_mapper): - """ Create the definition mapper for this file. - - Parameters: - column_mapper (ColumnMapper): The column mapper to gather definitions from. - - - Returns: - def mapper (DefMapper): A class to validate or expand definitions with the given def dicts. 
- - Notes: - - The extra_def_dicts are definitions not included in the column mapper. - - """ - - def_dicts = column_mapper.get_def_dicts() - if self._extra_def_dicts: - def_dicts += self._extra_def_dicts - def_mapper = DefMapper(def_dicts) - - return def_mapper - def reset_column_mapper(self, sidecar=None): """ Change the sidecars and settings. @@ -76,25 +41,4 @@ def reset_column_mapper(self, sidecar=None): """ new_mapper = ColumnMapper(sidecar=sidecar, optional_tag_columns=[self.HED_COLUMN_NAME]) - self._def_mapper = self.create_def_mapper(new_mapper) self.reset_mapper(new_mapper) - - def validate_sidecar(self, hed_ops=None, error_handler=None, **kwargs): - """ Validate column definitions and hed strings. - - Parameters: - hed_ops (list or HedOps): A list of HedOps of funcs to apply to the hed strings in the sidecars. - error_handler (ErrorHandler or None): Used to report errors. Uses a default one if none passed in. - kwargs: See models.hed_ops.translate_ops or the specific hed_ops for additional options. - - Returns: - list: A list of syntax and semantic issues found in the definitions. Each issue is a dictionary. - - Notes: - - For full validation you should validate the sidecar separately. 
- - """ - if not isinstance(hed_ops, list): - hed_ops = [hed_ops] - hed_ops.append(self._def_mapper) - return self._sidecar.validate_entries(hed_ops, error_handler=error_handler, **kwargs) diff --git a/hed/models/timeseries_input.py b/hed/models/timeseries_input.py index c7ca5c215..0b9cbee18 100644 --- a/hed/models/timeseries_input.py +++ b/hed/models/timeseries_input.py @@ -22,4 +22,4 @@ def __init__(self, file=None, sidecar=None, extra_def_dicts=None, name=None): """ super().__init__(file, file_type=".tsv", worksheet_name=None, has_column_names=False, mapper=None, - def_mapper=None, name=name) + name=name) diff --git a/hed/schema/schema_compliance.py b/hed/schema/schema_compliance.py index 10b9aa6cc..84c2accbf 100644 --- a/hed/schema/schema_compliance.py +++ b/hed/schema/schema_compliance.py @@ -62,7 +62,7 @@ def check_compliance(hed_schema, check_for_warnings=True, name=None, error_handl if validator: error_handler.push_error_context(ErrorContext.SCHEMA_ATTRIBUTE, attribute_name, False) new_issues = validator(hed_schema, tag_entry, tag_entry.attributes[attribute_name]) - error_handler.add_context_to_issues(new_issues) + error_handler.add_context_and_filter(new_issues) issues_list += new_issues error_handler.pop_error_context() error_handler.pop_error_context() diff --git a/hed/validator/__init__.py b/hed/validator/__init__.py index 88b772ca8..4a8b94209 100644 --- a/hed/validator/__init__.py +++ b/hed/validator/__init__.py @@ -2,3 +2,7 @@ from .hed_validator import HedValidator from .tag_validator import TagValidator +from .sidecar_validator import SidecarValidator +from .def_validator import DefValidator +from .onset_validator import OnsetValidator +from .spreadsheet_validator import SpreadsheetValidator \ No newline at end of file diff --git a/hed/validator/def_validator.py b/hed/validator/def_validator.py new file mode 100644 index 000000000..24a3d8e5b --- /dev/null +++ b/hed/validator/def_validator.py @@ -0,0 +1,78 @@ +from hed.models.hed_string import 
HedString +from hed.models.hed_tag import HedTag +from hed.models.definition_dict import DefinitionDict +from hed.errors.error_types import ValidationErrors +from hed.errors.error_reporter import ErrorHandler + + +class DefValidator(DefinitionDict): + """ Handles validating Def/ and Def-expand/. + + """ + + def __init__(self, def_dicts=None, hed_schema=None): + """ Initialize for definitions in hed strings. + + Parameters: + def_dicts (list or DefinitionDict or str): DefinitionDicts containing the definitions to pass to baseclass + + """ + super().__init__(def_dicts, hed_schema=hed_schema) + + def validate_def_tags(self, hed_string_obj): + """ Validate Def/Def-Expand tags. + + Parameters: + hed_string_obj (HedString): The hed string to process. + + Returns: + list: Issues found related to validating defs. Each issue is a dictionary. + """ + hed_string_lower = hed_string_obj.lower() + if self._label_tag_name not in hed_string_lower: + return [] + + def_issues = [] + # We need to check for labels to expand in ALL groups + for def_tag, def_expand_group, def_group in hed_string_obj.find_def_tags(recursive=True): + def_issues += self._validate_def_contents(def_tag, def_expand_group) + + return def_issues + + def _validate_def_contents(self, def_tag, def_expand_group): + """ Check for issues with expanding a tag from Def to a Def-expand tag group + + Parameters: + def_tag (HedTag): Source hed tag that may be a Def or Def-expand tag. + def_expand_group (HedGroup or HedTag): + Source group for this def-expand tag. Same as def_tag if this is not a def-expand tag. 
+ + Returns: + issues + """ + def_issues = [] + + is_label_tag = def_tag.extension_or_value_portion + placeholder = None + found_slash = is_label_tag.find("/") + if found_slash != -1: + placeholder = is_label_tag[found_slash + 1:] + is_label_tag = is_label_tag[:found_slash] + + label_tag_lower = is_label_tag.lower() + def_entry = self.defs.get(label_tag_lower) + if def_entry is None: + def_issues += ErrorHandler.format_error(ValidationErrors.HED_DEF_UNMATCHED, tag=def_tag) + else: + def_tag_name, def_contents = def_entry.get_definition(def_tag, placeholder_value=placeholder) + if def_tag_name: + if def_expand_group is not def_tag and def_expand_group != def_contents: + def_issues += ErrorHandler.format_error(ValidationErrors.HED_DEF_EXPAND_INVALID, + tag=def_tag, actual_def=def_contents, + found_def=def_expand_group) + elif def_entry.takes_value: + def_issues += ErrorHandler.format_error(ValidationErrors.HED_DEF_VALUE_MISSING, tag=def_tag) + else: + def_issues += ErrorHandler.format_error(ValidationErrors.HED_DEF_VALUE_EXTRA, tag=def_tag) + + return def_issues diff --git a/hed/validator/hed_validator.py b/hed/validator/hed_validator.py index 600d5bb87..c7ce76adf 100644 --- a/hed/validator/hed_validator.py +++ b/hed/validator/hed_validator.py @@ -6,50 +6,86 @@ """ from hed.errors.error_types import ValidationErrors -from hed.errors.error_reporter import ErrorHandler +from hed.errors.error_reporter import ErrorHandler, check_for_any_errors from hed.models.hed_string import HedString from hed.models import HedTag from hed.validator.tag_validator import TagValidator -from functools import partial -from hed.models.hed_ops import HedOps +from hed.validator.def_validator import DefValidator +from hed.validator.onset_validator import OnsetValidator -class HedValidator(HedOps): +class HedValidator: """ Top level validation of HED strings. 
""" - def __init__(self, hed_schema=None, run_semantic_validation=True): + def __init__(self, hed_schema=None, def_dicts=None, run_full_onset_checks=True): """ Constructor for the HedValidator class. Parameters: hed_schema (HedSchema or HedSchemaGroup): HedSchema object to use for validation. - run_semantic_validation (bool): True if the validator should check the HED data against a schema. """ super().__init__() self._tag_validator = None self._hed_schema = hed_schema - self._tag_validator = TagValidator(hed_schema=self._hed_schema, - run_semantic_validation=run_semantic_validation) - self._run_semantic_validation = run_semantic_validation - - def __get_tag_funcs__(self, **kwargs): - string_funcs = [] - allow_placeholders = kwargs.get("allow_placeholders") - check_for_warnings = kwargs.get("check_for_warnings") - string_funcs.append(self._tag_validator.run_hed_string_validators) - string_funcs.append( - partial(HedString.convert_to_canonical_forms, hed_schema=self._hed_schema)) - string_funcs.append(partial(self._validate_individual_tags_in_hed_string, - allow_placeholders=allow_placeholders, - check_for_warnings=check_for_warnings)) - return string_funcs - - def __get_string_funcs__(self, **kwargs): - check_for_warnings = kwargs.get("check_for_warnings") - string_funcs = [partial(self._validate_tags_in_hed_string, check_for_warnings=check_for_warnings), - self._validate_groups_in_hed_string] - return string_funcs + self._tag_validator = TagValidator(hed_schema=self._hed_schema) + self._def_validator = DefValidator(def_dicts, hed_schema) + self._onset_validator = OnsetValidator(def_dict=self._def_validator, + run_full_onset_checks=run_full_onset_checks) + + def validate(self, hed_string, allow_placeholders, error_handler=None): + """ + Validate the string using the schema + + Parameters: + hed_string(HedString): the string to validate + allow_placeholders(bool): allow placeholders in the string + error_handler(ErrorHandler or None): the error handler to use, 
creates a default one if none passed + Returns: + issues (list of dict): A list of issues for hed string + """ + if not error_handler: + error_handler = ErrorHandler() + issues = [] + issues += self.run_basic_checks(hed_string, allow_placeholders=allow_placeholders) + error_handler.add_context_and_filter(issues) + if check_for_any_errors(issues): + return issues + issues += self.run_full_string_checks(hed_string) + error_handler.add_context_and_filter(issues) + return issues + + def run_basic_checks(self, hed_string, allow_placeholders): + issues = [] + issues += self._tag_validator.run_hed_string_validators(hed_string) + if check_for_any_errors(issues): + return issues + if hed_string == "n/a" or not self._hed_schema: + return issues + issues += hed_string.convert_to_canonical_forms(self._hed_schema) + if check_for_any_errors(issues): + return issues + # This is required so it can validate the tag a tag expands into + # e.g. checking units when a definition placeholder has units + self._def_validator.construct_def_tags(hed_string) + issues += self._validate_individual_tags_in_hed_string(hed_string, allow_placeholders=allow_placeholders) + if check_for_any_errors(issues): + return issues + issues += self._def_validator.validate_def_tags(hed_string) + if check_for_any_errors(issues): + return issues + issues += self._onset_validator.validate_onset_offset(hed_string) + if check_for_any_errors(issues): + return issues + return issues + + def run_full_string_checks(self, hed_string): + issues = [] + issues += self._validate_tags_in_hed_string(hed_string) + if check_for_any_errors(issues): + return issues + issues += self._validate_groups_in_hed_string(hed_string) + return issues def _validate_groups_in_hed_string(self, hed_string_obj): """ Report invalid groups at each level. 
@@ -103,26 +139,21 @@ def _check_for_duplicate_groups(self, original_group): self._check_for_duplicate_groups_recursive(sorted_group, validation_issues) return validation_issues - def _validate_tags_in_hed_string(self, hed_string_obj, check_for_warnings=False): - """ Report invalid the multi-tag properties. + def _validate_tags_in_hed_string(self, hed_string_obj): + """ Report invalid the multi-tag properties in a hed string, e.g. required tags.. Parameters: hed_string_obj (HedString): A HedString object. Returns: list: The issues associated with the tags in the HED string. Each issue is a dictionary. - - Notes: - - in a hed string, eg required tags. - - """ + """ validation_issues = [] tags = hed_string_obj.get_all_tags() - validation_issues += self._tag_validator.run_all_tags_validators(tags, check_for_warnings=check_for_warnings) + validation_issues += self._tag_validator.run_all_tags_validators(tags) return validation_issues - def _validate_individual_tags_in_hed_string(self, hed_string_obj, allow_placeholders=False, - check_for_warnings=False): + def _validate_individual_tags_in_hed_string(self, hed_string_obj, allow_placeholders=False): """ Validate individual tags in a HED string. Parameters: @@ -139,9 +170,15 @@ def _validate_individual_tags_in_hed_string(self, hed_string_obj, allow_placehol for group in hed_string_obj.get_all_groups(): is_definition = group in all_def_groups for hed_tag in group.tags(): - validation_issues += \ - self._tag_validator.run_individual_tag_validators(hed_tag, allow_placeholders=allow_placeholders, - check_for_warnings=check_for_warnings, - is_definition=is_definition) + if hed_tag.expandable and not hed_tag.expanded: + for tag in hed_tag.expandable.get_all_tags(): + validation_issues += self._tag_validator. \ + run_individual_tag_validators(tag, allow_placeholders=allow_placeholders, + is_definition=is_definition) + else: + validation_issues += self._tag_validator. 
\ + run_individual_tag_validators(hed_tag, + allow_placeholders=allow_placeholders, + is_definition=is_definition) return validation_issues diff --git a/hed/models/onset_mapper.py b/hed/validator/onset_validator.py similarity index 76% rename from hed/models/onset_mapper.py rename to hed/validator/onset_validator.py index 842ff25a6..942f58efb 100644 --- a/hed/models/onset_mapper.py +++ b/hed/validator/onset_validator.py @@ -2,29 +2,24 @@ from hed.models.hed_group import HedGroup from hed.errors.error_reporter import ErrorHandler from hed.errors.error_types import OnsetErrors -from hed.models.hed_ops import HedOps -class OnsetMapper(HedOps): - """ HedOps responsible for matching onset/offset pairs. """ +class OnsetValidator: + """ Validates onset/offset pairs. """ - def __init__(self, def_mapper): - super().__init__() - self._def_mapper = def_mapper + def __init__(self, def_dict, run_full_onset_checks=True): + self._defs = def_dict self._onsets = {} + self._run_full_onset_checks = run_full_onset_checks - def check_for_onset_offset(self, hed_string_obj): - """ Check for onset or offset and track context. + def validate_onset_offset(self, hed_string_obj): + """ Validate onset/offset Parameters: - hed_string_obj (HedString): The hed string to check. Finds a maximum of one onset tag. + hed_string_obj (HedString): The hed string to check. Returns: list: A list of issues found in validating onsets (i.e., out of order onsets, unknown def names). - - Notes: - - Each issue in the return list is a dictionary. 
- """ onset_issues = [] for found_onset, found_group in self._find_onset_tags(hed_string_obj): @@ -82,28 +77,21 @@ def _handle_onset_or_offset(self, def_tag, onset_offset_tag): placeholder = def_name[found_slash + 1:] def_name = def_name[:found_slash] - def_entry = self._def_mapper.get_def_entry(def_name) + def_entry = self._defs.get_def_entry(def_name) if def_entry is None: return ErrorHandler.format_error(OnsetErrors.ONSET_DEF_UNMATCHED, tag=def_tag) if bool(def_entry.takes_value) != bool(placeholder): return ErrorHandler.format_error(OnsetErrors.ONSET_PLACEHOLDER_WRONG, tag=def_tag, has_placeholder=bool(def_entry.takes_value)) - if is_onset: - # onset can never fail as it implies an offset - self._onsets[full_def_name.lower()] = full_def_name - else: - if full_def_name.lower() not in self._onsets: - return ErrorHandler.format_error(OnsetErrors.OFFSET_BEFORE_ONSET, tag=def_tag) + if self._run_full_onset_checks: + if is_onset: + # onset can never fail as it implies an offset + self._onsets[full_def_name.lower()] = full_def_name else: - del self._onsets[full_def_name.lower()] - - return [] - - def __get_string_funcs__(self, **kwargs): - string_funcs = [] - string_funcs.append(self.check_for_onset_offset) - return string_funcs + if full_def_name.lower() not in self._onsets: + return ErrorHandler.format_error(OnsetErrors.OFFSET_BEFORE_ONSET, tag=def_tag) + else: + del self._onsets[full_def_name.lower()] - def __get_tag_funcs__(self, **kwargs): return [] diff --git a/hed/validator/sidecar_validator.py b/hed/validator/sidecar_validator.py new file mode 100644 index 000000000..af12005b1 --- /dev/null +++ b/hed/validator/sidecar_validator.py @@ -0,0 +1,147 @@ +import copy +from hed.errors import ErrorHandler, ErrorContext, SidecarErrors +from hed.models import ColumnType +from hed import HedString +from hed import Sidecar +from hed.models.column_metadata import ColumnMetadata + + +class SidecarValidator: + reserved_column_names = ["HED"] + reserved_category_values = 
["n/a"] + + def __init__(self, hed_schema): + """ + Constructor for the HedValidator class. + + Parameters: + hed_schema (HedSchema): HED schema object to use for validation. + """ + self._schema = hed_schema + + def validate(self, sidecar, extra_def_dicts=None, name=None, error_handler=None): + """Validate the input data using the schema + + Parameters: + sidecar (Sidecar): Input data to be validated. + extra_def_dicts(list or DefinitionDict): extra def dicts in addition to sidecar + name(str): The name to report this sidecar as + error_handler (ErrorHandler): Error context to use. Creates a new one if None + Returns: + issues (list of dict): A list of issues associated with each level in the HED string. + """ + from hed.validator import HedValidator + issues = [] + if error_handler is None: + error_handler = ErrorHandler() + + error_handler.push_error_context(ErrorContext.FILE_NAME, name) + sidecar_def_dict = sidecar.get_def_dict(hed_schema=self._schema, extra_def_dicts=extra_def_dicts) + hed_validator = HedValidator(self._schema, + def_dicts=sidecar_def_dict, + run_full_onset_checks=False) + + issues += self.validate_structure(sidecar, error_handler=error_handler) + issues += sidecar._extract_definition_issues + issues += sidecar_def_dict.issues + # todo: Add the definition validation. 
+ + for hed_string, column_data, position in sidecar.hed_string_iter(error_handler): + hed_string_obj = HedString(hed_string, hed_schema=self._schema, def_dict=sidecar_def_dict) + + error_handler.push_error_context(ErrorContext.HED_STRING, hed_string_obj, + increment_depth_after=False) + new_issues = hed_validator.run_basic_checks(hed_string_obj, allow_placeholders=True) + if not new_issues: + new_issues = hed_validator.run_full_string_checks(hed_string_obj) + if not new_issues: + new_issues = self._validate_pound_sign_count(hed_string_obj, column_type=column_data.column_type) + error_handler.add_context_and_filter(new_issues) + issues += new_issues + error_handler.pop_error_context() + + error_handler.pop_error_context() + return issues + + def validate_structure(self, sidecar, error_handler): + """ Validate the raw structure of this sidecar. + + Parameters: + sidecar(Sidecar): the sidecar to validate + error_handler(ErrorHandler): The error handler to use for error context + + Returns: + issues(list): A list of issues found with the structure + """ + all_validation_issues = [] + for column_name, dict_for_entry in sidecar.loaded_dict.items(): + error_handler.push_error_context(ErrorContext.SIDECAR_COLUMN_NAME, column_name) + all_validation_issues += self._validate_column_structure(column_name, dict_for_entry, error_handler) + error_handler.pop_error_context() + return all_validation_issues + + def _validate_column_structure(self, column_name, dict_for_entry, error_handler): + """ Checks primarily for type errors such as expecting a string and getting a list in a json sidecar. + + Parameters: + error_handler (ErrorHandler) Sets the context for the error reporting. Cannot be None. + + Returns: + list: Issues in performing the operations. Each issue is a dictionary. 
+ + """ + val_issues = [] + if column_name in self.reserved_column_names: + val_issues += error_handler.format_error_with_context(SidecarErrors.SIDECAR_HED_USED) + return val_issues + + column_type = Sidecar._detect_column_type(dict_for_entry=dict_for_entry) + if column_type is None: + val_issues += error_handler.format_error_with_context(SidecarErrors.UNKNOWN_COLUMN_TYPE, + column_name=column_name) + elif column_type == ColumnType.Categorical: + raw_hed_dict = dict_for_entry["HED"] + if not raw_hed_dict: + val_issues += error_handler.format_error_with_context(SidecarErrors.BLANK_HED_STRING) + if not isinstance(raw_hed_dict, dict): + val_issues += error_handler.format_error_with_context(SidecarErrors.WRONG_HED_DATA_TYPE, + given_type=type(raw_hed_dict), + expected_type="dict") + for key_name, hed_string in raw_hed_dict.items(): + error_handler.push_error_context(ErrorContext.SIDECAR_KEY_NAME, key_name) + if not isinstance(hed_string, str): + val_issues += error_handler.format_error_with_context(SidecarErrors.WRONG_HED_DATA_TYPE, + given_type=type(hed_string), + expected_type="str") + if not hed_string: + val_issues += error_handler.format_error_with_context(SidecarErrors.BLANK_HED_STRING) + if key_name in self.reserved_category_values: + val_issues += error_handler.format_error_with_context(SidecarErrors.SIDECAR_NA_USED, column_name) + error_handler.pop_error_context() + + return val_issues + + def _validate_pound_sign_count(self, hed_string, column_type): + """ Check if a given hed string in the column has the correct number of pound signs. + + Parameters: + hed_string (str or HedString): HED string to be checked. + + Returns: + list: Issues due to pound sign errors. Each issue is a dictionary. + + Notes: + Normally the number of # should be either 0 or 1, but sometimes will be higher due to the + presence of definition tags. + + """ + # Make a copy without definitions to check placeholder count. 
expected_count, error_type = ColumnMetadata.expected_pound_sign_count(column_type) + hed_string_copy = copy.deepcopy(hed_string) + hed_string_copy.remove_definitions() + hed_string_copy.shrink_defs() + + if hed_string_copy.lower().count("#") != expected_count: + return ErrorHandler.format_error(error_type, pound_sign_count=str(hed_string_copy).count("#")) + + return [] diff --git a/hed/validator/spreadsheet_validator.py b/hed/validator/spreadsheet_validator.py new file mode 100644 index 000000000..136b5aa73 --- /dev/null +++ b/hed/validator/spreadsheet_validator.py @@ -0,0 +1,114 @@ +import pandas as pd +from hed import BaseInput +from hed.errors import ErrorHandler, ValidationErrors, ErrorContext +from hed.models import ColumnType +from hed import HedString +from hed.models.hed_string_group import HedStringGroup + +PANDAS_COLUMN_PREFIX_TO_IGNORE = "Unnamed: " + + +class SpreadsheetValidator: + def __init__(self, hed_schema): + """ + Constructor for the SpreadsheetValidator class. + + Parameters: + hed_schema (HedSchema): HED schema object to use for validation. + """ + self._schema = hed_schema + self._hed_validator = None + + def validate(self, data, def_dicts=None, name=None, error_handler=None): + """ + Validate the input data using the schema + + Parameters: + data (BaseInput or pd.DataFrame): Input data to be validated. + def_dicts(list of DefDict or DefDict): all definitions to use for validation + name(str): The name to report errors from this file as + error_handler (ErrorHandler): Error context to use.
Creates a new one if None + Returns: + issues (list of dict): A list of issues for hed string + """ + from hed.validator import HedValidator + issues = [] + if error_handler is None: + error_handler = ErrorHandler() + + error_handler.push_error_context(ErrorContext.FILE_NAME, name) + self._hed_validator = HedValidator(self._schema, def_dicts=def_dicts) + # Check the structure of the input data, if it's a BaseInput + if isinstance(data, BaseInput): + issues += self._validate_column_structure(data, error_handler) + data = data.dataframe_a + + # Check the rows of the input data + issues += self._run_checks(data, error_handler) + error_handler.pop_error_context() + return issues + + def _run_checks(self, data, error_handler): + issues = [] + for row_number, text_file_row in enumerate(data.itertuples(index=False)): + error_handler.push_error_context(ErrorContext.ROW, row_number) + row_strings = [] + new_column_issues = [] + # todo: make this report the correct column numbers(somehow - it almost surely doesn't right now) + for column_number, cell in enumerate(text_file_row): + if not cell or cell == "n/a": + continue + + error_handler.push_error_context(ErrorContext.COLUMN, column_number) + + column_hed_string = HedString(cell) + row_strings.append(column_hed_string) + error_handler.push_error_context(ErrorContext.HED_STRING, column_hed_string, + increment_depth_after=False) + new_column_issues = self._hed_validator.run_basic_checks(column_hed_string, allow_placeholders=False) + + error_handler.add_context_and_filter(new_column_issues) + error_handler.pop_error_context() + error_handler.pop_error_context() + + issues += new_column_issues + if new_column_issues: + continue + else: + row_string = HedStringGroup(row_strings) + error_handler.push_error_context(ErrorContext.HED_STRING, row_string, increment_depth_after=False) + new_column_issues = self._hed_validator.run_full_string_checks(row_string) + + error_handler.add_context_and_filter(new_column_issues) + 
error_handler.pop_error_context() + issues += new_column_issues + error_handler.pop_error_context() + return issues + + def _validate_column_structure(self, base_input, error_handler): + """ + Validate that each column in the input data has valid values. + + Parameters: + base_input (BaseInput): The input data to be validated. + Returns: + List of issues associated with each invalid value. Each issue is a dictionary. + """ + issues = [] + col_issues = base_input._mapper.get_column_mapping_issues() + error_handler.add_context_and_filter(col_issues) + issues += col_issues + for column in base_input.column_metadata().values(): + if column.column_type == ColumnType.Categorical: + error_handler.push_error_context(ErrorContext.COLUMN, column.column_name) + valid_keys = column.hed_dict.keys() + for row_number, value in enumerate(base_input.dataframe[column.column_name]): + if value != "n/a" and value not in valid_keys: + error_handler.push_error_context(ErrorContext.ROW, row_number) + issues += error_handler.format_error_with_context(ValidationErrors.HED_SIDECAR_KEY_MISSING, + invalid_key=value, + category_keys=list(valid_keys)) + error_handler.pop_error_context() + error_handler.pop_error_context() + + return issues diff --git a/hed/validator/tag_validator.py b/hed/validator/tag_validator.py index 29b5c9f1b..2d08eae62 100644 --- a/hed/validator/tag_validator.py +++ b/hed/validator/tag_validator.py @@ -13,7 +13,7 @@ class TagValidator: """ Validation for individual HED tags. """ - CAMEL_CASE_EXPRESSION = r'([A-Z-]+\s*[a-z-]*)+' + CAMEL_CASE_EXPRESSION = r'([A-Z]+\s*[a-z-]*)+' INVALID_STRING_CHARS = '[]{}~' OPENING_GROUP_CHARACTER = '(' CLOSING_GROUP_CHARACTER = ')' @@ -24,21 +24,17 @@ class TagValidator: # Placeholder characters are checked elsewhere, but by default allowed TAG_ALLOWED_CHARS = "-_/" - def __init__(self, hed_schema=None, run_semantic_validation=True): + def __init__(self, hed_schema=None): """Constructor for the Tag_Validator class. 
Parameters: hed_schema (HedSchema): A HedSchema object. - run_semantic_validation (bool): True if the validator should check the HED data against a schema. Returns: TagValidator: A Tag_Validator object. """ self._hed_schema = hed_schema - self._run_semantic_validation = run_semantic_validation - if not self._hed_schema: - self._run_semantic_validation = False # Dict contains all the value portion validators for value class. e.g. "is this a number?" self._value_unit_validators = self._register_default_value_validators() @@ -67,13 +63,12 @@ def run_hed_string_validators(self, hed_string_obj): validation_issues += self.check_tag_formatting(tag) return validation_issues - def run_individual_tag_validators(self, original_tag, check_for_warnings, allow_placeholders=False, + def run_individual_tag_validators(self, original_tag, allow_placeholders=False, is_definition=False): """ Runs the hed_ops on the individual tags. Parameters: original_tag (HedTag): A original tag. - check_for_warnings (bool): If True, also check for warnings. allow_placeholders (bool): Allow value class or extensions to be placeholders rather than a specific value. is_definition (bool): This tag is part of a Definition, not a normal line. 
@@ -83,10 +78,10 @@ def run_individual_tag_validators(self, original_tag, check_for_warnings, allow_ """ validation_issues = [] validation_issues += self.check_tag_invalid_chars(original_tag, allow_placeholders) - if self._run_semantic_validation: - validation_issues += self.check_tag_exists_in_schema(original_tag, check_for_warnings) + if self._hed_schema: + validation_issues += self.check_tag_exists_in_schema(original_tag) if original_tag.is_unit_class_tag(): - validation_issues += self.check_tag_unit_class_units_are_valid(original_tag, check_for_warnings) + validation_issues += self.check_tag_unit_class_units_are_valid(original_tag) elif original_tag.is_value_class_tag(): validation_issues += self.check_tag_value_class_valid(original_tag) elif original_tag.extension_or_value_portion: @@ -95,8 +90,7 @@ def run_individual_tag_validators(self, original_tag, check_for_warnings, allow_ if not allow_placeholders: validation_issues += self.check_for_placeholder(original_tag, is_definition) validation_issues += self.check_tag_requires_child(original_tag) - if check_for_warnings: - validation_issues += self.check_capitalization(original_tag) + validation_issues += self.check_capitalization(original_tag) return validation_issues def run_tag_level_validators(self, original_tag_list, is_top_level, is_group): @@ -119,12 +113,11 @@ def run_tag_level_validators(self, original_tag_list, is_top_level, is_group): validation_issues += self.check_tag_level_issue(original_tag_list, is_top_level, is_group) return validation_issues - def run_all_tags_validators(self, tags, check_for_warnings): + def run_all_tags_validators(self, tags): """ Validate the multi-tag properties in a hed string. Parameters: tags (list): A list containing the HedTags in a HED string. - check_for_warnings (bool): If True, also check for warnings. Returns: list: The validation issues associated with the tags in a HED string. Each issue is a dictionary. 
@@ -134,9 +127,8 @@ def run_all_tags_validators(self, tags, check_for_warnings): """ validation_issues = [] - if self._run_semantic_validation: - if check_for_warnings: - validation_issues += self.check_for_required_tags(tags) + if self._hed_schema: + validation_issues += self.check_for_required_tags(tags) validation_issues += self.check_multiple_unique_tags_exist(tags) return validation_issues @@ -210,6 +202,9 @@ def check_delimiter_issues_in_hed_string(self, hed_string): current_tag = '' else: issues += ErrorHandler.format_error(ValidationErrors.HED_COMMA_MISSING, tag=current_tag) + elif last_non_empty_valid_character == "," and current_character == self.CLOSING_GROUP_CHARACTER: + issues += ErrorHandler.format_error(ValidationErrors.HED_TAG_EMPTY, source_string=hed_string, + char_index=i) elif TagValidator._comma_is_missing_after_closing_parentheses(last_non_empty_valid_character, current_character): issues += ErrorHandler.format_error(ValidationErrors.HED_COMMA_MISSING, tag=current_tag[:-1]) @@ -252,19 +247,20 @@ def check_tag_invalid_chars(self, original_tag, allow_placeholders): Returns: list: Validation issues. Each issue is a dictionary. """ + validation_issues = self._check_invalid_prefix_issues(original_tag) allowed_chars = self.TAG_ALLOWED_CHARS if not self._hed_schema or not self._hed_schema.is_hed3_schema: allowed_chars += " " if allow_placeholders: allowed_chars += "#" - return self._check_invalid_chars(original_tag.org_base_tag, allowed_chars, original_tag) + validation_issues += self._check_invalid_chars(original_tag.org_base_tag, allowed_chars, original_tag) + return validation_issues - def check_tag_exists_in_schema(self, original_tag, check_for_warnings=False): + def check_tag_exists_in_schema(self, original_tag): """ Report invalid tag or doesn't take a value. Parameters: original_tag (HedTag): The original tag that is used to report the error. - check_for_warnings (bool): If True, also check for warnings. Returns: list: Validation issues. 
Each issue is a dictionary. @@ -276,18 +272,17 @@ def check_tag_exists_in_schema(self, original_tag, check_for_warnings=False): is_extension_tag = original_tag.is_extension_allowed_tag() if not is_extension_tag: validation_issues += ErrorHandler.format_error(ValidationErrors.INVALID_EXTENSION, tag=original_tag) - elif check_for_warnings: + else: validation_issues += ErrorHandler.format_error(ValidationErrors.HED_TAG_EXTENDED, tag=original_tag, index_in_tag=len(original_tag.org_base_tag), index_in_tag_end=None) return validation_issues - def check_tag_unit_class_units_are_valid(self, original_tag, check_for_warnings): + def check_tag_unit_class_units_are_valid(self, original_tag): """ Report incorrect unit class or units. Parameters: original_tag (HedTag): The original tag that is used to report the error. - check_for_warnings (bool): Indicates whether to check for warnings. Returns: list: Validation issues. Each issue is a dictionary. @@ -297,13 +292,12 @@ def check_tag_unit_class_units_are_valid(self, original_tag, check_for_warnings) stripped_value, unit = original_tag.get_stripped_unit_value() if not unit: if self._validate_value_class_portion(original_tag, stripped_value): - if check_for_warnings: - # only suggest a unit is missing if this is a valid number - if tag_validator_util.validate_numeric_value_class(stripped_value): - default_unit = original_tag.get_unit_class_default_unit() - validation_issues += ErrorHandler.format_error(ValidationErrors.HED_UNITS_DEFAULT_USED, - tag=original_tag, - default_unit=default_unit) + # only suggest a unit is missing if this is a valid number + if tag_validator_util.validate_numeric_value_class(stripped_value): + default_unit = original_tag.get_unit_class_default_unit() + validation_issues += ErrorHandler.format_error(ValidationErrors.HED_UNITS_DEFAULT_USED, + tag=original_tag, + default_unit=default_unit) else: tag_unit_class_units = original_tag.get_tag_unit_class_units() if tag_unit_class_units: @@ -412,24 +406,23 @@ 
def check_tag_level_issue(self, original_tag_list, is_top_level, is_group): - Top-level groups can contain definitions, Onset, etc tags. """ validation_issues = [] - if self._run_semantic_validation: - top_level_tags = [tag for tag in original_tag_list if - tag.base_tag_has_attribute(HedKey.TopLevelTagGroup)] - tag_group_tags = [tag for tag in original_tag_list if - tag.base_tag_has_attribute(HedKey.TagGroup)] - for tag_group_tag in tag_group_tags: - if not is_group: - validation_issues += ErrorHandler.format_error(ValidationErrors.HED_TAG_GROUP_TAG, - tag=tag_group_tag) - for top_level_tag in top_level_tags: - if not is_top_level: - validation_issues += ErrorHandler.format_error(ValidationErrors.HED_TOP_LEVEL_TAG, - tag=top_level_tag) - - if is_top_level and len(top_level_tags) > 1: - validation_issues += ErrorHandler.format_error(ValidationErrors.HED_MULTIPLE_TOP_TAGS, - tag=top_level_tags[0], - multiple_tags=top_level_tags[1:]) + top_level_tags = [tag for tag in original_tag_list if + tag.base_tag_has_attribute(HedKey.TopLevelTagGroup)] + tag_group_tags = [tag for tag in original_tag_list if + tag.base_tag_has_attribute(HedKey.TagGroup)] + for tag_group_tag in tag_group_tags: + if not is_group: + validation_issues += ErrorHandler.format_error(ValidationErrors.HED_TAG_GROUP_TAG, + tag=tag_group_tag) + for top_level_tag in top_level_tags: + if not is_top_level: + validation_issues += ErrorHandler.format_error(ValidationErrors.HED_TOP_LEVEL_TAG, + tag=top_level_tag) + + if is_top_level and len(top_level_tags) > 1: + validation_issues += ErrorHandler.format_error(ValidationErrors.HED_MULTIPLE_TOP_TAGS, + tag=top_level_tags[0], + multiple_tags=top_level_tags[1:]) return validation_issues @@ -475,6 +468,15 @@ def check_multiple_unique_tags_exist(self, tags): # ========================================================================== # Private utility functions # =========================================================================+ + def 
_check_invalid_prefix_issues(self, original_tag): + """Check for invalid schema prefix.""" + issues = [] + schema_prefix = original_tag.schema_prefix + if schema_prefix and not schema_prefix[:-1].isalpha(): + issues += ErrorHandler.format_error(ValidationErrors.TAG_PREFIX_INVALID, + tag=original_tag, tag_prefix=schema_prefix) + return issues + def _validate_value_class_portion(self, original_tag, portion_to_validate): if portion_to_validate is None: return False diff --git a/spec_tests/test_errors.py b/spec_tests/test_errors.py index f43bc9c86..9c80d4d98 100644 --- a/spec_tests/test_errors.py +++ b/spec_tests/test_errors.py @@ -1,13 +1,18 @@ import os -import json import unittest -from hed.models import DefinitionDict, DefMapper, OnsetMapper -from hed.models.hed_ops import apply_ops -from hed import load_schema_version -from hed import HedValidator +from hed.models import DefinitionDict + +from hed import load_schema_version, HedString +from hed.validator import HedValidator from hed import Sidecar import io import json +from hed import HedFileError +from hed.errors import ErrorHandler, get_printable_issue_string + + + +skip_tests = ["VERSION_DEPRECATED", "CHARACTER_INVALID", "STYLE_WARNING"] class MyTestCase(unittest.TestCase): @@ -17,94 +22,79 @@ def setUpClass(cls): 'hed-specification/docs/source/_static/data/error_tests')) cls.test_files = [os.path.join(test_dir, f) for f in os.listdir(test_dir) if os.path.isfile(os.path.join(test_dir, f))] - cls.fail_count = 0 + cls.fail_count = [] cls.default_sidecar = Sidecar(os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'test_sidecar.json'))) - def run_single_test(self, test_file): with open(test_file, "r") as fp: test_info = json.load(fp) for info in test_info: error_code = info['error_code'] - if error_code == "VERSION_DEPRECATED": - print("Skipping VERSION_DEPRECATED test") + if error_code in skip_tests: + print(f"Skipping {error_code} test") continue name = info.get('name', '') description = 
info['description'] schema = info['schema'] + check_for_warnings = info.get("warning", False) + error_handler = ErrorHandler(check_for_warnings) if schema: schema = load_schema_version(schema) else: - schema = None + raise ValueError("Tests always require a schema now") definitions = info['definitions'] - def_dict = DefinitionDict() - _, issues = apply_ops(definitions, [schema, def_dict]) - self.assertFalse(issues) - validator = HedValidator(schema) - def_mapper = DefMapper(def_dict) - onset_mapper = OnsetMapper(def_mapper) + def_dict = DefinitionDict(definitions, schema) + self.assertFalse(def_dict.issues) for section_name, section in info["tests"].items(): if section_name == "string_tests": - self._run_single_string_test(section, validator, def_mapper, - onset_mapper, error_code, description, name) - elif section_name == "sidecar_tests": - self._run_single_sidecar_test(section, validator, def_mapper, onset_mapper, error_code, description, - name) - elif section_name == "event_tests": - self._run_single_events_test(section, validator, def_mapper, onset_mapper, error_code, description, - name) - - def _run_single_string_test(self, info, validator, def_mapper, onset_mapper, error_code, description, - name): + self._run_single_string_test(section, schema, def_dict, error_code, description, name, error_handler) + if section_name == "sidecar_tests": + self._run_single_sidecar_test(section, schema, def_dict, error_code, description, name, error_handler) + if section_name == "event_tests": + self._run_single_events_test(section, schema, def_dict, error_code, description, name, error_handler) + if section_name == "combo_tests": + self._run_single_combo_test(section, schema, def_dict, error_code, description, name, error_handler) + + def report_result(self, expected_result, issues, error_code, description, name, test, test_type): + if expected_result == "fails": + if not issues: + print(f"{error_code}: {description}") + print(f"Passed '{test_type}' (which should fail) 
'{name}': {test}") + print(get_printable_issue_string(issues)) + self.fail_count.append(name) + else: + if issues: + print(f"{error_code}: {description}") + print(f"Failed '{test_type}' test '{name}': {test}") + print(get_printable_issue_string(issues)) + self.fail_count.append(name) + + def _run_single_string_test(self, info, schema, def_dict, error_code, description, name, error_handler): + string_validator = HedValidator(hed_schema=schema, def_dicts=def_dict, run_full_onset_checks=False) for result, tests in info.items(): for test in tests: - modified_test, issues = apply_ops(test, [validator, def_mapper, onset_mapper], check_for_warnings=True, - expand_defs=True) - if modified_test and modified_test != test: - _, def_expand_issues = apply_ops(modified_test, validator, check_for_warnings=True) - issues += def_expand_issues - if result == "fails": - if not issues: - print(f"{error_code}: {description}") - print(f"Passed this test(that should fail) '{name}': {test}") - print(issues) - self.fail_count += 1 - else: - if issues: - print(f"{error_code}: {description}") - print(f"Failed this test {name}: {test}") - print(issues) - - self.fail_count += 1 - - def _run_single_sidecar_test(self, info, validator, def_mapper, onset_mapper, error_code, description, - name): - for result, tests in info.items(): + test_string = HedString(test, schema) + + # This expand should not be required here. 
+ def_dict.expand_def_tags(test_string) + + issues = string_validator.run_basic_checks(test_string, False) + issues += string_validator.run_full_string_checks(test_string) + error_handler.add_context_and_filter(issues) + self.report_result(result, issues, error_code, description, name, test, "string_test") + def _run_single_sidecar_test(self, info, schema, def_dict, error_code, description, name, error_handler): + for result, tests in info.items(): for test in tests: # Well this is a disaster buffer = io.BytesIO(json.dumps(test).encode("utf-8")) sidecar = Sidecar(buffer) - issues = sidecar.validate_entries([validator, def_mapper, onset_mapper], check_for_warnings=True) - if result == "fails": - if not issues: - print(f"{error_code}: {description}") - print(f"Passed this test(that should fail) '{name}': {test}") - print(issues) - self.fail_count += 1 - else: - if issues: - print(f"{error_code}: {description}") - print(f"Failed this test {name}: {test}") - print(issues) - - self.fail_count += 1 - - def _run_single_events_test(self, info, validator, def_mapper, onset_mapper, error_code, description, - name): + issues = sidecar.validate(hed_schema=schema, extra_def_dicts=def_dict, error_handler=error_handler) + self.report_result(result, issues, error_code, description, name, test, "sidecar_test") + + def _run_single_events_test(self, info, schema, def_dict, error_code, description,name, error_handler): from hed import TabularInput for result, tests in info.items(): - for test in tests: string = "" for row in test: @@ -120,26 +110,48 @@ def _run_single_events_test(self, info, validator, def_mapper, onset_mapper, err file_obj = io.BytesIO(string.encode("utf-8")) file = TabularInput(file_obj, sidecar=self.default_sidecar) - issues = file.validate_file([validator, def_mapper, onset_mapper], check_for_warnings=True) - if result == "fails": - if not issues: - print(f"{error_code}: {description}") - print(f"Passed this test(that should fail) '{name}': {test}") - 
print(issues) - self.fail_count += 1 - else: - if issues: - print(f"{error_code}: {description}") - print(f"Failed this test {name}: {test}") - print(issues) - - self.fail_count += 1 - - def test_summary(self): + issues = file.validate(hed_schema=schema, extra_def_dicts=def_dict, error_handler=error_handler) + self.report_result(result, issues, error_code, description, name, test, "events_test") + + def _run_single_combo_test(self, info, schema, def_dict, error_code, description,name, error_handler): + from hed import TabularInput + for result, tests in info.items(): + for test in tests: + buffer = io.BytesIO(json.dumps(test['sidecar']).encode("utf-8")) + sidecar = Sidecar(buffer) + sidecar.loaded_dict.update(self.default_sidecar.loaded_dict) + issues = sidecar.validate(hed_schema=schema, extra_def_dicts=def_dict, error_handler=error_handler) + string = "" + try: + for row in test['events']: + if not isinstance(row, list): + print(f"Improper grouping in test: {error_code}:{name}") + print(f"Improper data for test {name}: {test}") + print(f"This is probably a missing set of square brackets.") + break + string += "\t".join(str(x) for x in row) + "\n" + + if not string: + print(F"Invalid blank events found in test: {error_code}:{name}") + continue + file_obj = io.BytesIO(string.encode("utf-8")) + + file = TabularInput(file_obj, sidecar=sidecar) + except HedFileError: + print(f"{error_code}: {description}") + print(f"Improper data for test {name}: {test}") + print(f"This is probably a missing set of square brackets.") + continue + issues += file.validate(hed_schema=schema, extra_def_dicts=def_dict, error_handler=error_handler) + self.report_result(result, issues, error_code, description, name, test, "combo_tests") + + def test_errors(self): for test_file in self.test_files: self.run_single_test(test_file) - print(f"{self.fail_count} tests got an unexpected result") - self.assertEqual(self.fail_count, 0) + print(f"{len(self.fail_count)} tests got an unexpected result") 
+ print("\n".join(self.fail_count)) + self.assertEqual(len(self.fail_count), 0) if __name__ == '__main__': unittest.main() + diff --git a/tests/data/model_tests/na_tag_column.tsv b/tests/data/model_tests/na_tag_column.tsv new file mode 100644 index 000000000..d42bbb34b --- /dev/null +++ b/tests/data/model_tests/na_tag_column.tsv @@ -0,0 +1,2 @@ +Geometric-object Event +Square diff --git a/tests/data/model_tests/na_value_column.json b/tests/data/model_tests/na_value_column.json new file mode 100644 index 000000000..72a1d0af7 --- /dev/null +++ b/tests/data/model_tests/na_value_column.json @@ -0,0 +1,5 @@ +{ + "Value": { + "HED": "Description/#" + } +} \ No newline at end of file diff --git a/tests/data/model_tests/na_value_column.tsv b/tests/data/model_tests/na_value_column.tsv new file mode 100644 index 000000000..91d00351e --- /dev/null +++ b/tests/data/model_tests/na_value_column.tsv @@ -0,0 +1,3 @@ +HED Value +Geometric-object 1 +Square n/a diff --git a/tests/data/model_tests/no_column_header_definition.tsv b/tests/data/model_tests/no_column_header_definition.tsv index 27c89d11c..418391ef9 100644 --- a/tests/data/model_tests/no_column_header_definition.tsv +++ b/tests/data/model_tests/no_column_header_definition.tsv @@ -1,2 +1,2 @@ -Geometric-object Event, (Definition/DefTest1, (Circle)) -Square Item, Def/DefTest1 +Geometric-object Event +Circle Item,Def/DefTest1 diff --git a/tests/data/model_tests/no_column_header_definition_long.tsv b/tests/data/model_tests/no_column_header_definition_long.tsv index c58990c03..835457f00 100644 --- a/tests/data/model_tests/no_column_header_definition_long.tsv +++ b/tests/data/model_tests/no_column_header_definition_long.tsv @@ -1,2 +1,2 @@ -Item/Object/Geometric-object Event,(Property/Organizational-property/Definition/DefTest1,(InvalidDefTag)) -Item/Object/Geometric-object/2D-shape/Circle Item,Property/Organizational-property/Def/DefTest1 +Item/Object/Geometric-object Event +Item/Object/Geometric-object/2D-shape/Ellipse/Circle 
Item,Property/Organizational-property/Def/DefTest1 diff --git a/tests/data/validator_tests/bids_events_HED.json b/tests/data/validator_tests/bids_events_HED.json index 8cb2d6ba4..4158d47ec 100644 --- a/tests/data/validator_tests/bids_events_HED.json +++ b/tests/data/validator_tests/bids_events_HED.json @@ -8,8 +8,7 @@ "Units": "s" }, "HED": { - "Description": "This is a column to verity the often reserved HED name causes no issues.", + "Description": "This is a column to verify the often reserved HED name does cause issues.", "Units": "s" } - } \ No newline at end of file diff --git a/tests/models/test_base_file_input.py b/tests/models/test_base_file_input.py index 97efc8316..8314072bd 100644 --- a/tests/models/test_base_file_input.py +++ b/tests/models/test_base_file_input.py @@ -3,7 +3,6 @@ import shutil from hed import Sidecar from hed import BaseInput, TabularInput -from hed.models.def_mapper import DefMapper from hed.models.column_mapper import ColumnMapper from hed.models import DefinitionDict from hed import schema @@ -40,32 +39,20 @@ def setUpClass(cls): sidecar1 = Sidecar(json_path, name='face_sub1_json') mapper1 = ColumnMapper(sidecar=sidecar1, optional_tag_columns=['HED'], warn_on_missing_column=False) cls.input_data1 = BaseInput(events_path, file_type='.tsv', has_column_names=True, - name="face_sub1_events", mapper=mapper1, - definition_columns=['HED'], allow_blank_names=False) + name="face_sub1_events", mapper=mapper1, allow_blank_names=False) cls.input_data2 = BaseInput(events_path, file_type='.tsv', has_column_names=True, name="face_sub2_events") @classmethod def tearDownClass(cls): shutil.rmtree(cls.base_output_folder) - def test_get_definitions(self): - defs1 = self.input_data1.get_definitions(as_strings=True) - self.assertIsInstance(defs1, dict, "get_definitions returns dictionary when as strings") - self.assertEqual(len(defs1), 17, "get_definitions should have the right number of definitions") - - defs2 = self.input_data1.get_definitions() -
self.assertIsInstance(defs2, DefMapper, "get_definitions returns a DefMapper by default") - - defs3 = self.input_data2.get_definitions(as_strings=False) - self.assertIsInstance(defs3, DefMapper, "get_definitions returns a DefMapper when not as strings") - def test_gathered_defs(self): # todo: add unit tests for definitions in tsv file - defs = DefinitionDict.get_as_strings(self.tabular_file.def_dict) + defs = DefinitionDict.get_as_strings(self.tabular_file._sidecar.extract_definitions(hed_schema=self.hed_schema)) expected_defs = { 'jsonfiledef': '(Item/JsonDef1/#,Item/JsonDef1)', 'jsonfiledef2': '(Item/JsonDef2/#,Item/JsonDef2)', - 'jsonfiledef3': '(Item/JsonDef3/#,InvalidTag)', + 'jsonfiledef3': '(Item/JsonDef3/#)', 'takesvaluedef': '(Age/#)', 'valueclassdef': '(Acceleration/#)' } diff --git a/tests/models/test_column_mapper.py b/tests/models/test_column_mapper.py index c2eeea109..78a6b99a9 100644 --- a/tests/models/test_column_mapper.py +++ b/tests/models/test_column_mapper.py @@ -1,8 +1,7 @@ import unittest import os -from hed.models import ColumnMapper, ColumnType, ColumnMetadata, HedString, model_constants -from hed.schema import load_schema +from hed.models import ColumnMapper, ColumnType, HedString from hed.models.sidecar import Sidecar @@ -44,11 +43,6 @@ def setUpClass(cls): cls.short_tag_partial_prefix = 'Language-item/Character/' cls.short_tag_partial_prefix2 = 'Character/' - def test_set_column_prefix_dict(self): - mapper = ColumnMapper() - mapper.set_column_prefix_dict(self.column_prefix_dictionary, True) - self.assertTrue(len(mapper._final_column_map) == 3) - def test_set_tag_columns(self): mapper = ColumnMapper() mapper.set_tag_columns(self.zero_based_tag_columns, finalize_mapping=True) @@ -112,94 +106,12 @@ def test_set_column_map(self): mapper.set_column_map(self.test_column_map) self.assertTrue(len(mapper._final_column_map) >= 1) - def test__set_column_prefix(self): - mapper = ColumnMapper() - mapper._set_column_prefix(mapper._final_column_map, 
self.add_column_number, self.required_prefix) - self.assertTrue(len(mapper._final_column_map) >= 1) - - mapper = ColumnMapper() - with self.assertRaises(TypeError): - mapper._set_column_prefix(mapper._final_column_map, self.add_column_name, self.required_prefix) - def test__finalize_mapping(self): mapper = ColumnMapper() mapper.add_columns([self.add_column_number], ColumnType.Value) mapper._finalize_mapping() self.assertTrue(len(mapper._final_column_map) >= 1) - def test_expand_column(self): - mapper = ColumnMapper() - mapper._set_sidecar(Sidecar(self.basic_events_json)) - mapper.set_column_map(self.basic_column_map) - expanded_column = mapper._expand_column(2, "go") - self.assertTrue(isinstance(expanded_column[0], HedString)) - - def test_expand_row_tags(self): - mapper = ColumnMapper() - mapper._set_sidecar(Sidecar(self.basic_events_json)) - mapper.add_columns(self.basic_hed_tags_column) - mapper.set_column_map(self.basic_column_map) - expanded_row = mapper.expand_row_tags(self.basic_event_row) - self.assertTrue(isinstance(expanded_row, dict)) - self.assertTrue(0 in expanded_row[model_constants.COLUMN_TO_HED_TAGS]) - - def test_expansion_issues(self): - mapper = ColumnMapper() - mapper._set_sidecar(Sidecar(self.basic_events_json)) - mapper.add_columns(self.basic_hed_tags_column) - mapper.set_column_map(self.basic_column_map) - expanded_row = mapper.expand_row_tags(self.basic_event_row_invalid) - column_issues = expanded_row[model_constants.COLUMN_ISSUES][2] - self.assertEqual(len(column_issues), 1) - self.assertTrue(0 in expanded_row[model_constants.COLUMN_TO_HED_TAGS]) - - def test_remove_prefix_if_needed(self): - mapper = ColumnMapper() - mapper.set_column_prefix_dict({self.add_column_number: self.required_prefix}) - remove_prefix_func = mapper.get_prefix_remove_func(self.add_column_number) - test_string_obj = HedString(self.complex_hed_tag_required_prefix) - no_prefix_string = test_string_obj.get_as_form("org_tag", remove_prefix_func) - 
self.assertEqual(str(no_prefix_string), str(self.complex_hed_tag_no_prefix)) - - def test__prepend_prefix_to_required_tag_column_if_needed(self): - category_tags = HedString('Participant response, Stimulus') - ColumnMetadata._prepend_required_prefix(category_tags, self.category_key) - self.assertIsInstance(category_tags, HedString) - self.assertEqual(str(category_tags), str(self.category_participant_and_stimulus_tags)) - - # Verify reading/writing a short tag to a file column with a name_prefix works - def test_add_prefix_verify_short_tag_conversion(self): - schema_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), self.schema_file) - hed_schema = load_schema(schema_file) - hed_string_obj = HedString(self.short_tag_with_missing_prefix) - ColumnMetadata._prepend_required_prefix(hed_string_obj, self.short_tag_key) - issues = hed_string_obj.convert_to_canonical_forms(hed_schema) - self.assertFalse(issues) - for tag in hed_string_obj.get_all_tags(): - self.assertEqual("Character/D", tag.short_tag) - - def test_add_prefix_verify_short_tag_read(self): - column_mapper = ColumnMapper(column_prefix_dictionary={0: self.short_tag_key}) - test_strings = { - 'test_no_prefix': self.short_tag_with_missing_prefix, - 'test_full_prefix': self.short_tag_key + self.short_tag_with_missing_prefix, - 'test_partial_prefix1': self.short_tag_partial_prefix + self.short_tag_with_missing_prefix, - 'test_partial_prefix2': self.short_tag_partial_prefix2 + self.short_tag_with_missing_prefix, - } - expected_results = { - 'test_no_prefix': self.short_tag_key + self.short_tag_with_missing_prefix, - 'test_full_prefix': self.short_tag_key + self.short_tag_with_missing_prefix, - 'test_partial_prefix1': self.short_tag_partial_prefix + self.short_tag_with_missing_prefix, - 'test_partial_prefix2': self.short_tag_partial_prefix2 + self.short_tag_with_missing_prefix, - } - - for test_key in test_strings: - test_string = test_strings[test_key] - expected_result = expected_results[test_key] - - 
expanded_row = column_mapper.expand_row_tags([test_string]) - prepended_hed_string = expanded_row[model_constants.COLUMN_TO_HED_TAGS][0] - self.assertEqual(expected_result, str(prepended_hed_string)) if __name__ == '__main__': diff --git a/tests/models/test_def_mapper.py b/tests/models/test_def_mapper.py deleted file mode 100644 index 4f38c88da..000000000 --- a/tests/models/test_def_mapper.py +++ /dev/null @@ -1,292 +0,0 @@ -import unittest -import os - -from hed import schema -from hed.models import DefinitionDict, DefMapper, HedString -from hed.validator import HedValidator -from hed.errors import ErrorHandler, ErrorContext - - -class Test(unittest.TestCase): - basic_hed_string_with_def_first_paren = None - - @classmethod - def setUpClass(cls): - cls.base_data_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/') - hed_xml_file = os.path.realpath(os.path.join(cls.base_data_dir, "schema_tests/HED8.0.0t.xml")) - cls.hed_schema = schema.load_schema(hed_xml_file) - cls.def_contents_string = "(Item/TestDef1,Item/TestDef2)" - cls.basic_definition_string = f"(Definition/TestDef,{cls.def_contents_string})" - cls.basic_definition_string_no_paren = f"Definition/TestDef,{cls.def_contents_string}" - cls.label_def_string = "Def/TestDef" - cls.expanded_def_string = f"(Def-expand/TestDef,{cls.def_contents_string})" - cls.basic_hed_string = "Item/BasicTestTag1,Item/BasicTestTag2" - cls.basic_hed_string_with_def = f"{cls.basic_hed_string},{cls.label_def_string}" - cls.basic_hed_string_with_def_first = f"{cls.label_def_string},{cls.basic_hed_string}" - cls.basic_hed_string_with_def_first_paren = f"({cls.label_def_string},{cls.basic_hed_string})" - cls.placeholder_label_def_string = "Def/TestDefPlaceholder/2471" - cls.placeholder_definition_contents = "(Item/TestDef1/#,Item/TestDef2)" - cls.placeholder_definition_string = f"(Definition/TestDefPlaceholder/#,{cls.placeholder_definition_contents})" - cls.placeholder_definition_string_no_paren = \ - 
f"Definition/TestDefPlaceholder/#,{cls.placeholder_definition_contents}" - cls.placeholder_expanded_def_string = "(Def-expand/TestDefPlaceholder/2471,(Item/TestDef1/2471,Item/TestDef2))" - - cls.placeholder_hed_string_with_def = f"{cls.basic_hed_string},{cls.placeholder_label_def_string}" - cls.placeholder_hed_string_with_def_first = f"{cls.placeholder_label_def_string},{cls.basic_hed_string}" - cls.placeholder_hed_string_with_def_first_paren = f"({cls.placeholder_label_def_string},{cls.basic_hed_string})" - - cls.valid_definition_strings = { - 'str_no_defs': False, - 'str2': True, - 'str3': False, - 'str4': False, - 'str5': False, - 'str6': False, - 'str7': False, - } - cls.mark_all_as_valid_strings = { - 'str_no_defs': False, - 'str2': False, - 'str3': False, - 'str4': False, - 'str5': False, - 'str6': False, - 'str7': False, - } - - def base_def_validator(self, test_strings, result_strings, valid_strings, expand_defs, shrink_defs, - remove_definitions, extra_ops=None, - basic_definition_string=None): - if not basic_definition_string: - basic_definition_string = self.basic_definition_string - def_dict = DefinitionDict() - def_string = HedString(basic_definition_string) - def_string.convert_to_canonical_forms(None) - def_dict.check_for_definitions(def_string) - - def_mapper = DefMapper(def_dict) - hed_ops = [] - if extra_ops: - hed_ops += extra_ops - hed_ops.append(def_mapper) - - for key in test_strings: - string, expected_result, invalid = test_strings[key], result_strings[key], valid_strings[key] - test_string = HedString(string) - def_issues = test_string.validate(hed_ops, expand_defs=expand_defs, shrink_defs=shrink_defs, - remove_definitions=remove_definitions) - self.assertEqual(invalid, bool(def_issues)) - self.assertEqual(test_string.get_as_short(), expected_result) - - def test_expand_def_tags(self): - basic_def_strings = { - 'str_no_defs': self.basic_definition_string, - 'str2': self.basic_definition_string_no_paren, - 'str3': self.basic_hed_string + "," 
+ self.basic_definition_string, - 'str4': self.basic_definition_string + "," + self.basic_hed_string, - 'str5': self.basic_hed_string_with_def, - 'str6': self.basic_hed_string_with_def_first, - 'str7': self.basic_hed_string_with_def_first_paren, - } - expanded_def_strings = { - 'str_no_defs': "", - 'str2': self.basic_definition_string_no_paren, - 'str3': self.basic_hed_string, - 'str4': self.basic_hed_string, - 'str5': self.basic_hed_string + "," + self.expanded_def_string, - 'str6': self.expanded_def_string + "," + self.basic_hed_string, - 'str7': "(" + self.expanded_def_string + "," + self.basic_hed_string + ")" - } - expanded_def_strings_with_definition = { - 'str_no_defs': self.basic_definition_string, - 'str2': self.basic_definition_string_no_paren, - 'str3': self.basic_hed_string + "," + self.basic_definition_string, - 'str4': self.basic_definition_string + "," + self.basic_hed_string, - 'str5': self.basic_hed_string + "," + self.expanded_def_string, - 'str6': self.expanded_def_string + "," + self.basic_hed_string, - 'str7': "(" + self.expanded_def_string + "," + self.basic_hed_string + ")" - } - - self.base_def_validator(basic_def_strings, expanded_def_strings_with_definition, - self.mark_all_as_valid_strings, expand_defs=True, - shrink_defs=False, remove_definitions=False) - self.base_def_validator(basic_def_strings, basic_def_strings, self.mark_all_as_valid_strings, - expand_defs=False, shrink_defs=False, remove_definitions=False) - self.base_def_validator(basic_def_strings, basic_def_strings, self.mark_all_as_valid_strings, - expand_defs=False, shrink_defs=True, remove_definitions=False) - self.base_def_validator(expanded_def_strings_with_definition, basic_def_strings, - self.mark_all_as_valid_strings, expand_defs=False, shrink_defs=True, - remove_definitions=False) - self.base_def_validator(expanded_def_strings_with_definition, expanded_def_strings_with_definition, - self.mark_all_as_valid_strings, expand_defs=True, shrink_defs=False, - 
remove_definitions=False) - self.base_def_validator(basic_def_strings, expanded_def_strings, self.mark_all_as_valid_strings, - expand_defs=True, shrink_defs=False, remove_definitions=True) - - validator = HedValidator(self.hed_schema) - extra_ops = [validator] - - self.base_def_validator(basic_def_strings, expanded_def_strings_with_definition, - self.valid_definition_strings, expand_defs=True, shrink_defs=False, - extra_ops=extra_ops, remove_definitions=False) - - # special case test - def test_changing_tag_then_def_mapping(self): - def_dict = DefinitionDict() - def_string = HedString(self.basic_definition_string) - def_string.convert_to_canonical_forms(None) - def_dict.check_for_definitions(def_string) - def_mapper = DefMapper(def_dict) - validator = HedValidator(self.hed_schema) - hed_ops = [validator, def_mapper] - - test_string = HedString(self.label_def_string) - tag = test_string.children[0] - tag.tag = "Organizational-property/" + str(tag) - def_issues = test_string.validate(hed_ops, expand_defs=True) - self.assertFalse(def_issues) - self.assertEqual(test_string.get_as_short(), f"{self.expanded_def_string}") - - test_string = HedString(self.label_def_string) - tag = test_string.children[0] - tag.tag = "Organizational-property22/" + str(tag) - def_issues = test_string.validate(hed_ops, expand_defs=True) - self.assertTrue(def_issues) - - def test_expand_def_tags_placeholder(self): - basic_def_strings = { - 'str_no_defs': self.placeholder_definition_string, - 'str2': self.placeholder_definition_string_no_paren, - 'str3': self.basic_hed_string + "," + self.placeholder_definition_string, - 'str4': self.placeholder_definition_string + "," + self.basic_hed_string, - 'str5': self.placeholder_hed_string_with_def, - 'str6': self.placeholder_hed_string_with_def_first, - 'str7': self.placeholder_hed_string_with_def_first_paren, - } - expanded_def_strings = { - 'str_no_defs': "", - 'str2': self.placeholder_definition_string_no_paren, - 'str3': self.basic_hed_string, - 
'str4': self.basic_hed_string, - 'str5': self.basic_hed_string + "," + self.placeholder_expanded_def_string, - 'str6': self.placeholder_expanded_def_string + "," + self.basic_hed_string, - 'str7': "(" + self.placeholder_expanded_def_string + "," + self.basic_hed_string + ")", - } - expanded_def_strings_with_definition = { - 'str_no_defs': self.placeholder_definition_string, - 'str2': self.placeholder_definition_string_no_paren, - 'str3': self.basic_hed_string + "," + self.placeholder_definition_string, - 'str4': self.placeholder_definition_string + "," + self.basic_hed_string, - 'str5': self.basic_hed_string + "," + self.placeholder_expanded_def_string, - 'str6': self.placeholder_expanded_def_string + "," + self.basic_hed_string, - 'str7': "(" + self.placeholder_expanded_def_string + "," + self.basic_hed_string + ")", - } - - self.base_def_validator(basic_def_strings, expanded_def_strings_with_definition, self.mark_all_as_valid_strings, - expand_defs=True, shrink_defs=False, - remove_definitions=False, basic_definition_string=self.placeholder_definition_string) - - self.base_def_validator(basic_def_strings, basic_def_strings, self.mark_all_as_valid_strings, - expand_defs=False, shrink_defs=False, - remove_definitions=False, basic_definition_string=self.placeholder_definition_string) - - self.base_def_validator(basic_def_strings, basic_def_strings, self.mark_all_as_valid_strings, - expand_defs=False, shrink_defs=True, - remove_definitions=False, basic_definition_string=self.placeholder_definition_string) - - self.base_def_validator(expanded_def_strings_with_definition, basic_def_strings, self.mark_all_as_valid_strings, - expand_defs=False, shrink_defs=True, - remove_definitions=False, basic_definition_string=self.placeholder_definition_string) - - self.base_def_validator(basic_def_strings, expanded_def_strings, self.mark_all_as_valid_strings, - expand_defs=True, shrink_defs=False, - remove_definitions=True, basic_definition_string=self.placeholder_definition_string) 
- - validator = HedValidator(self.hed_schema) - extra_ops = [validator] - self.base_def_validator(basic_def_strings, expanded_def_strings_with_definition, self.valid_definition_strings, - expand_defs=True, shrink_defs=False, - remove_definitions=False, basic_definition_string=self.placeholder_definition_string, - extra_ops=extra_ops) - - def test_expand_def_tags_placeholder_invalid(self): - def_dict = DefinitionDict() - def_string = HedString(self.placeholder_definition_string) - def_string.convert_to_canonical_forms(None) - def_dict.check_for_definitions(def_string) - def_mapper = DefMapper(def_dict) - - placeholder_label_def_string_no_placeholder = "def/TestDefPlaceholder" - - test_string = HedString(placeholder_label_def_string_no_placeholder) - test_string.convert_to_canonical_forms(None) - def_issues = def_mapper.expand_def_tags(test_string) - self.assertEqual(str(test_string), placeholder_label_def_string_no_placeholder) - self.assertTrue(def_issues) - - def_dict = DefinitionDict() - def_string = HedString(self.basic_definition_string) - def_string.convert_to_canonical_forms(None) - def_dict.check_for_definitions(def_string) - def_mapper = DefMapper(def_dict) - - label_def_string_has_invalid_placeholder = "def/TestDef/54687" - - test_string = HedString(label_def_string_has_invalid_placeholder) - test_string.convert_to_canonical_forms(None) - def_issues = def_mapper.expand_def_tags(test_string) - self.assertEqual(str(test_string), label_def_string_has_invalid_placeholder) - self.assertTrue(def_issues) - - def test_bad_def_expand(self): - def_dict = DefinitionDict() - def_string = HedString(self.placeholder_definition_string) - def_string.convert_to_canonical_forms(None) - def_dict.check_for_definitions(def_string) - def_mapper = DefMapper(def_dict) - - valid_placeholder = HedString(self.placeholder_expanded_def_string) - def_issues = valid_placeholder.validate(def_mapper) - self.assertFalse(def_issues) - - invalid_placeholder = 
HedString("(Def-expand/TestDefPlaceholder/2471,(Item/TestDef1/21,Item/TestDef2))") - def_issues = invalid_placeholder.validate(def_mapper) - self.assertTrue(bool(def_issues)) - - def test_def_no_content(self): - def_dict = DefinitionDict() - def_string = HedString("(Definition/EmptyDef)") - def_string.convert_to_canonical_forms(None) - def_dict.check_for_definitions(def_string) - def_mapper = DefMapper(def_dict) - - valid_empty = HedString("Def/EmptyDef") - def_issues = valid_empty.validate(def_mapper, expand_defs=True) - self.assertEqual(str(valid_empty), "(Def-expand/EmptyDef)") - self.assertFalse(def_issues) - - valid_empty = HedString("Def/EmptyDef") - def_issues = valid_empty.validate(def_mapper, expand_defs=False) - self.assertFalse(def_issues) - - def test_duplicate_def(self): - def_dict = DefinitionDict() - def_string = HedString(self.placeholder_definition_string) - def_string.convert_to_canonical_forms(None) - error_handler = ErrorHandler() - error_handler.push_error_context(ErrorContext.ROW, 5) - def_dict.check_for_definitions(def_string, error_handler=error_handler) - def_mapper = DefMapper([]) - self.assertEqual(len(def_mapper.issues), 0) - - def_mapper = DefMapper([def_dict, def_dict]) - self.assertEqual(len(def_mapper.issues), 1) - self.assertTrue('ec_row' in def_mapper.issues[0]) - - def_mapper = DefMapper([def_dict, def_dict, def_dict]) - self.assertEqual(len(def_mapper.issues), 2) - self.assertTrue('ec_row' in def_mapper.issues[0]) - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/models/test_definition_dict.py b/tests/models/test_definition_dict.py index a463e60a0..ee03122aa 100644 --- a/tests/models/test_definition_dict.py +++ b/tests/models/test_definition_dict.py @@ -3,14 +3,18 @@ from hed.errors import ErrorHandler, DefinitionErrors from hed.models.hed_string import HedString from hed import HedTag +from hed import load_schema_version class TestDefBase(unittest.TestCase): + @classmethod + def setUpClass(cls): + 
cls.hed_schema = load_schema_version("8.0.0") + def check_def_base(self, test_strings, expected_issues): for test_key in test_strings: def_dict = DefinitionDict() - hed_string_obj = HedString(test_strings[test_key]) - hed_string_obj.convert_to_canonical_forms(None) + hed_string_obj = HedString(test_strings[test_key], self.hed_schema) test_issues = def_dict.check_for_definitions(hed_string_obj) expected_issue = expected_issues[test_key] # print(test_issues) @@ -33,16 +37,16 @@ class TestDefinitionDict(TestDefBase): def test_check_for_definitions(self): def_dict = DefinitionDict() original_def_count = len(def_dict.defs) - hed_string_obj = HedString(self.basic_definition_string) - hed_string_obj.validate(def_dict) + hed_string_obj = HedString(self.placeholder_def_string, hed_schema=self.hed_schema) + def_dict.check_for_definitions(hed_string_obj) new_def_count = len(def_dict.defs) self.assertGreater(new_def_count, original_def_count) def test_check_for_definitions_placeholder(self): def_dict = DefinitionDict() original_def_count = len(def_dict.defs) - hed_string_obj = HedString(self.placeholder_def_string) - hed_string_obj.validate(def_dict) + hed_string_obj = HedString(self.placeholder_def_string, hed_schema=self.hed_schema) + def_dict.check_for_definitions(hed_string_obj) new_def_count = len(def_dict.defs) self.assertGreater(new_def_count, original_def_count) @@ -99,6 +103,26 @@ def test_definitions(self): self.check_def_base(test_strings, expected_results) + def test_expand_defs(self): + test_strings = { + 1: "Def/TestDefPlaceholder/2471,Event", + 2: "Event,(Def/TestDefPlaceholder/2471,Event)", + 3: "Def-expand/TestDefPlaceholder/2471,(Item/TestDef1/2471,Item/TestDef2),Event", + } + + expected_results = { + 1: "(Def-expand/TestDefPlaceholder/2471,(Item/TestDef1/2471,Item/TestDef2)),Event", + 2: "Event,((Def-expand/TestDefPlaceholder/2471,(Item/TestDef1/2471,Item/TestDef2)),Event)", + # this one shouldn't change as it doesn't have a parent + 3: 
"Def-expand/TestDefPlaceholder/2471,(Item/TestDef1/2471,Item/TestDef2),Event", + } + def_dict = DefinitionDict() + definition_string = "(Definition/TestDefPlaceholder/#,(Item/TestDef1/#,Item/TestDef2))" + def_dict.check_for_definitions(HedString(definition_string, hed_schema=self.hed_schema)) + for key, test_string in test_strings.items(): + hed_string = HedString(test_string, hed_schema=self.hed_schema) + def_dict.expand_def_tags(hed_string) + self.assertEqual(str(hed_string), expected_results[key]) if __name__ == '__main__': unittest.main() diff --git a/tests/models/test_expression_parser.py b/tests/models/test_expression_parser.py index 7a7ee020d..2066e4e2a 100644 --- a/tests/models/test_expression_parser.py +++ b/tests/models/test_expression_parser.py @@ -4,6 +4,14 @@ from hed.models.expression_parser import QueryParser import os from hed import schema +from hed import HedTag + + +def tag_terms(self): + if isinstance(self, HedTag): + if self._schema_entry: + return self._tag_terms + return (str(self).lower(),) class TestParser(unittest.TestCase): @@ -14,6 +22,9 @@ def setUpClass(cls): hed_xml_file = os.path.join(base_data_dir, "schema_tests/HED8.0.0t.xml") cls.hed_schema = schema.load_schema(hed_xml_file) + HedTag._tag_terms = HedTag.tag_terms + HedTag.tag_terms = property(tag_terms) + def base_test(self, parse_expr, search_strings): expression = QueryParser(parse_expr) diff --git a/tests/models/test_hed_string.py b/tests/models/test_hed_string.py index 894668d5e..af17878bb 100644 --- a/tests/models/test_hed_string.py +++ b/tests/models/test_hed_string.py @@ -1,5 +1,6 @@ from hed.models import HedString import unittest +from hed import load_schema_version class TestHedStrings(unittest.TestCase): @@ -170,3 +171,29 @@ def test_split_hed_string(self): } self.compare_split_results(test_strings, expected_results) + +class TestHedStringShrinkDefs(unittest.TestCase): + hed_schema = load_schema_version("8.0.0") + + def test_shrink_defs(self): + test_strings = { + 1: 
"(Def-expand/TestDefPlaceholder/2471,(Item/TestDef1/2471,Item/TestDef2)),Event", + 2: "Event, ((Def-expand/TestDefPlaceholder/2471,(Item/TestDef1/2471,Item/TestDef2)),Event)", + # this one shouldn't change as it doesn't have a parent + 3: "Def-expand/TestDefPlaceholder/2471,(Item/TestDef1/2471,Item/TestDef2),Event", + # This one is an obviously invalid def, but still shrinks + 4: "(Def-expand/TestDefPlaceholder/2471,(Item/TestDef1/2471,Item/TestDef2), ThisDefIsInvalid),Event", + } + + expected_results = { + 1: "Def/TestDefPlaceholder/2471,Event", + 2: "Event,(Def/TestDefPlaceholder/2471,Event)", + 3: "Def-expand/TestDefPlaceholder/2471,(Item/TestDef1/2471,Item/TestDef2),Event", + 4: "Def/TestDefPlaceholder/2471,Event", + } + + for key, test_string in test_strings.items(): + hed_string = HedString(test_string, hed_schema=self.hed_schema) + hed_string.shrink_defs() + self.assertEqual(str(hed_string), expected_results[key]) + diff --git a/tests/models/test_hed_tag.py b/tests/models/test_hed_tag.py index 39daeec83..9eba272eb 100644 --- a/tests/models/test_hed_tag.py +++ b/tests/models/test_hed_tag.py @@ -153,30 +153,4 @@ def test_determine_allows_extensions(self): self.assertEqual(extension_tag1_result, True) self.assertEqual(no_extension_tag1_result, False) self.assertEqual(no_extension_tag2_result, False) - self.assertEqual(no_extension_tag3_result, False) - - def test_finding_tags_no_schema(self): - # Verify basic tag identification works. 
- tag = HedTag("Onset") - tag.convert_to_canonical_forms(hed_schema=None) - self.assertTrue(tag._schema_entry) - - tag2 = HedTag("OtherFolders/Onset") - tag2.convert_to_canonical_forms(hed_schema=None) - self.assertTrue(tag2._schema_entry) - - tag4 = HedTag("OtherFolders/Onset/Extension") - tag4.convert_to_canonical_forms(hed_schema=None) - self.assertTrue(tag4._schema_entry) - - tag3 = HedTag("OtherFolders/Onset-NotOnset") - tag3.convert_to_canonical_forms(hed_schema=None) - self.assertFalse(tag3._schema_entry) - - tag = HedTag("Onset") - tag.convert_to_canonical_forms(hed_schema=self.hed_schema) - self.assertTrue(tag._schema_entry) - - tag2 = HedTag("Property/Data-property/Data-marker/Temporal-marker/Onset") - tag2.convert_to_canonical_forms(hed_schema=self.hed_schema) - self.assertTrue(tag._schema_entry) + self.assertEqual(no_extension_tag3_result, False) \ No newline at end of file diff --git a/tests/models/test_sidecar.py b/tests/models/test_sidecar.py index 14f5ff68a..1925745ae 100644 --- a/tests/models/test_sidecar.py +++ b/tests/models/test_sidecar.py @@ -8,6 +8,7 @@ from hed.validator import HedValidator from hed import schema from hed.models import DefinitionDict +from hed.errors import ErrorHandler class Test(unittest.TestCase): @@ -80,35 +81,28 @@ def test__iter__(self): self.assertEqual(columns_target, columns_count) def test_validate_column_group(self): - validator = HedValidator(hed_schema=None) - # validation_issues = self.json_def_sidecar.validate_entries(validator, check_for_warnings=True) - # self.assertEqual(len(validation_issues), 0) - # - # validation_issues = self.default_sidecar.validate_entries(validator, check_for_warnings=True) - # self.assertEqual(len(validation_issues), 0) + validation_issues = self.errors_sidecar.validate(self.hed_schema) + self.assertEqual(len(validation_issues), 22) - validation_issues = self.errors_sidecar.validate_entries(validator, check_for_warnings=True) - self.assertEqual(len(validation_issues), 4) + 
validation_issues2 = self.errors_sidecar_minor.validate(self.hed_schema) + self.assertEqual(len(validation_issues2), 18) - validation_issues2 = self.errors_sidecar_minor.validate_entries(validator, check_for_warnings=True) - self.assertEqual(len(validation_issues2), 10) + validation_issues = self.json_without_definitions_sidecar.validate(self.hed_schema) + self.assertEqual(len(validation_issues), 8) - validation_issues = self.json_without_definitions_sidecar.validate_entries(validator, check_for_warnings=True) - self.assertEqual(len(validation_issues), 1) - - hed_string = HedString("(Definition/JsonFileDef/#, (Item/JsonDef1/#,Item/JsonDef1))") + hed_string = HedString("(Definition/JsonFileDef/#, (Item/JsonDef1/#,Item/JsonDef1))", self.hed_schema) extra_def_dict = DefinitionDict() - hed_string.validate(extra_def_dict) + extra_def_dict.check_for_definitions(hed_string) - validation_issues = self.json_without_definitions_sidecar.validate_entries(validator, check_for_warnings=True, - extra_def_dicts=extra_def_dict) - self.assertEqual(len(validation_issues), 0) + validation_issues2 = self.json_without_definitions_sidecar.validate(self.hed_schema, extra_def_dicts=extra_def_dict) + # this removes one undef matched error and adds two extended tag warnings + self.assertEqual(len(validation_issues2), 9) def test_duplicate_def(self): sidecar = self.json_def_sidecar - def_dicts = sidecar.get_def_dicts() - issues = sidecar.validate_entries(extra_def_dicts=def_dicts) + duplicate_dict = sidecar.extract_definitions(hed_schema=self.hed_schema) + issues = sidecar.validate(self.hed_schema, extra_def_dicts=duplicate_dict, error_handler=ErrorHandler(False)) self.assertEqual(len(issues), 5) self.assertTrue(issues[0]['code'], ValidationErrors.HED_DEFINITION_INVALID) @@ -120,7 +114,7 @@ def test_save_load(self): reloaded_sidecar = Sidecar(save_filename) for str1, str2 in zip(sidecar.hed_string_iter(), reloaded_sidecar.hed_string_iter()): - self.assertEqual(str1, str2) + 
self.assertEqual(str1[0], str2[0]) def test_save_load2(self): sidecar = Sidecar(self.json_def_filename) @@ -129,7 +123,7 @@ def test_save_load2(self): reloaded_sidecar = Sidecar(io.StringIO(json_string)) for str1, str2 in zip(sidecar.hed_string_iter(), reloaded_sidecar.hed_string_iter()): - self.assertEqual(str1, str2) + self.assertEqual(str1[0], str2[0]) def test_merged_sidecar(self): base_folder = self.base_data_dir + "sidecar_tests/" diff --git a/tests/models/test_spreadsheet_input.py b/tests/models/test_spreadsheet_input.py index feac77f35..9fc8f5827 100644 --- a/tests/models/test_spreadsheet_input.py +++ b/tests/models/test_spreadsheet_input.py @@ -51,20 +51,13 @@ def test_all(self): file_input = SpreadsheetInput(hed_input, has_column_names=has_column_names, worksheet_name=worksheet_name, tag_columns=tag_columns, column_prefix_dictionary=column_prefix_dictionary) - for column_to_hed_tags in file_input: - break_here = 3 + self.assertTrue(isinstance(file_input.dataframe_a, pd.DataFrame)) + self.assertTrue(isinstance(file_input.series_a, pd.Series)) + self.assertTrue(file_input.dataframe_a.size) # Just make sure this didn't crash for now self.assertTrue(True) - def test_get_row_hed_tags(self): - row_dict = self.generic_file_input._mapper.expand_row_tags(self.row_with_hed_tags) - column_to_hed_tags_dictionary = row_dict[model_constants.COLUMN_TO_HED_TAGS] - # self.assertIsInstance(hed_string, HedString) - # self.assertTrue(hed_string) - self.assertIsInstance(column_to_hed_tags_dictionary, dict) - self.assertTrue(column_to_hed_tags_dictionary) - def test_file_as_string(self): events_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/validator_tests/bids_events_no_index.tsv') @@ -72,15 +65,14 @@ def test_file_as_string(self): json_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../data/validator_tests/bids_events.json") sidecar = Sidecar(json_path) - self.assertEqual(len(sidecar.validate_entries(expand_defs=True)), 0) + 
self.assertEqual(len(sidecar.validate(self.hed_schema)), 0) input_file = TabularInput(events_path, sidecar=sidecar) with open(events_path) as file: events_file_as_string = io.StringIO(file.read()) input_file_from_string = TabularInput(file=events_file_as_string, sidecar=sidecar) - for column_dict, column_dict in zip(input_file, input_file_from_string): - self.assertEqual(column_dict, column_dict) + self.assertTrue(input_file._dataframe.equals(input_file_from_string._dataframe)) def test_bad_file_inputs(self): self.assertRaises(HedFileError, TabularInput, None) @@ -115,7 +107,7 @@ def test_to_excel_should_work(self): column_prefix_dictionary={1: 'Label/', 3: 'Description/'}, name='ExcelOneSheet.xlsx') buffer = io.BytesIO() - spreadsheet.to_excel(buffer, output_processed_file=True) + spreadsheet.to_excel(buffer, output_assembled=True) buffer.seek(0) v = buffer.getvalue() self.assertGreater(len(v), 0, "It should have a length greater than 0") @@ -145,23 +137,13 @@ def test_loading_and_reset_mapper(self): json_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../data/validator_tests/bids_events.json") sidecar = Sidecar(json_path) - self.assertEqual(len(sidecar.validate_entries()), 0) + self.assertEqual(len(sidecar.validate(self.hed_schema)), 0) input_file_1 = TabularInput(events_path, sidecar=sidecar) input_file_2 = TabularInput(events_path, sidecar=sidecar) input_file_2.reset_column_mapper() - for (row_number, row_dict), (row_number2, row_dict2) in \ - zip(enumerate(input_file_1.iter_dataframe(return_string_only=False)), - enumerate(input_file_2.iter_dataframe(return_string_only=False))): - self.assertEqual(row_number, row_number2, - f"TabularInput should have row {row_number} equal to {row_number2} after reset") - column_dict = row_dict["column_to_hed_tags"] - self.assertTrue(len(column_dict) == 5, - f"The column dictionary for row {row_number} should have the right length") - column_dict2 = row_dict2["column_to_hed_tags"] - 
self.assertTrue(len(column_dict2) == 0, - f"The reset column dictionary for row {row_number2} should have the right length") + self.assertTrue(input_file_1.dataframe.equals(input_file_2.dataframe)) def test_no_column_header_and_convert(self): events_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), @@ -172,18 +154,7 @@ def test_no_column_header_and_convert(self): events_path_long = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/model_tests/no_column_header_long.tsv') hed_input_long = SpreadsheetInput(events_path_long, has_column_names=False, tag_columns=[1, 2]) - for column1, column2 in zip(hed_input, hed_input_long): - self.assertEqual(column1, column2) - - events_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../data/model_tests/no_column_header.tsv') - hed_input = SpreadsheetInput(events_path, has_column_names=False, tag_columns=[1, 2]) - events_path_long = os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../data/model_tests/no_column_header_long.tsv') - hed_input_long = SpreadsheetInput(events_path_long, has_column_names=False, tag_columns=[1, 2]) - hed_input_long.convert_to_short(self.hed_schema) - for column1, column2 in zip(hed_input, hed_input_long): - self.assertEqual(column1, column2) + self.assertTrue(hed_input._dataframe.equals(hed_input_long._dataframe)) def test_convert_short_long_with_definitions(self): # Verify behavior works as expected even if definitions are present @@ -195,37 +166,17 @@ def test_convert_short_long_with_definitions(self): events_path_long = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/model_tests/no_column_header_definition_long.tsv') hed_input_long = SpreadsheetInput(events_path_long, has_column_names=False, tag_columns=[1, 2]) - for column1, column2 in zip(hed_input, hed_input_long): - self.assertEqual(column1, column2) - - def test_convert_short_long_with_definitions_new_style(self): - # Verify behavior works as expected even if 
definitions are present - events_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../data/model_tests/no_column_header_definition.tsv') - hed_input = SpreadsheetInput(events_path, has_column_names=False, tag_columns=[1, 2], - hed_schema=self.hed_schema) - hed_input.convert_to_long() - - events_path_long = os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../data/model_tests/no_column_header_definition_long.tsv') - hed_input_long = SpreadsheetInput(events_path_long, has_column_names=False, tag_columns=[1, 2]) - for column1, column2 in zip(hed_input, hed_input_long): - self.assertEqual(column1, column2) + self.assertTrue(hed_input._dataframe.equals(hed_input_long._dataframe)) def test_definitions_identified(self): + # Todo ian: this test is no longer relevant events_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/model_tests/no_column_header_definition.tsv') - hed_input = SpreadsheetInput(events_path, has_column_names=False, tag_columns=[1, 2], - hed_schema=self.hed_schema) - def_entry = hed_input.def_dict['deftest1'] - tag = def_entry.contents.tags()[0] - self.assertTrue(tag._schema_entry) + hed_input = SpreadsheetInput(events_path, has_column_names=False, tag_columns=[1, 2]) events_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/model_tests/no_column_header_definition.tsv') hed_input = SpreadsheetInput(events_path, has_column_names=False, tag_columns=[1, 2]) - def_entry = hed_input.def_dict['deftest1'] - tag = def_entry.contents.tags()[0] - self.assertFalse(tag._schema_entry) + def test_loading_dataframe_directly(self): ds_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), @@ -236,9 +187,22 @@ def test_loading_dataframe_directly(self): events_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/model_tests/no_column_header_definition.tsv') hed_input2 = SpreadsheetInput(events_path, has_column_names=False, tag_columns=[1, 2]) - for column1, column2 in 
zip(hed_input, hed_input2): - self.assertEqual(column1, column2) + self.assertTrue(hed_input._dataframe.equals(hed_input2._dataframe)) + def test_ignoring_na_column(self): + events_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + '../data/model_tests/na_tag_column.tsv') + hed_input = SpreadsheetInput(events_path, has_column_names=False, tag_columns=[1, 2]) + self.assertTrue(hed_input.dataframe_a.loc[1, 1] == 'n/a') + + def test_ignoring_na_value_column(self): + from hed import TabularInput + events_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + '../data/model_tests/na_value_column.tsv') + sidecar_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + '../data/model_tests/na_value_column.json') + hed_input = TabularInput(events_path, sidecar=sidecar_path) + self.assertTrue(hed_input.dataframe_a.loc[1, 'Value'] == 'n/a') if __name__ == '__main__': unittest.main() diff --git a/tests/models/test_tabular_input.py b/tests/models/test_tabular_input.py index f514ef5ff..d306582fb 100644 --- a/tests/models/test_tabular_input.py +++ b/tests/models/test_tabular_input.py @@ -4,8 +4,8 @@ from hed.models import DefinitionEntry, Sidecar, TabularInput from hed import schema -from hed.validator import HedValidator from hed.errors import HedFileError +from hed.errors import ErrorHandler class Test(unittest.TestCase): @@ -32,38 +32,17 @@ def setUpClass(cls): def tearDownClass(cls): shutil.rmtree(cls.base_output_folder) - def test_get_definitions(self): - input_data = TabularInput(self.events_path, sidecar=self.sidecar1, name="face_sub1_events") - defs1 = input_data.get_definitions().gathered_defs - self.assertIsInstance(defs1, dict, "get_definitions returns dictionary by default") - self.assertEqual(len(defs1), 17, "get_definitions should have the right number of definitions") - for key, value in defs1.items(): - self.assertIsInstance(key, str, "get_definitions dictionary keys should be strings") - self.assertIsInstance(value, 
DefinitionEntry, - "get_definitions dict values should be strings when as strings") - defs2 = input_data.get_definitions(as_strings=False).gathered_defs - self.assertIsInstance(defs2, dict, "get_definitions returns dictionary by when not as strings") - self.assertEqual(len(defs2), 17, "get_definitions should have the right number of definitions when not strings") - for key, value in defs2.items(): - self.assertIsInstance(key, str, "get_definitions dictionary keys should be strings") - self.assertIsInstance(value, DefinitionEntry, - "get_definitions dictionary values should be strings when as strings") - self.assertIsInstance(defs2, dict, "get_definitions returns DefinitionDict when not as strings") - def test_missing_column_name_issue(self): events_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/validator_tests/bids_events_bad_column_name.tsv')) json_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), "../data/validator_tests/bids_events.json")) - validator = HedValidator(hed_schema=self.hed_schema) - sidecar = Sidecar(json_path, hed_schema=self.hed_schema) - issues = sidecar.validate_entries(validator) + sidecar = Sidecar(json_path) + issues = sidecar.validate(self.hed_schema) self.assertEqual(len(issues), 0) - input_file = TabularInput(events_path, sidecar=sidecar, hed_schema=self.hed_schema) + input_file = TabularInput(events_path, sidecar=sidecar) - validation_issues = input_file.validate_sidecar(validator) - self.assertEqual(len(validation_issues), 0) - validation_issues = input_file.validate_file(validator, check_for_warnings=True) + validation_issues = input_file.validate(hed_schema=self.hed_schema) self.assertEqual(len(validation_issues), 1) def test_expand_column_issues(self): @@ -71,16 +50,12 @@ def test_expand_column_issues(self): '../data/validator_tests/bids_events_bad_category_key.tsv') json_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 
"../data/validator_tests/bids_events.json") - validator = HedValidator(hed_schema=self.hed_schema) - sidecar = Sidecar(json_path, hed_schema=self.hed_schema) - issues = sidecar.validate_entries(validator) + sidecar = Sidecar(json_path) + issues = sidecar.validate(hed_schema=self.hed_schema) self.assertEqual(len(issues), 0) - input_file = TabularInput(events_path, sidecar=sidecar, hed_schema=self.hed_schema) + input_file = TabularInput(events_path, sidecar=sidecar) - # Fix whatever is wrong with onset tag here. It's thinking Description/Onset continues is an invalid tag???' - validation_issues = input_file.validate_sidecar(validator) - self.assertEqual(len(validation_issues), 0) - validation_issues = input_file.validate_file(validator, check_for_warnings=True) + validation_issues = input_file.validate(hed_schema=self.hed_schema) self.assertEqual(len(validation_issues), 1) def test_blank_and_duplicate_columns(self): @@ -98,16 +73,14 @@ def test_blank_and_duplicate_columns(self): # _ = TabularInput(filepath) def test_validate_file_warnings(self): - validator = HedValidator(hed_schema=self.hed_schema) - issues1 = self.sidecar1.validate_entries(validator, check_for_warnings=True) + issues1 = self.sidecar1.validate(hed_schema=self.hed_schema) input_file1 = TabularInput(self.events_path, sidecar=self.sidecar1) - issues1a = input_file1.validate_file(validator, check_for_warnings=True) + issues1a = input_file1.validate(hed_schema=self.hed_schema) - issues2 = self.sidecar2.validate_entries(validator, check_for_warnings=False) + issues2 = self.sidecar1.validate(hed_schema=self.hed_schema, error_handler=ErrorHandler(False)) input_file2 = TabularInput(self.events_path, sidecar=self.sidecar2) - issues2a = input_file2.validate_file(validator, check_for_warnings=False) - # TODO: Currently does not correctly check for warnings. 
- + issues2a = input_file2.validate(hed_schema=self.hed_schema, error_handler=ErrorHandler(False)) + breakHere = 3 if __name__ == '__main__': unittest.main() diff --git a/tests/schema/test_convert_tags.py b/tests/schema/test_convert_tags.py index 50e30af45..ebfa134a1 100644 --- a/tests/schema/test_convert_tags.py +++ b/tests/schema/test_convert_tags.py @@ -25,7 +25,7 @@ def converter_base(self, test_strings, expected_results, expected_errors, conver expected_issue = self.format_errors_fully(error_handler, hed_string=test_string_obj, params=expected_params) - error_handler.add_context_to_issues(test_issues) + error_handler.add_context_and_filter(test_issues) # print(test_key) # print(expected_issue) diff --git a/tests/validator/test_def_validator.py b/tests/validator/test_def_validator.py new file mode 100644 index 000000000..f889b36f1 --- /dev/null +++ b/tests/validator/test_def_validator.py @@ -0,0 +1,119 @@ +import unittest +import os + +from hed import schema +from hed.models import DefinitionDict, HedString +from hed.validator import DefValidator +from hed.errors import ErrorHandler, ErrorContext + + +class Test(unittest.TestCase): + basic_hed_string_with_def_first_paren = None + + @classmethod + def setUpClass(cls): + cls.base_data_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/') + hed_xml_file = os.path.realpath(os.path.join(cls.base_data_dir, "schema_tests/HED8.0.0t.xml")) + cls.hed_schema = schema.load_schema(hed_xml_file) + cls.def_contents_string = "(Item/TestDef1,Item/TestDef2)" + cls.basic_definition_string = f"(Definition/TestDef,{cls.def_contents_string})" + cls.basic_definition_string_no_paren = f"Definition/TestDef,{cls.def_contents_string}" + + cls.placeholder_definition_contents = "(Item/TestDef1/#,Item/TestDef2)" + cls.placeholder_definition_string = f"(Definition/TestDefPlaceholder/#,{cls.placeholder_definition_contents})" + cls.placeholder_definition_string_no_paren = \ + 
f"Definition/TestDefPlaceholder/#,{cls.placeholder_definition_contents}" + + + + cls.label_def_string = "Def/TestDef" + cls.expanded_def_string = f"(Def-expand/TestDef,{cls.def_contents_string})" + cls.basic_hed_string = "Item/BasicTestTag1,Item/BasicTestTag2" + cls.basic_hed_string_with_def = f"{cls.basic_hed_string},{cls.label_def_string}" + cls.basic_hed_string_with_def_first = f"{cls.label_def_string},{cls.basic_hed_string}" + cls.basic_hed_string_with_def_first_paren = f"({cls.label_def_string},{cls.basic_hed_string})" + cls.placeholder_label_def_string = "Def/TestDefPlaceholder/2471" + + cls.placeholder_expanded_def_string = "(Def-expand/TestDefPlaceholder/2471,(Item/TestDef1/2471,Item/TestDef2))" + + cls.placeholder_hed_string_with_def = f"{cls.basic_hed_string},{cls.placeholder_label_def_string}" + cls.placeholder_hed_string_with_def_first = f"{cls.placeholder_label_def_string},{cls.basic_hed_string}" + cls.placeholder_hed_string_with_def_first_paren = f"({cls.placeholder_label_def_string},{cls.basic_hed_string})" + + + def test_expand_def_tags_placeholder_invalid(self): + def_validator = DefValidator() + def_string = HedString(self.placeholder_definition_string, self.hed_schema) + def_validator.check_for_definitions(def_string) + + placeholder_label_def_string_no_placeholder = "Def/TestDefPlaceholder" + + test_string = HedString(placeholder_label_def_string_no_placeholder, self.hed_schema) + def_issues = def_validator.validate_def_tags(test_string) + def_issues += def_validator.expand_def_tags(test_string) + self.assertEqual(str(test_string), placeholder_label_def_string_no_placeholder) + self.assertTrue(def_issues) + + def_validator = DefValidator() + def_string = HedString(self.basic_definition_string, self.hed_schema) + def_validator.check_for_definitions(def_string) + + label_def_string_has_invalid_placeholder = "Def/TestDef/54687" + + def_validator = DefValidator() + def_string = HedString(self.basic_definition_string, self.hed_schema) + 
def_validator.check_for_definitions(def_string) + + test_string = HedString(label_def_string_has_invalid_placeholder, self.hed_schema) + def_issues = def_validator.validate_def_tags(test_string) + def_issues += def_validator.expand_def_tags(test_string) + self.assertEqual(str(test_string), label_def_string_has_invalid_placeholder) + self.assertTrue(def_issues) + + + def test_bad_def_expand(self): + def_validator = DefValidator() + def_string = HedString(self.placeholder_definition_string, self.hed_schema) + def_validator.check_for_definitions(def_string) + + valid_placeholder = HedString(self.placeholder_expanded_def_string, self.hed_schema) + def_issues = def_validator.validate_def_tags(valid_placeholder) + self.assertFalse(def_issues) + + invalid_placeholder = HedString("(Def-expand/TestDefPlaceholder/2471,(Item/TestDef1/21,Item/TestDef2))", self.hed_schema) + def_issues = def_validator.validate_def_tags(invalid_placeholder) + self.assertTrue(bool(def_issues)) + + + def test_def_no_content(self): + + def_validator = DefValidator() + def_string = HedString("(Definition/EmptyDef)", self.hed_schema) + def_validator.check_for_definitions(def_string) + + valid_empty = HedString("Def/EmptyDef", self.hed_schema) + def_issues = def_validator.validate_def_tags(valid_empty) + def_issues += def_validator.expand_def_tags(valid_empty) + self.assertEqual(str(valid_empty), "(Def-expand/EmptyDef)") + self.assertFalse(def_issues) + + valid_empty = HedString("Def/EmptyDef", self.hed_schema) + def_issues = def_validator.validate_def_tags(valid_empty) + self.assertFalse(def_issues) + + def test_duplicate_def(self): + def_dict = DefinitionDict() + def_string = HedString(self.placeholder_definition_string, self.hed_schema) + error_handler = ErrorHandler() + error_handler.push_error_context(ErrorContext.ROW, 5) + def_dict.check_for_definitions(def_string, error_handler=error_handler) + self.assertEqual(len(def_dict.issues), 0) + + def_validator = DefValidator([def_dict, def_dict]) + 
self.assertEqual(len(def_validator.issues), 1) + self.assertTrue('ec_row' in def_validator.issues[0]) + + def_dict = DefinitionDict([def_dict, def_dict, def_dict]) + self.assertEqual(len(def_dict.issues), 2) + self.assertTrue('ec_row' in def_dict.issues[0]) + diff --git a/tests/validator/test_hed_validator.py b/tests/validator/test_hed_validator.py index 6c9cb74e4..a523e33c3 100644 --- a/tests/validator/test_hed_validator.py +++ b/tests/validator/test_hed_validator.py @@ -4,10 +4,10 @@ # from hed import from hed.errors import ErrorContext from hed import schema -from hed.models import DefMapper, HedString, SpreadsheetInput, TabularInput, Sidecar -from hed.validator import HedValidator - +from hed.models import HedString, SpreadsheetInput, TabularInput, Sidecar +from hed.validator import HedValidator, DefValidator +# todo: redo all this so we class Test(unittest.TestCase): @classmethod def setUpClass(cls): @@ -33,31 +33,29 @@ def setUpClass(cls): def test__validate_input(self): test_string_obj = HedString(self.base_hed_input) - validation_issues = test_string_obj.validate(self.hed_validator) + validation_issues = test_string_obj.validate(self.hed_schema) self.assertIsInstance(validation_issues, list) name = "DummyDisplayFilename.txt" - validation_issues = self.hed_file_with_errors.validate_file(self.hed_validator, name=name) + validation_issues = self.hed_file_with_errors.validate(self.hed_schema, name=name) self.assertIsInstance(validation_issues, list) self.assertTrue(name in validation_issues[0][ErrorContext.FILE_NAME]) def test__validate_input_major_errors(self): name = "DummyDisplayFilename.txt" - validation_issues = self.hed_file_with_major_errors.validate_file(self.hed_validator, name=name) + validation_issues = self.hed_file_with_major_errors.validate(self.hed_schema, name=name) self.assertIsInstance(validation_issues, list) self.assertTrue(name in validation_issues[0][ErrorContext.FILE_NAME]) def test__validate_input_major_errors_columns(self): name = 
"DummyDisplayFilename.txt" - validation_issues = self.hed_file_with_major_errors.validate_file(self.hed_validator, - check_for_warnings=True, name=name) + validation_issues = self.hed_file_with_major_errors.validate(self.hed_schema, name=name) self.assertIsInstance(validation_issues, list) self.assertTrue(name in validation_issues[0][ErrorContext.FILE_NAME]) def test__validate_input_major_errors_multi_column(self): - validation_issues = self.hed_file_with_major_errors_multi_column.validate_file(self.hed_validator, - check_for_warnings=True) + validation_issues = self.hed_file_with_major_errors_multi_column.validate(self.hed_schema) self.assertIsInstance(validation_issues, list) self.assertEqual(len(validation_issues), 2) @@ -66,15 +64,12 @@ def test_complex_file_validation_no_index(self): '../data/validator_tests/bids_events_no_index.tsv')) json_path = os.path.realpath(os.path.join(os.path.dirname(__file__), '../data/validator_tests/bids_events.json')) - validator = HedValidator(hed_schema=self.hed_schema) sidecar = Sidecar(json_path) - issues = sidecar.validate_entries(validator) + issues = sidecar.validate(self.hed_schema) self.assertEqual(len(issues), 0) input_file = TabularInput(events_path, sidecar=sidecar) - validation_issues = input_file.validate_sidecar(validator) - self.assertEqual(len(validation_issues), 0) - validation_issues = input_file.validate_file(validator) + validation_issues = input_file.validate(self.hed_schema) self.assertEqual(len(validation_issues), 0) def test_complex_file_validation_with_index(self): @@ -84,15 +79,12 @@ def test_complex_file_validation_with_index(self): # hed_schema = schema.load_schema(schema_path) json_path = os.path.realpath(os.path.join(os.path.dirname(__file__), '../data/validator_tests/bids_events.json')) - validator = HedValidator(hed_schema=self.hed_schema) sidecar = Sidecar(json_path) - issues = sidecar.validate_entries(validator) + issues = sidecar.validate(hed_schema=self.hed_schema) self.assertEqual(len(issues), 
0) input_file = TabularInput(events_path, sidecar=sidecar) - validation_issues = input_file.validate_sidecar(validator) - self.assertEqual(len(validation_issues), 0) - validation_issues = input_file.validate_file(validator) + validation_issues = input_file.validate(hed_schema=self.hed_schema) self.assertEqual(len(validation_issues), 0) def test_complex_file_validation_invalid(self): @@ -104,17 +96,13 @@ def test_complex_file_validation_invalid(self): hed_schema = schema.load_schema(schema_path) json_path = os.path.realpath(os.path.join(os.path.dirname(__file__), '../data/validator_tests/bids_events_bad_defs.json')) - validator = HedValidator(hed_schema=hed_schema) sidecar = Sidecar(json_path) - issues = sidecar.validate_entries(hed_ops=validator, check_for_warnings=True) + issues = sidecar.validate(hed_schema) self.assertEqual(len(issues), 4) input_file = TabularInput(events_path, sidecar=sidecar) - validation_issues = input_file.validate_sidecar(validator, check_for_warnings=True) - self.assertEqual(len(validation_issues), 4) - - validation_issues = input_file.validate_file(validator, check_for_warnings=True) - self.assertEqual(len(validation_issues), 42) + validation_issues = input_file.validate(hed_schema) + self.assertEqual(len(validation_issues), 63) def test_complex_file_validation_invalid_definitions_removed(self): # This verifies definitions are being removed from sidecar strings before being added, or it will produce @@ -128,14 +116,12 @@ def test_complex_file_validation_invalid_definitions_removed(self): json_path = os.path.realpath(os.path.join(os.path.dirname(__file__), '../data/validator_tests/bids_events_bad_defs2.json')) sidecar = Sidecar(json_path) + issues = sidecar.validate(hed_schema) + self.assertEqual(len(issues), 4) input_file = TabularInput(events_path, sidecar=sidecar) - validator = HedValidator(hed_schema=hed_schema) - validation_issues1 = input_file.validate_sidecar(validator) - self.assertEqual(len(validation_issues1), 4) - - 
validation_issues = input_file.validate_file(validator) - self.assertEqual(len(validation_issues), 21) + validation_issues = input_file.validate(hed_schema) + self.assertEqual(len(validation_issues), 42) def test_file_bad_defs_in_spreadsheet(self): schema_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), @@ -150,9 +136,8 @@ def test_file_bad_defs_in_spreadsheet(self): column_prefix_dictionary=prefixed_needed_tag_columns, worksheet_name='LKT Events') - validator = HedValidator(hed_schema=hed_schema) - validation_issues = loaded_file.validate_file(validator, check_for_warnings=True) - self.assertEqual(len(validation_issues), 4) + validation_issues = loaded_file.validate(hed_schema=hed_schema) + self.assertEqual(len(validation_issues), 2) def test_tabular_input_with_HED_col_in_json(self): schema_path = os.path.realpath(os.path.join(os.path.dirname(__file__), @@ -163,28 +148,20 @@ def test_tabular_input_with_HED_col_in_json(self): hed_schema = schema.load_schema(schema_path) json_path = os.path.realpath(os.path.join(os.path.dirname(__file__), '../data/validator_tests/bids_events_HED.json')) - validator = HedValidator(hed_schema=hed_schema) sidecar = Sidecar(json_path) - issues = sidecar.validate_entries(validator) - self.assertEqual(len(issues), 0) + issues = sidecar.validate(hed_schema) + self.assertEqual(len(issues), 1) input_file = TabularInput(events_path, sidecar=sidecar) - validation_issues = input_file.validate_sidecar(validator) - self.assertEqual(len(validation_issues), 0) - validation_issues = input_file.validate_file(validator) + validation_issues = input_file.validate(hed_schema) self.assertEqual(len(validation_issues), 1) def test_error_spans_from_file_and_missing_required_column(self): - schema_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../data/schema_tests/HED8.0.0.mediawiki') events_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/validator_tests/tag_error_span_test.tsv') - hed_schema = 
schema.load_schema(schema_path) - input_file = SpreadsheetInput(events_path, tag_columns=[0, 1, "error"]) - validator = HedValidator(hed_schema=hed_schema) - validation_issues = input_file.validate_file(validator) + validation_issues = input_file.validate(hed_schema=self.hed_schema) self.assertEqual(validation_issues[1]['char_index'], 6) self.assertEqual(validation_issues[2]['char_index'], 6) self.assertEqual(len(validation_issues), 3) @@ -201,28 +178,15 @@ def test_org_tag_missing(self): source_span = test_string_obj._get_org_span(HedTag("Event")) self.assertEqual(source_span, (None, None)) - def test_def_mapping_single_line(self): - schema_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../data/schema_tests/HED8.0.0.mediawiki') - hed_schema = schema.load_schema(schema_path) - validator = HedValidator(hed_schema=hed_schema) - def_mapper = DefMapper() - string_with_def = \ - '(Definition/TestDefPlaceholder/#,(Item/TestDef1/#,Item/TestDef2)), def/TestDefPlaceholder/2471' - test_string = HedString(string_with_def) - issues = test_string.validate([validator, def_mapper], check_for_definitions=True) - self.assertEqual(len(issues), 0) def test_duplicate_group_in_definition(self): schema_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/schema_tests/HED8.0.0.mediawiki') hed_schema = schema.load_schema(schema_path) - validator = HedValidator(hed_schema=hed_schema) - def_mapper = DefMapper() string_with_def = \ - '(Definition/TestDef,(Item/TestDef1,Item/TestDef1))' - test_string = HedString(string_with_def) - issues = test_string.validate([validator, def_mapper], check_for_definitions=False) + '(Definition/TestDef,(Item,Item))' + test_string = HedString(string_with_def, hed_schema) + issues = test_string.validate(hed_schema) self.assertEqual(len(issues), 1) diff --git a/tests/models/test_onset_mapper.py b/tests/validator/test_onset_validator.py similarity index 57% rename from tests/models/test_onset_mapper.py rename to 
tests/validator/test_onset_validator.py index a88a45f8f..1bc814f33 100644 --- a/tests/models/test_onset_mapper.py +++ b/tests/validator/test_onset_validator.py @@ -1,10 +1,11 @@ +import copy import unittest import os from hed.errors import ErrorHandler, OnsetErrors, ErrorContext, ValidationErrors -from hed.models import DefMapper, HedString, OnsetMapper, DefinitionDict +from hed.models import HedString, DefinitionDict from hed import schema -from hed.validator import HedValidator +from hed.validator import HedValidator, OnsetValidator from tests.validator.test_tag_validator_base import TestHedBase @@ -16,53 +17,66 @@ def setUpClass(cls): cls.base_data_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/') hed_xml_file = os.path.join(cls.base_data_dir, "schema_tests/HED8.0.0.mediawiki") cls.hed_schema = schema.load_schema(hed_xml_file) - cls.placeholder_label_def_string = "def/TestDefPlaceholder/2471" - cls.placeholder_def_contents = "(Item/TestDef1/#,Item/TestDef2)" + cls.placeholder_label_def_string = "Def/TestDefPlaceholder/2471" + cls.placeholder_def_contents = "(Action/TestDef1/#,Action/TestDef2)" cls.placeholder_definition_string = f"(Definition/TestDefPlaceholder/#,{cls.placeholder_def_contents})" - cls.placeholder_expanded_def_string = "(Def-expand/TestDefPlaceholder/2471,(Item/TestDef1/2471,Item/TestDef2))" + cls.placeholder_expanded_def_string = "(Def-expand/TestDefPlaceholder/2471,(Action/TestDef1/2471,Action/TestDef2))" - cls.label_def_string = "def/TestDefNormal" - cls.def_contents = "(Item/TestDef1,Item/TestDef2)" + cls.label_def_string = "Def/TestDefNormal" + cls.def_contents = "(Action/TestDef1,Action/TestDef2)" cls.definition_string = f"(Definition/TestDefNormal,{cls.def_contents})" - cls.expanded_def_string = "(Def-expand/TestDefNormal,(Item/TestDef1/2471,Item/TestDef2))" + cls.expanded_def_string = "(Def-expand/TestDefNormal,(Action/TestDef1/2471,Action/TestDef2))" - cls.placeholder_label_def_string2 = 
"def/TestDefPlaceholder/123" - cls.placeholder_def_contents2 = "(Item/TestDef1/#,Item/TestDef2)" + cls.placeholder_label_def_string2 = "Def/TestDefPlaceholder/123" + cls.placeholder_def_contents2 = "(Action/TestDef1/#,Action/TestDef2)" cls.placeholder_definition_string2 = f"(Definition/TestDefPlaceholder/#,{cls.placeholder_def_contents2})" - cls.placeholder_expanded_def_string2 = "(Def-expand/TestDefPlaceholder/123,(Item/TestDef1/123,Item/TestDef2))" + cls.placeholder_expanded_def_string2 = "(Def-expand/TestDefPlaceholder/123,(Action/TestDef1/123,Action/TestDef2))" - def _test_issues_base(self, test_strings, test_issues, test_context, hed_ops, expand_defs=True): + cls.def_dict_placeholder = DefinitionDict() + def_string = HedString(cls.placeholder_definition_string, hed_schema=cls.hed_schema) + cls.def_dict_placeholder.check_for_definitions(def_string) + cls.def_dict_both = copy.deepcopy(cls.def_dict_placeholder) + def_string = HedString(cls.definition_string, hed_schema=cls.hed_schema) + cls.def_dict_both.check_for_definitions(def_string) + + + def _test_issues_base(self, test_strings, test_issues, test_context, placeholder_def_only): + if placeholder_def_only: + validator = OnsetValidator(self.def_dict_placeholder) + else: + validator = OnsetValidator(self.def_dict_both) for string, expected_params, context in zip(test_strings, test_issues, test_context): - test_string = HedString(string) + test_string = HedString(string, self.hed_schema) error_handler = ErrorHandler() error_handler.push_error_context(ErrorContext.HED_STRING, test_string, increment_depth_after=False) - onset_issues = test_string.validate(hed_ops, expand_defs=expand_defs) + + onset_issues = [] + onset_issues += validator.validate_onset_offset(test_string) + + error_handler.add_context_and_filter(onset_issues) + test_string.shrink_defs() issues = self.format_errors_fully(error_handler, hed_string=test_string, params=expected_params) - # print(str(onset_issues)) - # print(str(issues)) + 
print(str(onset_issues)) + print(str(issues)) error_handler.pop_error_context() - self.assertEqual(len(hed_ops[-1]._onsets), context) + self.assertEqual(len(validator._onsets), context) self.assertCountEqual(onset_issues, issues) - def _test_issues_no_context(self, test_strings, test_issues, hed_ops): + def _test_issues_no_context(self, test_strings, test_issues): + hed_validator = HedValidator(self.hed_schema, self.def_dict_both) for string, expected_params in zip(test_strings, test_issues): test_string = HedString(string) - error_handler = ErrorHandler() + error_handler = ErrorHandler(check_for_warnings=False) error_handler.push_error_context(ErrorContext.HED_STRING, test_string, increment_depth_after=False) - onset_issues = test_string.validate(hed_ops, expand_defs=True) + onset_issues = hed_validator.validate(test_string, False) + error_handler.add_context_and_filter(onset_issues) issues = self.format_errors_fully(error_handler, hed_string=test_string, params=expected_params) - # print(str(onset_issues)) - # print(str(issues)) + print(str(onset_issues)) + print(str(issues)) error_handler.pop_error_context() self.assertCountEqual(onset_issues, issues) def test_basic_onset_errors(self): - def_dict = DefinitionDict() - def_string = HedString(self.placeholder_definition_string) - def_string.validate(def_dict) - def_mapper = DefMapper(def_dict) - onset_mapper = OnsetMapper(def_mapper) - test_strings = [ f"({self.placeholder_label_def_string},Onset)", f"({self.placeholder_label_def_string},Offset)", @@ -70,9 +84,9 @@ def test_basic_onset_errors(self): f"({self.placeholder_label_def_string}, Onset, (Event), (Event))", f"({self.placeholder_label_def_string}, Onset, (Event))", "(Onset)", - f"({self.placeholder_label_def_string}, def/InvalidDef, Onset, (Event))", - "(def/TestDefInvalid, Onset)", - "(def/TestDefPlaceholder, Onset)", + f"({self.placeholder_label_def_string}, Def/InvalidDef, Onset, (Event))", + "(Def/TestDefInvalid, Onset)", + "(Def/TestDefPlaceholder, 
Onset)", f"({self.placeholder_label_def_string}, Offset, (Event))" ] # count of how many onset names are in the mapper after the line is run @@ -94,26 +108,19 @@ def test_basic_onset_errors(self): [], self.format_error(OnsetErrors.OFFSET_BEFORE_ONSET, tag=0), self.format_error(OnsetErrors.ONSET_WRONG_NUMBER_GROUPS, tag=0, - tag_list=['def/TestDefPlaceholder/2471', 'Onset', '(Event)', '(Event)']), + tag_list=['Def/TestDefPlaceholder/2471', 'Onset', '(Event)', '(Event)']), [], self.format_error(OnsetErrors.ONSET_NO_DEF_TAG_FOUND, tag=0), - self.format_error(OnsetErrors.ONSET_TOO_MANY_DEFS, tag=0, tag_list=['def/InvalidDef']), + self.format_error(OnsetErrors.ONSET_TOO_MANY_DEFS, tag=0, tag_list=['Def/InvalidDef']), self.format_error(OnsetErrors.ONSET_DEF_UNMATCHED, tag=0), self.format_error(OnsetErrors.ONSET_PLACEHOLDER_WRONG, tag=0, has_placeholder=True), self.format_error(OnsetErrors.ONSET_WRONG_NUMBER_GROUPS, tag=0, tag_list=[self.placeholder_label_def_string, 'Offset', '(Event)']), ] - self._test_issues_base(test_strings, test_issues, expected_context, [onset_mapper]) + self._test_issues_base(test_strings, test_issues, expected_context, placeholder_def_only=True) def test_basic_onset_errors_with_def_mapper(self): - def_dict = DefinitionDict() - def_string = HedString(self.placeholder_definition_string) - def_string.validate(def_dict) - def_mapper = DefMapper(def_dict) - onset_mapper = OnsetMapper(def_mapper) - hed_ops = [def_mapper, onset_mapper] - test_strings = [ f"({self.placeholder_label_def_string},Onset)", f"({self.placeholder_label_def_string},Offset)", @@ -121,9 +128,9 @@ def test_basic_onset_errors_with_def_mapper(self): f"({self.placeholder_label_def_string}, Onset, (Event), (Event))", f"({self.placeholder_label_def_string}, Onset, (Event))", "(Onset)", - f"({self.placeholder_label_def_string}, def/TestDefPlaceholder/2, Onset, (Event))", - "(def/TestDefInvalid, Onset)", - "(def/TestDefPlaceholder, Onset)", + f"({self.placeholder_label_def_string}, 
Def/TestDefPlaceholder/2, Onset, (Event))", + "(Def/TestDefInvalid, Onset)", + "(Def/TestDefPlaceholder, Onset)", f"({self.placeholder_label_def_string}, Offset, (Event))" ] # count of how many onset names are in the mapper after the line is run @@ -149,24 +156,16 @@ def test_basic_onset_errors_with_def_mapper(self): [], self.format_error(OnsetErrors.ONSET_NO_DEF_TAG_FOUND, tag=0), self.format_error(OnsetErrors.ONSET_TOO_MANY_DEFS, tag=0, - tag_list=['def/TestDefPlaceholder/2']), - self.format_error(ValidationErrors.HED_DEF_UNMATCHED, tag=0), - self.format_error(ValidationErrors.HED_DEF_VALUE_MISSING, tag=0), + tag_list=['Def/TestDefPlaceholder/2']), + self.format_error(OnsetErrors.ONSET_DEF_UNMATCHED, tag=0), + self.format_error(OnsetErrors.ONSET_PLACEHOLDER_WRONG, tag=0, has_placeholder=True), self.format_error(OnsetErrors.ONSET_WRONG_NUMBER_GROUPS, tag=0, tag_list=[self.placeholder_label_def_string, 'Offset', '(Event)']), ] - self._test_issues_base(test_strings, test_issues, expected_context, hed_ops, expand_defs=False) + self._test_issues_base(test_strings, test_issues, expected_context, placeholder_def_only=True) def test_basic_onset_errors_expanded(self): - def_dict = DefinitionDict() - def_string = HedString(self.placeholder_definition_string) - def_string.validate(def_dict) - def_string = HedString(self.definition_string) - def_string.validate(def_dict) - def_mapper = DefMapper(def_dict) - onset_mapper = OnsetMapper(def_mapper) - test_strings = [ f"({self.placeholder_expanded_def_string},Onset)", f"({self.placeholder_expanded_def_string},Offset)", @@ -174,10 +173,10 @@ def test_basic_onset_errors_expanded(self): f"({self.placeholder_expanded_def_string}, Onset, (Event), (Event))", f"({self.placeholder_expanded_def_string}, Onset, (Event))", "(Onset)", - f"({self.placeholder_expanded_def_string}, def/InvalidDef, Onset, (Event))", - "(def/TestDefInvalid, Onset)", - "(def/TestDefPlaceholder, Onset)", - "(def/TestDefNormal/InvalidPlaceholder, Onset)" + 
f"({self.placeholder_expanded_def_string}, Def/InvalidDef, Onset, (Event))", + "(Def/TestDefInvalid, Onset)", + "(Def/TestDefPlaceholder, Onset)", + "(Def/TestDefNormal/InvalidPlaceholder, Onset)" ] # count of how many onset names are in the mapper after the line is run expected_context = [ @@ -201,23 +200,15 @@ def test_basic_onset_errors_expanded(self): tag_list=[self.placeholder_expanded_def_string, 'Onset', '(Event)', '(Event)']), [], self.format_error(OnsetErrors.ONSET_NO_DEF_TAG_FOUND, tag=0), - self.format_error(OnsetErrors.ONSET_TOO_MANY_DEFS, tag=0, tag_list=['def/InvalidDef']), + self.format_error(OnsetErrors.ONSET_TOO_MANY_DEFS, tag=0, tag_list=['Def/InvalidDef']), self.format_error(OnsetErrors.ONSET_DEF_UNMATCHED, tag=0), self.format_error(OnsetErrors.ONSET_PLACEHOLDER_WRONG, tag=0, has_placeholder=True), self.format_error(OnsetErrors.ONSET_PLACEHOLDER_WRONG, tag=0, has_placeholder=False) ] - self._test_issues_base(test_strings, test_issues, expected_context, [onset_mapper]) + self._test_issues_base(test_strings, test_issues, expected_context, placeholder_def_only=False) def test_test_interleaving_onset_offset(self): - def_dict = DefinitionDict() - def_string = HedString(self.placeholder_definition_string) - def_string.validate(def_dict) - def_string = HedString(self.definition_string) - def_string.validate(def_dict) - def_mapper = DefMapper(def_dict) - onset_mapper = OnsetMapper(def_mapper) - test_strings = [ f"({self.placeholder_label_def_string},Onset)", f"({self.placeholder_label_def_string2},Onset)", @@ -248,15 +239,9 @@ def test_test_interleaving_onset_offset(self): [], ] - self._test_issues_base(test_strings, test_issues, expected_context, [onset_mapper]) + self._test_issues_base(test_strings, test_issues, expected_context, placeholder_def_only=False) def test_onset_with_defs_in_them(self): - def_dict = DefinitionDict() - def_string = HedString(self.placeholder_definition_string) - def_string.validate(def_dict) - def_mapper = DefMapper(def_dict) 
- onset_mapper = OnsetMapper(def_mapper) - test_strings = [ f"({self.placeholder_label_def_string},Onset, ({self.label_def_string}))", ] @@ -269,101 +254,23 @@ def test_onset_with_defs_in_them(self): [] ] - self._test_issues_base(test_strings, test_issues, expected_context, [onset_mapper]) + self._test_issues_base(test_strings, test_issues, expected_context, placeholder_def_only=True) def test_onset_multiple_or_misplaced_errors(self): - def_dict = DefinitionDict() - def_string = HedString(self.placeholder_definition_string) - def_string.validate(def_dict) - def_string = HedString(self.definition_string) - def_string.validate(def_dict) - def_mapper = DefMapper(def_dict) - onset_mapper = OnsetMapper(def_mapper) - hed_validator = HedValidator(hed_schema=self.hed_schema) - hed_ops = [hed_validator, def_mapper, onset_mapper] - test_strings = [ f"{self.placeholder_label_def_string},Onset", f"({self.placeholder_label_def_string},Onset, Onset)", f"({self.placeholder_label_def_string},Onset, Offset)", ] - # count of issues the line generates - onset_list = ['Onset'] - offset_list = ['Offset'] - test_issues = [ - self.format_error(ValidationErrors.HED_TOP_LEVEL_TAG, tag=1), - self.format_error(ValidationErrors.HED_TAG_REPEATED, tag=2) - + self.format_error(ValidationErrors.HED_MULTIPLE_TOP_TAGS, tag=1, - multiple_tags=onset_list), - self.format_error(ValidationErrors.HED_MULTIPLE_TOP_TAGS, tag=1, - multiple_tags=offset_list), - ] - - self._test_issues_no_context(test_strings, test_issues, hed_ops) - test_issues = [ self.format_error(ValidationErrors.HED_TOP_LEVEL_TAG, tag=1), - self.format_error(ValidationErrors.HED_TAG_REPEATED, tag=2) - + self.format_error(ValidationErrors.HED_MULTIPLE_TOP_TAGS, tag=1, - multiple_tags=onset_list), - self.format_error(ValidationErrors.HED_MULTIPLE_TOP_TAGS, tag=1, - multiple_tags=offset_list), + self.format_error(OnsetErrors.ONSET_TAG_OUTSIDE_OF_GROUP, tag=2, def_tag="Def/TestDefPlaceholder/2471"), + 
self.format_error(OnsetErrors.ONSET_TAG_OUTSIDE_OF_GROUP, tag=2, def_tag="Def/TestDefPlaceholder/2471"), ] - # Repeat with just hed validator - self._test_issues_no_context(test_strings, test_issues, hed_validator) - - def test_onset_multiple_or_misplaced_errors_no_validator(self): - def_dict = DefinitionDict() - def_string = HedString(self.placeholder_definition_string) - def_string.validate(def_dict) - def_string = HedString(self.definition_string) - def_string.validate(def_dict) - def_mapper = DefMapper(def_dict) - onset_mapper = OnsetMapper(def_mapper) - hed_ops = [def_mapper, onset_mapper] - - test_strings = [ - f"{self.placeholder_label_def_string},Onset", - f"({self.placeholder_label_def_string},Onset, Onset)", - f"({self.placeholder_label_def_string},Onset, Offset)", - f"({self.placeholder_label_def_string},Onset, Event)", - ] - # count of issues the line generates - test_issues = [ - [], - self.format_error(OnsetErrors.ONSET_TAG_OUTSIDE_OF_GROUP, tag=4, - def_tag="Def-expand/TestDefPlaceholder/2471"), - self.format_error(OnsetErrors.ONSET_TAG_OUTSIDE_OF_GROUP, tag=4, - def_tag="Def-expand/TestDefPlaceholder/2471"), - self.format_error(OnsetErrors.ONSET_TAG_OUTSIDE_OF_GROUP, tag=4, - def_tag="Def-expand/TestDefPlaceholder/2471"), - ] - - self._test_issues_no_context(test_strings, test_issues, hed_ops) - - # Verify it also works without def mapping - test_issues = [ - [], - self.format_error(OnsetErrors.ONSET_TAG_OUTSIDE_OF_GROUP, tag=2, - def_tag=self.placeholder_label_def_string), - self.format_error(OnsetErrors.ONSET_TAG_OUTSIDE_OF_GROUP, tag=2, - def_tag=self.placeholder_label_def_string), - self.format_error(OnsetErrors.ONSET_TAG_OUTSIDE_OF_GROUP, tag=2, - def_tag=self.placeholder_label_def_string), - ] - - self._test_issues_no_context(test_strings, test_issues, [hed_ops[1]]) + self._test_issues_no_context(test_strings, test_issues) def test_onset_two_in_one_line(self): - def_dict = DefinitionDict() - def_string = 
HedString(self.placeholder_definition_string) - def_string.validate(def_dict) - def_string = HedString(self.definition_string) - def_string.validate(def_dict) - def_mapper = DefMapper(def_dict) - onset_mapper = OnsetMapper(def_mapper) - test_strings = [ f"({self.placeholder_label_def_string},Onset), ({self.placeholder_label_def_string2},Onset)", f"({self.placeholder_label_def_string2},Offset)", @@ -391,7 +298,7 @@ def test_onset_two_in_one_line(self): [] ] - self._test_issues_base(test_strings, test_issues, expected_context, [onset_mapper]) + self._test_issues_base(test_strings, test_issues, expected_context, placeholder_def_only=False) if __name__ == '__main__': diff --git a/tests/validator/test_tag_validator.py b/tests/validator/test_tag_validator.py index ea13e410a..dc0fb910a 100644 --- a/tests/validator/test_tag_validator.py +++ b/tests/validator/test_tag_validator.py @@ -11,8 +11,8 @@ class TestHed(TestValidatorBase): class IndividualHedTagsShort(TestHed): @staticmethod - def string_obj_func(validator, check_for_warnings): - return partial(validator._validate_individual_tags_in_hed_string, check_for_warnings=check_for_warnings) + def string_obj_func(validator): + return partial(validator._validate_individual_tags_in_hed_string) def test_exist_in_schema(self): test_strings = { @@ -66,10 +66,10 @@ def test_exist_in_schema(self): def test_proper_capitalization(self): test_strings = { 'proper': 'Event/Sensory-event', - 'camelCase': 'EvEnt/Something', + 'camelCase': 'EvEnt/Sensory-event', 'takesValue': 'Sampling-rate/20 Hz', 'numeric': 'Statistical-uncertainty/20', - 'lowercase': 'Event/something' + 'lowercase': 'Event/sensory-event' } expected_results = { 'proper': True, @@ -85,7 +85,7 @@ def test_proper_capitalization(self): 'numeric': [], 'lowercase': self.format_error(ValidationErrors.HED_STYLE_WARNING, tag=0) } - self.validator_syntactic(test_strings, expected_results, expected_issues, True) + self.validator_semantic(test_strings, expected_results, 
expected_issues, True) # def test_proper_capitalization(self): # test_strings = { @@ -112,7 +112,7 @@ def test_proper_capitalization(self): # 'lowercase': self.format_error(ValidationErrors.HED_STYLE_WARNING, tag=0), # 'multipleUpper': self.format_error(ValidationErrors.HED_STYLE_WARNING, tag=0) # } - # self.validator_syntactic(test_strings, expected_results, expected_issues, True) + # self.validator_semantic(test_strings, expected_results, expected_issues, True) # # def test_proper_capitalization_semantic(self): # test_strings = { @@ -352,7 +352,7 @@ def test_span_reporting(self): class TestTagLevels(TestHed): @staticmethod - def string_obj_func(validator, check_for_warnings): + def string_obj_func(validator): return validator._validate_groups_in_hed_string def test_no_duplicates(self): @@ -394,7 +394,7 @@ def test_no_duplicates(self): 'duplicateSubGroupF': self.format_error(ValidationErrors.HED_TAG_REPEATED_GROUP, group=HedString("((Sensory-event,Man-made-object/VehicleTrain),Event)")), } - self.validator_syntactic(test_strings, expected_results, expected_issues, False) + self.validator_semantic(test_strings, expected_results, expected_issues, False) def test_no_duplicates_semantic(self): test_strings = { @@ -489,14 +489,14 @@ def test_empty_groups(self): expected_issues = { 'emptyGroup': self.format_error(ValidationErrors.HED_GROUP_EMPTY, tag=1000 + 1) } - self.validator_syntactic(test_strings, expected_results, expected_issues, False) + self.validator_semantic(test_strings, expected_results, expected_issues, False) class FullHedString(TestHed): compute_forms = False @staticmethod - def string_obj_func(validator, check_for_warnings): + def string_obj_func(validator): return validator._tag_validator.run_hed_string_validators def test_invalid_placeholders(self): @@ -538,11 +538,13 @@ def test_mismatched_parentheses(self): closing_parentheses_count=1), 'extraClosing': self.format_error(ValidationErrors.HED_PARENTHESES_MISMATCH, opening_parentheses_count=1, - 
closing_parentheses_count=2), + closing_parentheses_count=2) + + self.format_error(ValidationErrors.HED_TAG_EMPTY, source_string=test_strings['extraClosing'], + char_index=84), 'valid': [] } - self.validator_syntactic(test_strings, expected_results, expected_issues, False) + self.validator_semantic(test_strings, expected_results, expected_issues, False) def test_malformed_delimiters(self): test_strings = { @@ -676,7 +678,7 @@ def test_malformed_delimiters(self): tag="Thing)) "), # 'emptyGroup': [] } - self.validator_syntactic(test_strings, expected_results, expected_issues, False) + self.validator_semantic(test_strings, expected_results, expected_issues, False) def test_invalid_characters(self): test_strings = { @@ -705,7 +707,7 @@ def test_invalid_characters(self): 'closingBracket': self.format_error(ValidationErrors.HED_CHARACTER_INVALID, char_index=45, source_string=test_strings['closingBracket']) } - self.validator_syntactic(test_strings, expected_results, expected_issues, False) + self.validator_semantic(test_strings, expected_results, expected_issues, False) def test_string_extra_slash_space(self): test_strings = { @@ -778,7 +780,7 @@ def test_string_extra_slash_space(self): index_in_tag=15, index_in_tag_end=18, tag=0), } - self.validator_syntactic(test_strings, expected_results, expected_errors, False) + self.validator_semantic(test_strings, expected_results, expected_errors, False) def test_no_more_than_two_tildes(self): test_strings = { @@ -817,15 +819,15 @@ def test_no_more_than_two_tildes(self): + self.format_error(ValidationErrors.HED_TILDES_UNSUPPORTED, source_string=test_strings['invalidTildeGroup'], char_index=147) } - self.validator_syntactic(test_strings, expected_results, expected_issues, False) + self.validator_semantic(test_strings, expected_results, expected_issues, False) class RequiredTags(TestHed): schema_file = '../data/validator_tests/HED8.0.0_added_tests.mediawiki' @staticmethod - def string_obj_func(validator, check_for_warnings): - 
return partial(validator._validate_tags_in_hed_string, check_for_warnings=check_for_warnings) + def string_obj_func(validator): + return partial(validator._validate_tags_in_hed_string) def test_includes_all_required_tags(self): test_strings = { @@ -857,13 +859,13 @@ def test_includes_all_required_tags(self): def test_multiple_copies_unique_tags(self): test_strings = { 'legal': 'Event-context,' - '(Vehicle,Event)', + '(Vehicle,Event), Animal-agent, Action', 'multipleDesc': 'Event-context,' 'Event-context,' - 'Vehicle,(Vehicle,Event-context)', + 'Vehicle,(Vehicle,Event-context), Animal-agent, Action', # I think this is illegal in hed2 style schema now. 'multipleDescIncShort': 'Event-context,' - 'Organizational-property/Event-context' + 'Organizational-property/Event-context, Animal-agent, Action' } expected_results = { 'legal': True, @@ -885,8 +887,8 @@ class TestHedSpecialUnits(TestHed): schema_file = '../data/validator_tests/HED8.0.0_added_tests.mediawiki' @staticmethod - def string_obj_func(validator, check_for_warnings): - return partial(validator._validate_individual_tags_in_hed_string, check_for_warnings=check_for_warnings) + def string_obj_func(validator): + return partial(validator._validate_individual_tags_in_hed_string) def test_special_units(self): test_strings = { diff --git a/tests/validator/test_tag_validator_base.py b/tests/validator/test_tag_validator_base.py index df8812479..75f2b10e7 100644 --- a/tests/validator/test_tag_validator_base.py +++ b/tests/validator/test_tag_validator_base.py @@ -66,45 +66,38 @@ class TestValidatorBase(TestHedBase): def setUpClass(cls): super().setUpClass() cls.error_handler = error_reporter.ErrorHandler() - cls.syntactic_hed_input_reader = HedValidator(hed_schema=None, - run_semantic_validation=False) - cls.syntactic_tag_validator = cls.syntactic_hed_input_reader._tag_validator - cls.semantic_hed_input_reader = HedValidator(hed_schema=cls.hed_schema, - run_semantic_validation=True) + # cls.syntactic_hed_input_reader = 
HedValidator(hed_schema=None) + # cls.syntactic_tag_validator = cls.syntactic_hed_input_reader._tag_validator + cls.semantic_hed_input_reader = HedValidator(hed_schema=cls.hed_schema) cls.semantic_tag_validator = cls.semantic_hed_input_reader._tag_validator def validator_base(self, test_strings, expected_results, expected_issues, test_function, - hed_schema=None): + hed_schema=None, check_for_warnings=False): for test_key in test_strings: hed_string_obj = HedString(test_strings[test_key]) - error_handler = ErrorHandler() + error_handler = ErrorHandler(check_for_warnings=check_for_warnings) error_handler.push_error_context(ErrorContext.HED_STRING, hed_string_obj, increment_depth_after=False) test_issues = [] if self.compute_forms: test_issues += hed_string_obj.convert_to_canonical_forms(hed_schema) if not test_issues: test_issues += test_function(hed_string_obj) - test_result = not test_issues expected_params = expected_issues[test_key] expected_result = expected_results[test_key] expected_issue = self.format_errors_fully(error_handler, hed_string=hed_string_obj, params=expected_params) - error_handler.add_context_to_issues(test_issues) + error_handler.add_context_and_filter(test_issues) + test_result = not test_issues - # print(test_key) - # print(str(expected_issue)) - # print(str(test_issues)) + print(test_key) + print(str(expected_issue)) + print(str(test_issues)) error_handler.pop_error_context() self.assertEqual(test_result, expected_result, test_strings[test_key]) self.assertCountEqual(test_issues, expected_issue, test_strings[test_key]) - def validator_syntactic(self, test_strings, expected_results, expected_issues, check_for_warnings): - validator = self.syntactic_hed_input_reader - self.validator_base(test_strings, expected_results, expected_issues, - self.string_obj_func(validator, check_for_warnings=check_for_warnings)) - def validator_semantic(self, test_strings, expected_results, expected_issues, check_for_warnings): validator = 
self.semantic_hed_input_reader self.validator_base(test_strings, expected_results, expected_issues, - self.string_obj_func(validator, check_for_warnings=check_for_warnings), + self.string_obj_func(validator), check_for_warnings=check_for_warnings, hed_schema=validator._hed_schema) diff --git a/tests/validator/test_tag_validator_library.py b/tests/validator/test_tag_validator_library.py index 15c86545e..c4552f689 100644 --- a/tests/validator/test_tag_validator_library.py +++ b/tests/validator/test_tag_validator_library.py @@ -43,8 +43,8 @@ def test_invalid_load_prefix(self): class IndividualHedTagsShort(TestHed3): @staticmethod - def string_obj_func(validator, check_for_warnings): - return partial(validator._validate_individual_tags_in_hed_string, check_for_warnings=check_for_warnings) + def string_obj_func(validator): + return partial(validator._validate_individual_tags_in_hed_string) def test_exist_in_schema(self): test_strings = { @@ -102,10 +102,10 @@ def test_exist_in_schema(self): def test_proper_capitalization(self): test_strings = { 'proper': 'tl:Event/Sensory-event', - 'camelCase': 'tl:EvEnt/Something', - 'takesValue': 'tl:Attribute/Temporal rate/20 Hz', - 'numeric': 'tl:Repetition-number/20', - 'lowercase': 'tl:Event/something' + 'camelCase': 'tl:EvEnt/Sensory-event', + 'takesValue': 'tl:Sampling-rate/20 Hz', + 'numeric': 'tl:Statistical-uncertainty/20', + 'lowercase': 'tl:Event/sensory-event' } expected_results = { 'proper': True, @@ -121,7 +121,7 @@ def test_proper_capitalization(self): 'numeric': [], 'lowercase': self.format_error(ValidationErrors.HED_STYLE_WARNING, tag=0) } - self.validator_syntactic(test_strings, expected_results, expected_issues, True) + self.validator_semantic(test_strings, expected_results, expected_issues, True) def test_child_required(self): test_strings = { @@ -302,17 +302,17 @@ def test_span_reporting(self): class TestTagLevels3(TestHed3): @staticmethod - def string_obj_func(validator, check_for_warnings): + def 
string_obj_func(validator): return validator._validate_groups_in_hed_string def test_no_duplicates(self): test_strings = { 'topLevelDuplicate': 'tl:Event/Sensory-event,tl:Event/Sensory-event', 'groupDuplicate': 'tl:Item/Object/Man-made-object/VehicleTrain,(tl:Event/Sensory-event,' - 'tl:Attribute/Sensory/Visual/Color/CSS-color/Purple-color/Purple,tl:Event/Sensory-event)', + 'tl:Purple-color/Purple,tl:Event/Sensory-event)', 'noDuplicate': 'tl:Event/Sensory-event,' 'tl:Item/Object/Man-made-object/VehicleTrain,' - 'tl:Attribute/Sensory/Visual/Color/CSS-color/Purple-color/Purple', + 'tl:Purple-color/Purple', 'legalDuplicate': 'tl:Item/Object/Man-made-object/VehicleTrain,\ (tl:Item/Object/Man-made-object/VehicleTrain,' 'tl:Event/Sensory-event)', @@ -329,7 +329,7 @@ def test_no_duplicates(self): 'legalDuplicate': [], 'noDuplicate': [] } - self.validator_syntactic(test_strings, expected_results, expected_issues, False) + self.validator_semantic(test_strings, expected_results, expected_issues, False) def test_no_duplicates_semantic(self): test_strings = { @@ -417,8 +417,8 @@ def test_taggroup_validation(self): class RequiredTags(TestHed3): @staticmethod - def string_obj_func(validator, check_for_warnings): - return partial(validator._validate_tags_in_hed_string, check_for_warnings=check_for_warnings) + def string_obj_func(validator): + return partial(validator._validate_tags_in_hed_string) def test_includes_all_required_tags(self): test_strings = { @@ -452,12 +452,13 @@ def test_includes_all_required_tags(self): def test_multiple_copies_unique_tags(self): test_strings = { 'legal': 'tl:Event-context,' - '(Vehicle,Event)', + '(Vehicle,Event), Animal-agent, Action, tl:Animal-agent, tl:Action', 'multipleDesc': 'tl:Event-context,' 'tl:Event-context,' - 'Vehicle,(Vehicle,tl:Event-context)', + 'Vehicle,(Vehicle,tl:Event-context), Animal-agent, Action, tl:Animal-agent, tl:Action', 'multipleDescIncShort': 'tl:Event-context,' - 'tl:Organizational-property/Event-context' + 
'tl:Organizational-property/Event-context,' + ' Animal-agent, Action, tl:Animal-agent, tl:Action' } expected_results = { 'legal': True, From 28ef39e4c106e05596ca21001aa01261366ac9f2 Mon Sep 17 00:00:00 2001 From: IanCa Date: Thu, 16 Mar 2023 11:21:22 -0500 Subject: [PATCH 02/19] Add missing data file. Disable prints --- tests/data/sidecar_tests/both_types_events_with_defs.json | 6 +++--- tests/validator/test_onset_validator.py | 8 ++++---- tests/validator/test_tag_validator_base.py | 6 +++--- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/data/sidecar_tests/both_types_events_with_defs.json b/tests/data/sidecar_tests/both_types_events_with_defs.json index 29b133724..7047a1fdd 100644 --- a/tests/data/sidecar_tests/both_types_events_with_defs.json +++ b/tests/data/sidecar_tests/both_types_events_with_defs.json @@ -20,16 +20,16 @@ "stim_file": { "LongName": "Stimulus file name", "Description": "Relative path of the stimulus image file", - "HED": "Attribute/File/#, (Definition/JsonFileDef2/#, (Item/JsonDef2/#,Item/JsonDef2)), (Definition/JsonFileDef3/#, (Item/JsonDef3/#,InvalidTag))" + "HED": "Age/#, (Definition/JsonFileDef2/#, (Item/JsonDef2/#,Item/JsonDef2)), (Definition/JsonFileDef3/#, (Item/JsonDef3/#))" }, "takes_value_def": { "LongName": "Def with a takes value tag", "Description": "Relative path of the stimulus image file", - "HED": "Attribute/File/#, (Definition/TakesValueDef/#, (Age/#))" + "HED": "Age/#, (Definition/TakesValueDef/#, (Age/#))" }, "unit_class_def": { "LongName": "Def with a value class", "Description": "Relative path of the stimulus image file", - "HED": "Attribute/File/#, (Definition/ValueClassDef/#, (Acceleration/#))" + "HED": "Age/#, (Definition/ValueClassDef/#, (Acceleration/#))" } } \ No newline at end of file diff --git a/tests/validator/test_onset_validator.py b/tests/validator/test_onset_validator.py index 1bc814f33..de46d116b 100644 --- a/tests/validator/test_onset_validator.py +++ 
b/tests/validator/test_onset_validator.py @@ -56,8 +56,8 @@ def _test_issues_base(self, test_strings, test_issues, test_context, placeholder error_handler.add_context_and_filter(onset_issues) test_string.shrink_defs() issues = self.format_errors_fully(error_handler, hed_string=test_string, params=expected_params) - print(str(onset_issues)) - print(str(issues)) + # print(str(onset_issues)) + # print(str(issues)) error_handler.pop_error_context() self.assertEqual(len(validator._onsets), context) self.assertCountEqual(onset_issues, issues) @@ -71,8 +71,8 @@ def _test_issues_no_context(self, test_strings, test_issues): onset_issues = hed_validator.validate(test_string, False) error_handler.add_context_and_filter(onset_issues) issues = self.format_errors_fully(error_handler, hed_string=test_string, params=expected_params) - print(str(onset_issues)) - print(str(issues)) + # print(str(onset_issues)) + # print(str(issues)) error_handler.pop_error_context() self.assertCountEqual(onset_issues, issues) diff --git a/tests/validator/test_tag_validator_base.py b/tests/validator/test_tag_validator_base.py index 75f2b10e7..37d78668c 100644 --- a/tests/validator/test_tag_validator_base.py +++ b/tests/validator/test_tag_validator_base.py @@ -89,9 +89,9 @@ def validator_base(self, test_strings, expected_results, expected_issues, test_f error_handler.add_context_and_filter(test_issues) test_result = not test_issues - print(test_key) - print(str(expected_issue)) - print(str(test_issues)) + # print(test_key) + # print(str(expected_issue)) + # print(str(test_issues)) error_handler.pop_error_context() self.assertEqual(test_result, expected_result, test_strings[test_key]) self.assertCountEqual(test_issues, expected_issue, test_strings[test_key]) From 21590f20c51f629624de65a50ecfd1a08c24f47f Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Thu, 16 Mar 2023 16:32:12 -0500 Subject: [PATCH 03/19] Updated unit tests --- hed/models/df_util.py | 2 +- 
hed/tools/analysis/analysis_util.py | 16 +++-- hed/tools/analysis/hed_context_manager.py | 2 +- hed/tools/analysis/hed_type_definitions.py | 10 +-- .../operations/convert_columns_op.py | 70 +++++++++++++++++++ .../remodeling/operations/valid_operations.py | 2 + .../test_analysis_util_assemble_hed.py | 13 ++-- .../analysis/test_hed_context_manager.py | 16 +++-- tests/tools/analysis/test_hed_tag_counts.py | 2 +- .../operations/test_convert_columns_op.py | 50 +++++++++++++ 10 files changed, 159 insertions(+), 24 deletions(-) create mode 100644 hed/tools/remodeling/operations/convert_columns_op.py create mode 100644 tests/tools/remodeling/operations/test_convert_columns_op.py diff --git a/hed/models/df_util.py b/hed/models/df_util.py index b7e73a282..d877028aa 100644 --- a/hed/models/df_util.py +++ b/hed/models/df_util.py @@ -14,7 +14,7 @@ def get_assembled(tabular_file, sidecar, hed_schema, extra_def_dicts=None, join_ The path to the tabular file, or a TabularInput object representing it. sidecar: str or Sidecar The path to the sidecar file, or a Sidecar object representing it. - hed_schema: str or HedSchema + hed_schema: HedSchema If str, will attempt to load as a version if it doesn't have a valid extension. extra_def_dicts: list of DefinitionDict, optional Any extra DefinitionDict objects to use when parsing the HED tags. diff --git a/hed/tools/analysis/analysis_util.py b/hed/tools/analysis/analysis_util.py index c93debd0d..27f442c3d 100644 --- a/hed/tools/analysis/analysis_util.py +++ b/hed/tools/analysis/analysis_util.py @@ -6,13 +6,16 @@ from hed.tools.util.data_util import separate_values from hed.models.hed_tag import HedTag from hed.models.hed_group import HedGroup +from hed.models.df_util import get_assembled, expand_defs -def assemble_hed(data_input, columns_included=None, expand_defs=False): +def assemble_hed(data_input, sidecar, schema, columns_included=None, expand_defs=False): """ Return assembled HED annotations in a dataframe. 
Parameters: data_input (TabularInput): The tabular input file whose HED annotations are to be assembled. + sidecar (Sidecar): Sidecar with definitions. + schema (HedSchema): Hed schema columns_included (list or None): A list of additional column names to include. If None, only the list of assembled tags is included. expand_defs (bool): If True, definitions are expanded when the events are assembled. @@ -23,14 +26,19 @@ def assemble_hed(data_input, columns_included=None, expand_defs=False): """ eligible_columns, missing_columns = separate_values(list(data_input.dataframe.columns), columns_included) - hed_obj_list = get_assembled_strings(data_input, expand_defs=expand_defs) - hed_string_list = [str(hed) for hed in hed_obj_list] + hed_string_list = data_input.series_a + definitions = sidecar.get_def_dict(hed_schema=schema) + if expand_defs: + expand_defs(hed_string_list, schema, definitions, columns=None) + # hed_obj_list, defs = get_assembled(data_input, sidecar, schema, extra_def_dicts=None, join_columns=True, + # shrink_defs=False, expand_defs=True) + # hed_string_list = [str(hed) for hed in hed_obj_list] if not eligible_columns: df = pd.DataFrame({"HED_assembled": hed_string_list}) else: df = data_input.dataframe[eligible_columns].copy(deep=True) df['HED_assembled'] = hed_string_list - definitions = data_input.get_definitions().gathered_defs + # definitions = data_input.get_definitions().gathered_defs return df, definitions diff --git a/hed/tools/analysis/hed_context_manager.py b/hed/tools/analysis/hed_context_manager.py index 011330662..06a02dc82 100644 --- a/hed/tools/analysis/hed_context_manager.py +++ b/hed/tools/analysis/hed_context_manager.py @@ -35,7 +35,7 @@ def __init__(self, hed_strings, hed_schema): """ - self.hed_strings = [HedString(str(hed), hed_schema=hed_schema) for hed in hed_strings] + self.hed_strings = hed_strings if not isinstance(hed_schema, HedSchema) and not isinstance(hed_schema, HedSchemaGroup): raise ValueError("ContextRequiresSchema", 
f"Context manager must have a valid HedSchema of HedSchemaGroup") self.hed_schema = hed_schema diff --git a/hed/tools/analysis/hed_type_definitions.py b/hed/tools/analysis/hed_type_definitions.py index 644802627..8d49dc060 100644 --- a/hed/tools/analysis/hed_type_definitions.py +++ b/hed/tools/analysis/hed_type_definitions.py @@ -1,7 +1,7 @@ """ Manages definitions associated with a type such as condition-variable. """ from hed.models.hed_tag import HedTag -from hed.models.def_mapper import DefMapper +from hed.models.definition_dict import DefinitionDict class HedTypeDefinitions: @@ -10,16 +10,18 @@ def __init__(self, definitions, hed_schema, type_tag='condition-variable'): """ Create a definition manager for a type of variable. Parameters: - definitions (dict or DefMapper): A dictionary of DefinitionEntry objects. + definitions (dict or DefinitionDict): A dictionary of DefinitionEntry objects. hed_schema (Hedschema or HedSchemaGroup): The schema used for parsing. type_tag (str): Lower-case HED tag string representing the type managed. + # TODO: [Refactor] - should dict be allowed for definitions. + """ self.type_tag = type_tag.lower() self.hed_schema = hed_schema - if isinstance(definitions, DefMapper): - self.definitions = definitions.gathered_defs + if isinstance(definitions, DefinitionDict): + self.definitions = definitions.defs elif isinstance(definitions, dict): self.definitions = definitions else: diff --git a/hed/tools/remodeling/operations/convert_columns_op.py b/hed/tools/remodeling/operations/convert_columns_op.py new file mode 100644 index 000000000..ae383a1e4 --- /dev/null +++ b/hed/tools/remodeling/operations/convert_columns_op.py @@ -0,0 +1,70 @@ +""" Convert the type of the specified columns of a tabular file. """ + +from hed.tools.remodeling.operations.base_op import BaseOp + + +class ConvertColumnsOp(BaseOp): + """ Convert. + + Required remodeling parameters: + - **column_names** (*list*): The list of columns to convert. 
+ - **convert_to_** (*str*): Name of type to convert to. (One of 'str', 'int', 'float', 'fixed'.) + - **decimal_places** (*int*): Number decimal places to keep (for fixed only). + + + """ + + PARAMS = { + "operation": "convert_columns", + "required_parameters": { + "column_names": list, + "convert_to": str + }, + "optional_parameters": { + "decimal_places": int + } + } + + def __init__(self, parameters): + """ Constructor for the convert columns operation. + + Parameters: + parameters (dict): Parameter values for required and optional parameters. + + Raises: + KeyError + - If a required parameter is missing. + - If an unexpected parameter is provided. + + TypeError + - If a parameter has the wrong type. + + ValueError + - If convert_to is not one of the allowed values. + + """ + super().__init__(self.PARAMS, parameters) + self.column_names = parameters['column_names'] + self.convert_to = parameters['convert_to'] + self.decimal_places = parameters.get('decimal_places', None) + self.allowed_types = ['str', 'int', 'float', 'fixed'] + if self.convert_to not in self.allowed_types: + raise ValueError("CannotConvertToSpecifiedType", + f"The convert_to value {self.convert_to} must be one of {str(self.allowed_types)}") + + def do_op(self, dispatcher, df, name, sidecar=None): + """ Convert the specified column to a specified type. + + Parameters: + dispatcher (Dispatcher): Manages the operation I/O. + df (DataFrame): The DataFrame to be remodeled. + name (str): Unique identifier for the dataframe -- often the original file path. + sidecar (Sidecar or file-like): Only needed for HED operations. + + Returns: + DataFrame: A new DataFrame with the factor columns appended. 
+ + """ + + df_new = df.copy() + return df_new diff --git a/hed/tools/remodeling/operations/valid_operations.py b/hed/tools/remodeling/operations/valid_operations.py index 36761591a..d00391270 100644 --- a/hed/tools/remodeling/operations/valid_operations.py +++ b/hed/tools/remodeling/operations/valid_operations.py @@ -1,5 +1,6 @@ """ The valid operations for the remodeling tools. """ +# from hed.tools.remodeling.operations.convert_columns_op import ConvertColumnsOp from hed.tools.remodeling.operations.factor_column_op import FactorColumnOp from hed.tools.remodeling.operations.factor_hed_tags_op import FactorHedTagsOp from hed.tools.remodeling.operations.factor_hed_type_op import FactorHedTypeOp @@ -20,6 +21,7 @@ from hed.tools.remodeling.operations.summarize_hed_validation_op import SummarizeHedValidationOp valid_operations = { + # 'convert_columns': ConvertColumnsOp, 'factor_column': FactorColumnOp, 'factor_hed_tags': FactorHedTagsOp, 'factor_hed_type': FactorHedTypeOp, diff --git a/tests/tools/analysis/test_analysis_util_assemble_hed.py b/tests/tools/analysis/test_analysis_util_assemble_hed.py index 058213e3e..9c37b8620 100644 --- a/tests/tools/analysis/test_analysis_util_assemble_hed.py +++ b/tests/tools/analysis/test_analysis_util_assemble_hed.py @@ -22,13 +22,14 @@ def setUpClass(cls): hed_schema = hedschema.load_schema(schema_path) cls.hed_schema = hed_schema - sidecar1 = Sidecar(json_path, name='face_sub1_json', hed_schema=hed_schema) + sidecar1 = Sidecar(json_path, name='face_sub1_json') cls.sidecar_path = sidecar1 - cls.input_data = TabularInput(events_path, hed_schema=hed_schema, sidecar=sidecar1, name="face_sub1_events") + cls.sidecar1 = sidecar1 + cls.input_data = TabularInput(events_path, sidecar=sidecar1, name="face_sub1_events") cls.input_data_no_sidecar = TabularInput(events_path, name="face_sub1_events_no_sidecar") def test_assemble_hed_included_no_expand(self): - df1, dict1 = assemble_hed(self.input_data, + df1, dict1 = 
assemble_hed(self.input_data, self.sidecar1, self.hed_schema, columns_included=["onset", "duration", "event_type"], expand_defs=False) self.assertIsInstance(df1, DataFrame, "hed_assemble should return a dataframe when columns are included") columns1 = list(df1.columns) @@ -38,11 +39,11 @@ def test_assemble_hed_included_no_expand(self): self.assertNotEqual(first_str1.find('Def/'), -1, "assemble_hed with no def expand has Def tags") self.assertEqual(first_str1.find('Def-expand'), -1, "assemble_hed with no def expand does not have Def-expand tags") - self.assertIsInstance(dict1, dict, "hed_assemble returns a dictionary of definitions") - self.assertEqual(len(dict1), 17, "hed_assemble definition dictionary has the right number of elements.") + self.assertIsInstance(dict1.defs, dict, "hed_assemble returns a dictionary of definitions") + self.assertEqual(len(dict1.defs), 17, "hed_assemble definition dictionary has the right number of elements.") def test_assemble_hed_included_expand(self): - df2, dict2 = assemble_hed(self.input_data, + df2, dict2 = assemble_hed(self.input_data, self.sidecar1, self.hed_schema, columns_included=["onset", "duration", "event_type"], expand_defs=True) first_str2 = df2.iloc[0]['HED_assembled'] self.assertEqual(first_str2.find('Def/'), -1, "assemble_hed with def expand has no Def tag") diff --git a/tests/tools/analysis/test_hed_context_manager.py b/tests/tools/analysis/test_hed_context_manager.py index 9ad70e958..26e0f4e87 100644 --- a/tests/tools/analysis/test_hed_context_manager.py +++ b/tests/tools/analysis/test_hed_context_manager.py @@ -1,13 +1,12 @@ import os import unittest from hed.errors.exceptions import HedFileError -from hed.models.hed_group import HedGroup from hed.models.hed_string import HedString from hed.models.sidecar import Sidecar from hed.models.tabular_input import TabularInput from hed.schema.hed_schema_io import load_schema_version -from hed.tools.analysis.hed_context_manager import HedContextManager, OnsetGroup -from 
hed.tools.analysis.analysis_util import get_assembled_strings +from hed.tools.analysis.hed_context_manager import HedContextManager +from hed.models.df_util import get_assembled class Test(unittest.TestCase): @@ -37,7 +36,8 @@ def setUpClass(cls): 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv')) sidecar_path = os.path.realpath(os.path.join(bids_root_path, 'task-FacePerception_events.json')) sidecar1 = Sidecar(sidecar_path, name='face_sub1_json') - cls.input_data = TabularInput(events_path, sidecar=sidecar1, hed_schema=schema, name="face_sub1_events") + cls.input_data = TabularInput(events_path, sidecar=sidecar1, name="face_sub1_events") + cls.sidecar1 = sidecar1 cls.schema = schema # def test_onset_group(self): @@ -71,13 +71,14 @@ def test_constructor(self): self.assertIsInstance(context, list, "The constructor event contexts should be a list") self.assertIsInstance(context[1], HedString, "The constructor event contexts has a correct element") - def test_constructor(self): + def test_constructor1(self): with self.assertRaises(ValueError) as cont: HedContextManager(self.test_strings1, None) self.assertEqual(cont.exception.args[0], "ContextRequiresSchema") def test_iter(self): - hed_strings = get_assembled_strings(self.input_data, hed_schema=self.schema, expand_defs=False) + hed_strings, _ = get_assembled(self.input_data, self.sidecar1, self.schema, extra_def_dicts=None, + join_columns=True, shrink_defs=True, expand_defs=False) manager1 = HedContextManager(hed_strings, self.schema) i = 0 for hed, context in manager1.iter_context(): @@ -86,7 +87,8 @@ def test_iter(self): i = i + 1 def test_constructor_from_assembled(self): - hed_strings = get_assembled_strings(self.input_data, hed_schema=self.schema, expand_defs=False) + hed_strings, _ = get_assembled(self.input_data, self.sidecar1, self.schema, extra_def_dicts=None, + join_columns=True, shrink_defs=True, expand_defs=False) manager1 = HedContextManager(hed_strings, self.schema) 
self.assertEqual(len(manager1.hed_strings), 200, "The constructor for assembled strings has expected # of strings") diff --git a/tests/tools/analysis/test_hed_tag_counts.py b/tests/tools/analysis/test_hed_tag_counts.py index ece27f496..76b0a9eaf 100644 --- a/tests/tools/analysis/test_hed_tag_counts.py +++ b/tests/tools/analysis/test_hed_tag_counts.py @@ -24,7 +24,7 @@ def setUpClass(cls): schema = hedschema.load_schema(schema_path) cls.hed_schema = schema sidecar1 = Sidecar(json_path, name='face_sub1_json') - input_data = TabularInput(events_path, sidecar=sidecar1, hed_schema=schema, name="face_sub1_events") + input_data = TabularInput(events_path, sidecar=sidecar1, name="face_sub1_events") input_df, def_dict = assemble_hed(input_data, expand_defs=False) cls.input_df = input_df cls.def_dict = def_dict diff --git a/tests/tools/remodeling/operations/test_convert_columns_op.py b/tests/tools/remodeling/operations/test_convert_columns_op.py new file mode 100644 index 000000000..01a27f949 --- /dev/null +++ b/tests/tools/remodeling/operations/test_convert_columns_op.py @@ -0,0 +1,50 @@ +import pandas as pd +import numpy as np +import unittest +from hed.tools.remodeling.operations.convert_columns_op import ConvertColumnsOp +from hed.tools.remodeling.dispatcher import Dispatcher + + +class Test(unittest.TestCase): + """ + + TODO: Test when no factor names and values are given. 
+ + """ + @classmethod + def setUpClass(cls): + cls.sample_data = [[0.0776, 0.5083, 'go', 'n/a', 0.565, 'correct', 'right', 'female'], + [5.5774, 0.5083, 'unsuccesful_stop', 0.2, 0.49, 'correct', 'right', 'female'], + [9.5856, 0.5084, 'go', 'n/a', 0.45, 'correct', 'right', 'female'], + [13.5939, 0.5083, 'succesful_stop', 0.2, 'n/a', 'n/a', 'n/a', 'female'], + [17.1021, 0.5083, 'unsuccesful_stop', 0.25, 0.633, 'correct', 'left', 'male'], + [21.6103, 0.5083, 'go', 'n/a', 0.443, 'correct', 'left', 'male']] + cls.factored = [[0.0776, 0.5083, 'go', 'n/a', 0.565, 'correct', 'right', 'female', 0, 0], + [5.5774, 0.5083, 'unsuccesful_stop', 0.2, 0.49, 'correct', 'right', 'female', 0, 1], + [9.5856, 0.5084, 'go', 'n/a', 0.45, 'correct', 'right', 'female', 0, 0], + [13.5939, 0.5083, 'succesful_stop', 0.2, 'n/a', 'n/a', 'n/a', 'female', 1, 0], + [17.1021, 0.5083, 'unsuccesful_stop', 0.25, 0.633, 'correct', 'left', 'male', 0, 1], + [21.6103, 0.5083, 'go', 'n/a', 0.443, 'correct', 'left', 'male', 0, 0]] + cls.sample_columns = ['onset', 'duration', 'trial_type', 'stop_signal_delay', 'response_time', + 'response_accuracy', 'response_hand', 'sex'] + cls.default_factor_columns = ["trial_type.succesful_stop", "trial_type.unsuccesful_stop"] + + def setUp(self): + self.base_parameters = { + "column_names": ["onset", "duration", "response_time"], + "convert_to": "int" + } + + @classmethod + def tearDownClass(cls): + pass + + def test_constructor_bad_convert_to(self): + self.base_parameters["convert_to"] = "blech" + with self.assertRaises(ValueError) as context: + ConvertColumnsOp(self.base_parameters) + self.assertEqual(context.exception.args[0], "CannotConvertToSpecifiedType") + + +if __name__ == '__main__': + unittest.main() From 4c79d1b3c041cd37c9de3853a8f8e7ff4ec37a14 Mon Sep 17 00:00:00 2001 From: IanCa Date: Thu, 16 Mar 2023 17:01:16 -0500 Subject: [PATCH 04/19] Add some df tests. Update hed_assemble. Make the df utils also work on series. 
--- hed/models/df_util.py | 54 ++++++++----- hed/tools/analysis/analysis_util.py | 7 +- tests/models/test_df_util.py | 114 ++++++++++++++++++++++++++++ 3 files changed, 152 insertions(+), 23 deletions(-) create mode 100644 tests/models/test_df_util.py diff --git a/hed/models/df_util.py b/hed/models/df_util.py index d877028aa..66b5c75be 100644 --- a/hed/models/df_util.py +++ b/hed/models/df_util.py @@ -1,4 +1,5 @@ from functools import partial +import pandas as pd from hed.models.sidecar import Sidecar from hed.models.tabular_input import TabularInput @@ -51,7 +52,7 @@ def get_assembled(tabular_file, sidecar, hed_schema, extra_def_dicts=None, join_ for x in text_file_row] for text_file_row in tabular_file.dataframe_a.itertuples(index=False)], def_dict -def convert_to_form(df, hed_schema, tag_form, columns): +def convert_to_form(df, hed_schema, tag_form, columns=None): """ Convert all tags in underlying dataframe to the specified form. Converts in place @@ -61,51 +62,62 @@ def convert_to_form(df, hed_schema, tag_form, columns): tag_form(str): HedTag property to convert tags to. columns (list): The columns to modify on the dataframe """ - if columns is None: - columns = df.columns + if isinstance(df, pd.Series): + df = df.apply(partial(_convert_to_form, hed_schema=hed_schema, tag_form=tag_form)) + else: + if columns is None: + columns = df.columns - for column in columns: - df[column] = df[column].apply(partial(_convert_to_form, hed_schema=hed_schema, tag_form=tag_form)) + for column in columns: + df[column] = df[column].apply(partial(_convert_to_form, hed_schema=hed_schema, tag_form=tag_form)) return df -def shrink_defs(df, hed_schema, columns): +def shrink_defs(df, hed_schema, columns=None): """ Shrinks any def-expand tags found in the dataframe. Converts in place Parameters: - df (pd.Dataframe): The dataframe to modify + df (pd.Dataframe or pd.Series): The dataframe or series to modify hed_schema (HedSchema or None): The schema to use to identify defs. 
- columns (list): The columns to modify on the dataframe + columns (list or None): The columns to modify on the dataframe """ - if columns is None: - columns = df.columns + if isinstance(df, pd.Series): + mask = df.str.contains('Def-expand/', case=False) + df[mask] = df[mask].apply(partial(_shrink_defs, hed_schema=hed_schema)) + else: + if columns is None: + columns = df.columns - for column in columns: - mask = df[column].str.contains('Def-expand/', case=False) - df[column][mask] = df[column][mask].apply(partial(_shrink_defs, hed_schema=hed_schema)) + for column in columns: + mask = df[column].str.contains('Def-expand/', case=False) + df[column][mask] = df[column][mask].apply(partial(_shrink_defs, hed_schema=hed_schema)) return df -def expand_defs(df, hed_schema, def_dict, columns): +def expand_defs(df, hed_schema, def_dict, columns=None): """ Expands any def tags found in the dataframe. Converts in place Parameters: - df (pd.Dataframe): The dataframe to modify + df (pd.Dataframe or pd.Series): The dataframe or series to modify hed_schema (HedSchema or None): The schema to use to identify defs def_dict (DefinitionDict): The definitions to expand - columns (list): The columns to modify on the dataframe + columns (list or None): The columns to modify on the dataframe """ - if columns is None: - columns = df.columns + if isinstance(df, pd.Series): + mask = df.str.contains('Def/', case=False) + df[mask] = df[mask].apply(partial(_expand_defs, hed_schema=hed_schema, def_dict=def_dict)) + else: + if columns is None: + columns = df.columns - for column in columns: - mask = df[column].str.contains('Def/', case=False) - df[column][mask] = df[column][mask].apply(partial(_expand_defs, hed_schema=hed_schema, def_dict=def_dict)) + for column in columns: + mask = df[column].str.contains('Def/', case=False) + df[column][mask] = df[column][mask].apply(partial(_expand_defs, hed_schema=hed_schema, def_dict=def_dict)) return df diff --git a/hed/tools/analysis/analysis_util.py 
b/hed/tools/analysis/analysis_util.py index 27f442c3d..fcfd5284c 100644 --- a/hed/tools/analysis/analysis_util.py +++ b/hed/tools/analysis/analysis_util.py @@ -6,7 +6,7 @@ from hed.tools.util.data_util import separate_values from hed.models.hed_tag import HedTag from hed.models.hed_group import HedGroup -from hed.models.df_util import get_assembled, expand_defs +from hed.models import df_util def assemble_hed(data_input, sidecar, schema, columns_included=None, expand_defs=False): @@ -29,7 +29,10 @@ def assemble_hed(data_input, sidecar, schema, columns_included=None, expand_defs hed_string_list = data_input.series_a definitions = sidecar.get_def_dict(hed_schema=schema) if expand_defs: - expand_defs(hed_string_list, schema, definitions, columns=None) + df_util.expand_defs(hed_string_list, schema, definitions) + # Keep in mind hed_string_list is now a Series. The rest of the function should probably + # also be modified + # hed_obj_list, defs = get_assembled(data_input, sidecar, schema, extra_def_dicts=None, join_columns=True, # shrink_defs=False, expand_defs=True) # hed_string_list = [str(hed) for hed in hed_obj_list] diff --git a/tests/models/test_df_util.py b/tests/models/test_df_util.py new file mode 100644 index 000000000..bc9c907b7 --- /dev/null +++ b/tests/models/test_df_util.py @@ -0,0 +1,114 @@ +import unittest +import pandas as pd + + +from hed import load_schema_version +from hed.models.df_util import shrink_defs, expand_defs +from hed import DefinitionDict + + +class TestShrinkDefs(unittest.TestCase): + def setUp(self): + self.schema = load_schema_version() + + def test_shrink_defs_normal(self): + df = pd.DataFrame({"column1": ["(Def-expand/TestDefNormal,(Action/TestDef1/2471,Action/TestDef2)),Event/SomeEvent"]}) + expected_df = pd.DataFrame({"column1": ["Def/TestDefNormal,Event/SomeEvent"]}) + result = shrink_defs(df, self.schema, ['column1']) + pd.testing.assert_frame_equal(result, expected_df) + + def test_shrink_defs_placeholder(self): + df = 
pd.DataFrame({"column1": ["(Def-expand/TestDefPlaceholder/123,(Action/TestDef1/123,Action/TestDef2)),Item/SomeItem"]}) + expected_df = pd.DataFrame({"column1": ["Def/TestDefPlaceholder/123,Item/SomeItem"]}) + result = shrink_defs(df, self.schema, ['column1']) + pd.testing.assert_frame_equal(result, expected_df) + + def test_shrink_defs_no_matching_tags(self): + df = pd.DataFrame({"column1": ["(Event/SomeEvent, Item/SomeItem,Age/25)"]}) + expected_df = pd.DataFrame({"column1": ["(Event/SomeEvent, Item/SomeItem,Age/25)"]}) + result = shrink_defs(df, self.schema, ['column1']) + pd.testing.assert_frame_equal(result, expected_df) + + def test_shrink_defs_multiple_columns(self): + df = pd.DataFrame({"column1": ["(Def-expand/TestDefNormal,(Action/TestDef1/2471,Action/TestDef2)),Event/SomeEvent"], + "column2": ["(Def-expand/TestDefPlaceholder/123,(Action/TestDef1/123,Action/TestDef2)),Item/SomeItem"]}) + expected_df = pd.DataFrame({"column1": ["Def/TestDefNormal,Event/SomeEvent"], + "column2": ["Def/TestDefPlaceholder/123,Item/SomeItem"]}) + result = shrink_defs(df, self.schema, ['column1', 'column2']) + pd.testing.assert_frame_equal(result, expected_df) + + def test_shrink_defs_multiple_defs_same_line(self): + df = pd.DataFrame({"column1": ["(Def-expand/TestDefNormal,(Action/TestDef1/2471,Action/TestDef2)),(Def-expand/TestDefPlaceholder/123,(Action/TestDef1/123,Action/TestDef2)),Age/30"]}) + expected_df = pd.DataFrame({"column1": ["Def/TestDefNormal,Def/TestDefPlaceholder/123,Age/30"]}) + result = shrink_defs(df, self.schema, ['column1']) + pd.testing.assert_frame_equal(result, expected_df) + + def test_shrink_defs_mixed_tags(self): + df = pd.DataFrame({"column1": [ + "(Def-expand/TestDefNormal,(Action/TestDef1/2471,Action/TestDef2)),Event/SomeEvent,(Def-expand/TestDefPlaceholder/123,(Action/TestDef1/123,Action/TestDef2)),Item/SomeItem,Age/25"]}) + expected_df = pd.DataFrame( + {"column1": 
["Def/TestDefNormal,Event/SomeEvent,Def/TestDefPlaceholder/123,Item/SomeItem,Age/25"]}) + result = shrink_defs(df, self.schema, ['column1']) + pd.testing.assert_frame_equal(result, expected_df) + + def test_shrink_defs_series_normal(self): + series = pd.Series(["(Def-expand/TestDefNormal,(Action/TestDef1/2471,Action/TestDef2)),Event/SomeEvent"]) + expected_series = pd.Series(["Def/TestDefNormal,Event/SomeEvent"]) + result = shrink_defs(series, self.schema, None) + pd.testing.assert_series_equal(result, expected_series) + + def test_shrink_defs_series_placeholder(self): + series = pd.Series(["(Def-expand/TestDefPlaceholder/123,(Action/TestDef1/123,Action/TestDef2)),Item/SomeItem"]) + expected_series = pd.Series(["Def/TestDefPlaceholder/123,Item/SomeItem"]) + result = shrink_defs(series, self.schema, None) + pd.testing.assert_series_equal(result, expected_series) + + +class TestExpandDefs(unittest.TestCase): + def setUp(self): + self.schema = load_schema_version() + self.def_dict = DefinitionDict(["(Definition/TestDefNormal,(Action/TestDef1/2471,Action/TestDef2))", + "(Definition/TestDefPlaceholder/#,(Action/TestDef1/#,Action/TestDef2))"], + hed_schema=self.schema) + + def test_expand_defs_normal(self): + df = pd.DataFrame({"column1": ["Def/TestDefNormal,Event/SomeEvent"]}) + expected_df = pd.DataFrame( + {"column1": ["(Def-expand/TestDefNormal,(Action/TestDef1/2471,Action/TestDef2)),Event/SomeEvent"]}) + result = expand_defs(df, self.schema, self.def_dict, ['column1']) + pd.testing.assert_frame_equal(result, expected_df) + + def test_expand_defs_placeholder(self): + df = pd.DataFrame({"column1": ["Def/TestDefPlaceholder/123,Item/SomeItem"]}) + expected_df = pd.DataFrame({"column1": [ + "(Def-expand/TestDefPlaceholder/123,(Action/TestDef1/123,Action/TestDef2)),Item/SomeItem"]}) + result = expand_defs(df, self.schema, self.def_dict, ['column1']) + pd.testing.assert_frame_equal(result, expected_df) + + def test_expand_defs_no_matching_tags(self): + df = 
pd.DataFrame({"column1": ["(Event/SomeEvent,Item/SomeItem,Age/25)"]}) + expected_df = pd.DataFrame({"column1": ["(Event/SomeEvent,Item/SomeItem,Age/25)"]}) + result = expand_defs(df, self.schema, self.def_dict, ['column1']) + pd.testing.assert_frame_equal(result, expected_df) + + def test_expand_defs_multiple_columns(self): + df = pd.DataFrame({"column1": ["Def/TestDefNormal,Event/SomeEvent"], + "column2": ["Def/TestDefPlaceholder/123,Item/SomeItem"]}) + expected_df = pd.DataFrame( + {"column1": ["(Def-expand/TestDefNormal,(Action/TestDef1/2471,Action/TestDef2)),Event/SomeEvent"], + "column2": [ + "(Def-expand/TestDefPlaceholder/123,(Action/TestDef1/123,Action/TestDef2)),Item/SomeItem"]}) + result = expand_defs(df, self.schema, self.def_dict, ['column1', 'column2']) + pd.testing.assert_frame_equal(result, expected_df) + + def test_expand_defs_series_normal(self): + series = pd.Series(["Def/TestDefNormal,Event/SomeEvent"]) + expected_series = pd.Series(["(Def-expand/TestDefNormal,(Action/TestDef1/2471,Action/TestDef2)),Event/SomeEvent"]) + result = expand_defs(series, self.schema, self.def_dict, None) + pd.testing.assert_series_equal(result, expected_series) + + def test_expand_defs_series_placeholder(self): + series = pd.Series(["Def/TestDefPlaceholder/123,Item/SomeItem"]) + expected_series = pd.Series(["(Def-expand/TestDefPlaceholder/123,(Action/TestDef1/123,Action/TestDef2)),Item/SomeItem"]) + result = expand_defs(series, self.schema, self.def_dict, None) + pd.testing.assert_series_equal(result, expected_series) \ No newline at end of file From 2698d6cc15d05d1f4b81b0054dbd3d86a978a2fd Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Fri, 17 Mar 2023 16:58:27 -0500 Subject: [PATCH 05/19] Fixed some of the refactoring errors --- hed/models/df_util.py | 3 +- hed/models/sidecar.py | 7 +- hed/tools/__init__.py | 3 +- hed/tools/analysis/analysis_util.py | 103 ++++++----- hed/tools/analysis/event_manager.py | 15 +- 
hed/tools/analysis/hed_context_manager.py | 10 +- .../operations/factor_hed_tags_op.py | 18 +- .../operations/factor_hed_type_op.py | 14 +- .../operations/summarize_hed_tags_op.py | 13 +- .../operations/summarize_hed_type_op.py | 11 +- .../operations/summarize_hed_validation_op.py | 10 +- .../test_analysis_util_assemble_hed.py | 80 +++++---- ...est_analysis_util_get_assembled_strings.py | 167 +++++++++--------- tests/tools/analysis/test_annotation_util.py | 8 +- tests/tools/analysis/test_event_manager.py | 17 +- .../analysis/test_hed_context_manager.py | 8 +- tests/tools/analysis/test_hed_tag_counts.py | 2 +- tests/tools/analysis/test_hed_type_counts.py | 10 +- .../analysis/test_hed_type_definitions.py | 9 +- tests/tools/analysis/test_hed_type_factors.py | 17 +- tests/tools/analysis/test_hed_type_manager.py | 60 ++++--- tests/tools/analysis/test_hed_type_values.py | 92 +++++----- .../operations/test_summarize_hed_tags_op.py | 19 +- 23 files changed, 355 insertions(+), 341 deletions(-) diff --git a/hed/models/df_util.py b/hed/models/df_util.py index 66b5c75be..f9fa19dcc 100644 --- a/hed/models/df_util.py +++ b/hed/models/df_util.py @@ -26,7 +26,8 @@ def get_assembled(tabular_file, sidecar, hed_schema, extra_def_dicts=None, join_ expand_defs: bool Expand any def tags found Returns: - A list of HedStrings, or a list of lists of HedStrings + tuple: A list of HedStrings, or a list of lists of HedStrings, DefinitionDict + """ if isinstance(sidecar, str): sidecar = Sidecar(sidecar) diff --git a/hed/models/sidecar.py b/hed/models/sidecar.py index 8b808c6d1..280eba77d 100644 --- a/hed/models/sidecar.py +++ b/hed/models/sidecar.py @@ -156,9 +156,10 @@ def validate(self, hed_schema, extra_def_dicts=None, name=None, error_handler=No Parameters: hed_schema (HedSchema): Input data to be validated. - extra_def_dicts(list or DefinitionDict): extra def dicts in addition to sidecar - name(str): The name to report this sidecar as - error_handler (ErrorHandler): Error context to use. 
Creates a new one if None + extra_def_dicts(list or DefinitionDict): Extra def dicts in addition to sidecar. + name(str): The name to report this sidecar as. + error_handler (ErrorHandler): Error context to use. Creates a new one if None. + Returns: issues (list of dict): A list of issues associated with each level in the HED string. """ diff --git a/hed/tools/__init__.py b/hed/tools/__init__.py index 8b1f6fd90..fd1dfbbce 100644 --- a/hed/tools/__init__.py +++ b/hed/tools/__init__.py @@ -47,7 +47,8 @@ from .analysis.annotation_util import \ check_df_columns, extract_tags, generate_sidecar_entry, hed_to_df, df_to_hed, merge_hed_dict from .analysis import analysis_util -from .analysis.analysis_util import assemble_hed, search_tabular, get_assembled_strings +from .analysis.analysis_util import assemble_hed +# from .analysis.analysis_util import search_tabular, get_assembled_strings from .remodeling.cli import run_remodel from .remodeling.cli import run_remodel_backup diff --git a/hed/tools/analysis/analysis_util.py b/hed/tools/analysis/analysis_util.py index fcfd5284c..a4c57c9f6 100644 --- a/hed/tools/analysis/analysis_util.py +++ b/hed/tools/analysis/analysis_util.py @@ -2,7 +2,6 @@ import pandas as pd from hed.models.tabular_input import TabularInput -from hed.models.expression_parser import QueryParser from hed.tools.util.data_util import separate_values from hed.models.hed_tag import HedTag from hed.models.hed_group import HedGroup @@ -45,57 +44,57 @@ def assemble_hed(data_input, sidecar, schema, columns_included=None, expand_defs return df, definitions -def get_assembled_strings(table, hed_schema=None, expand_defs=False): - """ Return HED string objects for a tabular file. - - Parameters: - table (TabularInput): The input file to be searched. - hed_schema (HedSchema or HedschemaGroup): If provided the HedStrings are converted to canonical form. - expand_defs (bool): If True, definitions are expanded when the events are assembled. 
- - Returns: - list: A list of HedString or HedStringGroup objects. - - """ - hed_list = list(table.iter_dataframe(hed_ops=[hed_schema], return_string_only=True, - expand_defs=expand_defs, remove_definitions=True)) - return hed_list - - -def search_tabular(data_input, hed_schema, query, columns_included=None): - """ Return a dataframe with results of query. - - Parameters: - data_input (TabularInput): The tabular input file (e.g., events) to be searched. - hed_schema (HedSchema or HedSchemaGroup): The schema(s) under which to make the query. - query (str or list): The str query or list of string queries to make. - columns_included (list or None): List of names of columns to include - - Returns: - DataFrame or None: A DataFrame with the results of the query or None if no events satisfied the query. - - """ - - eligible_columns, missing_columns = separate_values(list(data_input.dataframe.columns), columns_included) - hed_list = get_assembled_strings(data_input, hed_schema=hed_schema, expand_defs=True) - expression = QueryParser(query) - hed_tags = [] - row_numbers = [] - for index, next_item in enumerate(hed_list): - match = expression.search(next_item) - if not match: - continue - hed_tags.append(next_item) - row_numbers.append(index) - - if not row_numbers: - df = None - elif not eligible_columns: - df = pd.DataFrame({'row_number': row_numbers, 'HED_assembled': hed_tags}) - else: - df = data_input.dataframe.iloc[row_numbers][eligible_columns].reset_index() - df.rename(columns={'index': 'row_number'}) - return df +# def get_assembled_strings(table, hed_schema=None, expand_defs=False): +# """ Return HED string objects for a tabular file. +# +# Parameters: +# table (TabularInput): The input file to be searched. +# hed_schema (HedSchema or HedschemaGroup): If provided the HedStrings are converted to canonical form. +# expand_defs (bool): If True, definitions are expanded when the events are assembled. 
+# +# Returns: +# list: A list of HedString or HedStringGroup objects. +# +# """ +# hed_list = list(table.iter_dataframe(hed_ops=[hed_schema], return_string_only=True, +# expand_defs=expand_defs, remove_definitions=True)) +# return hed_list +# + +# def search_tabular(data_input, hed_schema, query, columns_included=None): +# """ Return a dataframe with results of query. +# +# Parameters: +# data_input (TabularInput): The tabular input file (e.g., events) to be searched. +# hed_schema (HedSchema or HedSchemaGroup): The schema(s) under which to make the query. +# query (str or list): The str query or list of string queries to make. +# columns_included (list or None): List of names of columns to include +# +# Returns: +# DataFrame or None: A DataFrame with the results of the query or None if no events satisfied the query. +# +# """ +# +# eligible_columns, missing_columns = separate_values(list(data_input.dataframe.columns), columns_included) +# hed_list = get_assembled_strings(data_input, hed_schema=hed_schema, expand_defs=True) +# expression = QueryParser(query) +# hed_tags = [] +# row_numbers = [] +# for index, next_item in enumerate(hed_list): +# match = expression.search(next_item) +# if not match: +# continue +# hed_tags.append(next_item) +# row_numbers.append(index) +# +# if not row_numbers: +# df = None +# elif not eligible_columns: +# df = pd.DataFrame({'row_number': row_numbers, 'HED_assembled': hed_tags}) +# else: +# df = data_input.dataframe.iloc[row_numbers][eligible_columns].reset_index() +# df.rename(columns={'index': 'row_number'}) +# return df # def remove_defs(hed_strings): diff --git a/hed/tools/analysis/event_manager.py b/hed/tools/analysis/event_manager.py index 2d6da7adc..f8bf5e5f5 100644 --- a/hed/tools/analysis/event_manager.py +++ b/hed/tools/analysis/event_manager.py @@ -3,25 +3,26 @@ from hed.schema import HedSchema, HedSchemaGroup from hed.tools.analysis.temporal_event import TemporalEvent from hed.models.model_constants import DefTagNames 
+from hed.models.df_util import get_assembled class EventManager: - def __init__(self, data, hed_schema): + def __init__(self, data, schema): """ Create an event manager for an events file. Parameters: data (TabularInput): A tabular input file. - hed_schema (HedSchema): A HED schema + schema (HedSchema): A HED schema Raises: HedFileError: if there are any unmatched offsets. """ - if not isinstance(hed_schema, HedSchema) and not isinstance(hed_schema, HedSchemaGroup): + if not isinstance(schema, HedSchema) and not isinstance(schema, HedSchemaGroup): raise ValueError("ContextRequiresSchema", f"Context manager must have a valid HedSchema of HedSchemaGroup") - self.hed_schema = hed_schema + self.schema = schema self.data = data self.event_list = [[] for _ in range(len(self.data.dataframe))] self.hed_strings = [None for _ in range(len(self.data.dataframe))] @@ -56,10 +57,10 @@ def _create_event_list(self): onset_dict = {} event_index = 0 - for hed in self.data.iter_dataframe(hed_ops=[self.hed_schema], return_string_only=True, - expand_defs=False, remove_definitions=True): + self.hed_strings, definitions = get_assembled(self.data, self.data._sidecar, self.schema, extra_def_dicts=None, + join_columns=True, shrink_defs=True, expand_defs=False) + for hed in self.hed_strings: # to_remove = [] # tag_tuples = hed.find_tags(['Onset'], recursive=False, include_groups=1) - self.hed_strings[event_index] = hed group_tuples = hed.find_top_level_tags(anchor_tags={DefTagNames.ONSET_KEY, DefTagNames.OFFSET_KEY}, include_groups=2) for tup in group_tuples: diff --git a/hed/tools/analysis/hed_context_manager.py b/hed/tools/analysis/hed_context_manager.py index 06a02dc82..5c565a9a4 100644 --- a/hed/tools/analysis/hed_context_manager.py +++ b/hed/tools/analysis/hed_context_manager.py @@ -5,6 +5,7 @@ from hed.schema import HedSchema, HedSchemaGroup from hed.tools.analysis.analysis_util import hed_to_str +#TODO: [Refactor] clean up distinction between hed as strings versus objects -- maybe 
replace by event manager. class OnsetGroup: def __init__(self, name, contents, start_index, end_index=None): @@ -23,7 +24,8 @@ def __init__(self, hed_strings, hed_schema): """ Create an context manager for an events file. Parameters: - hed_strings (list): A list of hed_strings to be managed. + hed_strings (list): A list of HedString objects to be managed. + hed_schema (HedSchema): A HedSchema Raises: HedFileError: if there are any unmatched offsets. @@ -46,6 +48,12 @@ def __init__(self, hed_strings, hed_schema): self._create_onset_list() self._set_event_contexts() + # def _extract_hed_objs(self, assembled): + # hed_objs = [None for _ in range(len(assembled))] + # for index, value in assembled["HED_assembled"].items(): + # hed_objs[index] = HedString(value, hed_schema=self.hed_schema) + # return hed_objs + def iter_context(self): """ Iterate rows of context. diff --git a/hed/tools/remodeling/operations/factor_hed_tags_op.py b/hed/tools/remodeling/operations/factor_hed_tags_op.py index 41d3f805a..aa02224b9 100644 --- a/hed/tools/remodeling/operations/factor_hed_tags_op.py +++ b/hed/tools/remodeling/operations/factor_hed_tags_op.py @@ -7,7 +7,7 @@ from hed.models.tabular_input import TabularInput from hed.models.sidecar import Sidecar from hed.models.expression_parser import QueryParser -from hed.tools.analysis.analysis_util import get_assembled_strings +from hed.models.df_util import get_assembled class FactorHedTagsOp(BaseOp): @@ -101,16 +101,16 @@ def do_op(self, dispatcher, df, name, sidecar=None): """ if sidecar and not isinstance(sidecar, Sidecar): - sidecar = Sidecar(sidecar, hed_schema=dispatcher.hed_schema) - input_data = TabularInput(df, hed_schema=dispatcher.hed_schema, sidecar=sidecar) + sidecar = Sidecar(sidecar) + input_data = TabularInput(df.copy(), sidecar=sidecar, name=name) column_names = list(df.columns) - for name in self.query_names: - if name in column_names: + for query_name in self.query_names: + if query_name in column_names: raise 
ValueError("QueryNameAlreadyColumn", - f"Query [{name}]: is already a column name of the data frame") - df = input_data.dataframe.copy() - df_list = [df] - hed_strings = get_assembled_strings(input_data, hed_schema=dispatcher.hed_schema, expand_defs=True) + f"Query [{query_name}]: is already a column name of the data frame") + df_list = [input_data.dataframe] + hed_strings, _ = get_assembled(input_data, sidecar, dispatcher.hed_schema, extra_def_dicts=None, + join_columns=True, shrink_defs=False, expand_defs=True) df_factors = pd.DataFrame(0, index=range(len(hed_strings)), columns=self.query_names) for parse_ind, parser in enumerate(self.expression_parsers): for index, next_item in enumerate(hed_strings): diff --git a/hed/tools/remodeling/operations/factor_hed_type_op.py b/hed/tools/remodeling/operations/factor_hed_type_op.py index e4a43c181..668886c88 100644 --- a/hed/tools/remodeling/operations/factor_hed_type_op.py +++ b/hed/tools/remodeling/operations/factor_hed_type_op.py @@ -5,7 +5,7 @@ from hed.tools.remodeling.operations.base_op import BaseOp from hed.models.tabular_input import TabularInput from hed.models.sidecar import Sidecar -from hed.tools.analysis.analysis_util import get_assembled_strings +from hed.models.df_util import get_assembled from hed.tools.analysis.hed_type_manager import HedTypeManager # TODO: restricted factor values are not implemented yet. 
@@ -69,13 +69,13 @@ def do_op(self, dispatcher, df, name, sidecar=None): """ if sidecar and not isinstance(sidecar, Sidecar): - sidecar = Sidecar(sidecar, hed_schema=dispatcher.hed_schema) - input_data = TabularInput(df, hed_schema=dispatcher.hed_schema, sidecar=sidecar) - df = input_data.dataframe.copy() - df_list = [df] - hed_strings = get_assembled_strings(input_data, hed_schema=dispatcher.hed_schema, expand_defs=False) + sidecar = Sidecar(sidecar) + input_data = TabularInput(df, sidecar=sidecar, name=name) + df_list = [input_data.dataframe.copy()] + hed_strings, definitions = get_assembled(input_data, sidecar, dispatcher.hed_schema, + extra_def_dicts=None, join_columns=True, + shrink_defs=False, expand_defs=True) - definitions = input_data.get_definitions() var_manager = HedTypeManager(hed_strings, dispatcher.hed_schema, definitions) var_manager.add_type_variable(self.type_tag.lower()) diff --git a/hed/tools/remodeling/operations/summarize_hed_tags_op.py b/hed/tools/remodeling/operations/summarize_hed_tags_op.py index 09f7e3a48..a8d220df8 100644 --- a/hed/tools/remodeling/operations/summarize_hed_tags_op.py +++ b/hed/tools/remodeling/operations/summarize_hed_tags_op.py @@ -5,6 +5,7 @@ from hed.tools.analysis.hed_tag_counts import HedTagCounts from hed.tools.remodeling.operations.base_op import BaseOp from hed.tools.remodeling.operations.base_context import BaseContext +from hed.models.df_util import get_assembled class SummarizeHedTagsOp(BaseOp): @@ -97,12 +98,14 @@ def update_context(self, new_context): counts = HedTagCounts(new_context['name'], total_events=len(new_context['df'])) sidecar = new_context['sidecar'] if sidecar and not isinstance(sidecar, Sidecar): - sidecar = Sidecar(sidecar, hed_schema=new_context['schema']) - input_data = TabularInput(new_context['df'], hed_schema=new_context['schema'], sidecar=sidecar) + sidecar = Sidecar(sidecar) + input_data = TabularInput(new_context['df'], sidecar=sidecar, name=new_context['name']) + hed_strings, 
definitions = get_assembled(input_data, sidecar, new_context['schema'], + extra_def_dicts=None, join_columns=True, + shrink_defs=False, expand_defs=True) # definitions = input_data.get_definitions().gathered_defs - for objs in input_data.iter_dataframe(hed_ops=[new_context['schema']], return_string_only=False, - expand_defs=True, remove_definitions=True): - counts.update_event_counts(objs['HED'], new_context['name']) + for hed in hed_strings: + counts.update_event_counts(hed, new_context['name']) self.summary_dict[new_context["name"]] = counts def _get_summary_details(self, merge_counts): diff --git a/hed/tools/remodeling/operations/summarize_hed_type_op.py b/hed/tools/remodeling/operations/summarize_hed_type_op.py index 2c7ab7c64..0e2664698 100644 --- a/hed/tools/remodeling/operations/summarize_hed_type_op.py +++ b/hed/tools/remodeling/operations/summarize_hed_type_op.py @@ -2,7 +2,7 @@ from hed.models.tabular_input import TabularInput from hed.models.sidecar import Sidecar -from hed.tools.analysis.analysis_util import get_assembled_strings +from hed.models.df_util import get_assembled from hed.tools.analysis.hed_type_values import HedTypeValues from hed.tools.analysis.hed_type_counts import HedTypeCounts from hed.tools.analysis.hed_context_manager import HedContextManager @@ -90,10 +90,11 @@ def __init__(self, sum_op): def update_context(self, new_context): sidecar = new_context['sidecar'] if sidecar and not isinstance(sidecar, Sidecar): - sidecar = Sidecar(sidecar, hed_schema=new_context['schema']) - input_data = TabularInput(new_context['df'], hed_schema=new_context['schema'], sidecar=sidecar) - hed_strings = get_assembled_strings(input_data, hed_schema=new_context['schema'], expand_defs=False) - definitions = input_data.get_definitions().gathered_defs + sidecar = Sidecar(sidecar) + input_data = TabularInput(new_context['df'], sidecar=sidecar, name=new_context['name']) + hed_strings, definitions = get_assembled(input_data, sidecar, new_context['schema'], + 
extra_def_dicts=None, join_columns=True, + shrink_defs=False, expand_defs=True) context_manager = HedContextManager(hed_strings, new_context['schema']) type_values = HedTypeValues(context_manager, definitions, new_context['name'], type_tag=self.type_tag) diff --git a/hed/tools/remodeling/operations/summarize_hed_validation_op.py b/hed/tools/remodeling/operations/summarize_hed_validation_op.py index 771b49e5c..d1bd8f53e 100644 --- a/hed/tools/remodeling/operations/summarize_hed_validation_op.py +++ b/hed/tools/remodeling/operations/summarize_hed_validation_op.py @@ -102,7 +102,6 @@ def _get_result_string(self, name, result, indent=BaseContext.DISPLAY_INDENT): return "\n".join(sum_list) def update_context(self, new_context): - validator = HedValidator(hed_schema=new_context['schema']) results = self.get_empty_results() results["total_event_files"] = 1 results["event_issues"][new_context["name"]] = [] @@ -111,10 +110,9 @@ def update_context(self, new_context): filtered_issues = [] if sidecar: if not isinstance(sidecar, Sidecar): - sidecar = Sidecar(files=new_context['sidecar'], name=os.path.basename(sidecar), - hed_schema=new_context['schema']) + sidecar = Sidecar(files=new_context['sidecar'], name=os.path.basename(sidecar)) results["sidecar_issues"][sidecar.name] = [] - sidecar_issues = sidecar.validate_entries(validator, check_for_warnings=self.check_for_warnings) + sidecar_issues = sidecar.validate(new_context['schema']) filtered_issues = ErrorHandler.filter_issues_by_severity(sidecar_issues, ErrorSeverity.ERROR) if not self.check_for_warnings: sidecar_issues = filtered_issues @@ -123,8 +121,8 @@ def update_context(self, new_context): results['total_sidecar_files'] = 1 if not filtered_issues: results['validation_completed'] = True - input_data = TabularInput(new_context['df'], hed_schema=new_context['schema'], sidecar=sidecar) - issues = input_data.validate_file(validator, check_for_warnings=self.check_for_warnings) + input_data = TabularInput(new_context['df'], 
sidecar=sidecar) + issues = input_data.validate(new_context['schema']) if not self.check_for_warnings: issues = ErrorHandler.filter_issues_by_severity(issues, ErrorSeverity.ERROR) results['event_issues'][new_context["name"]] = issues diff --git a/tests/tools/analysis/test_analysis_util_assemble_hed.py b/tests/tools/analysis/test_analysis_util_assemble_hed.py index 9c37b8620..318c3aa54 100644 --- a/tests/tools/analysis/test_analysis_util_assemble_hed.py +++ b/tests/tools/analysis/test_analysis_util_assemble_hed.py @@ -2,8 +2,10 @@ import unittest from pandas import DataFrame from hed import schema as hedschema -from hed.models import Sidecar, TabularInput -from hed.tools import assemble_hed, search_tabular +from hed.models import Sidecar, TabularInput, DefinitionDict +from hed.tools.analysis.analysis_util import assemble_hed + + # noinspection PyBroadException @@ -20,8 +22,8 @@ def setUpClass(cls): events_path = os.path.realpath(os.path.join(bids_root_path, 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv')) - hed_schema = hedschema.load_schema(schema_path) - cls.hed_schema = hed_schema + schema = hedschema.load_schema(schema_path) + cls.schema = schema sidecar1 = Sidecar(json_path, name='face_sub1_json') cls.sidecar_path = sidecar1 cls.sidecar1 = sidecar1 @@ -29,8 +31,8 @@ def setUpClass(cls): cls.input_data_no_sidecar = TabularInput(events_path, name="face_sub1_events_no_sidecar") def test_assemble_hed_included_no_expand(self): - df1, dict1 = assemble_hed(self.input_data, self.sidecar1, self.hed_schema, - columns_included=["onset", "duration", "event_type"], expand_defs=False) + df1, dict1 = assemble_hed(self.input_data, self.sidecar1, self.schema, expand_defs=False, + columns_included=["onset", "duration", "event_type"]) self.assertIsInstance(df1, DataFrame, "hed_assemble should return a dataframe when columns are included") columns1 = list(df1.columns) self.assertEqual(len(columns1), 4, @@ -43,28 +45,29 @@ def test_assemble_hed_included_no_expand(self): 
self.assertEqual(len(dict1.defs), 17, "hed_assemble definition dictionary has the right number of elements.") def test_assemble_hed_included_expand(self): - df2, dict2 = assemble_hed(self.input_data, self.sidecar1, self.hed_schema, - columns_included=["onset", "duration", "event_type"], expand_defs=True) + df2, dict2 = assemble_hed(self.input_data, self.sidecar1, self.schema, expand_defs=True, + columns_included=["onset", "duration", "event_type"]) first_str2 = df2.iloc[0]['HED_assembled'] self.assertEqual(first_str2.find('Def/'), -1, "assemble_hed with def expand has no Def tag") self.assertNotEqual(first_str2.find('Def-expand/'), -1, "assemble_hed with def expand has Def-expand tags") def test_assemble_hed_included_no_expand_bad_column(self): - df3, dict3 = assemble_hed(self.input_data, - columns_included=["onset", "baloney", "duration", "event_type"], expand_defs=False) + df3, dict3 = assemble_hed(self.input_data, self.sidecar1, self.schema, expand_defs=True, + columns_included=["onset", "baloney", "duration", "event_type"]) columns3 = list(df3.columns) self.assertEqual(len(columns3), 4, "assemble_hed should return the correct number of columns when bad columns are included ") def test_assemble_hed_included_expand_bad_column(self): - df3, dict3 = assemble_hed(self.input_data, - columns_included=["onset", "baloney", "duration", "event_type"], expand_defs=True) + df3, dict3 = assemble_hed(self.input_data, self.sidecar1, self.schema, expand_defs=True, + columns_included=["onset", "baloney", "duration", "event_type"]) columns3 = list(df3.columns) self.assertEqual(len(columns3), 4, "assemble_hed should return the correct number of columns when bad columns are included ") def test_assemble_hed_no_included_no_expand(self): - df1, dict1 = assemble_hed(self.input_data, columns_included=None, expand_defs=False) + df1, dict1 = assemble_hed(self.input_data, self.sidecar1, self.schema, + columns_included=None, expand_defs=False) self.assertIsInstance(df1, DataFrame, 
"hed_assemble returns a dataframe when no columns are included") columns1 = list(df1.columns) self.assertEqual(len(columns1), 1, @@ -73,17 +76,18 @@ def test_assemble_hed_no_included_no_expand(self): self.assertNotEqual(first_str1.find('Def/'), -1, "assemble_hed with no def expand has Def tags") self.assertEqual(first_str1.find('Def-expand'), -1, "assemble_hed with no def expand does not have Def-expand tags") - self.assertIsInstance(dict1, dict, "hed_assemble returns a dictionary of definitions") - self.assertEqual(len(dict1), 17, "hed_assemble definition dictionary has the right number of elements.") + self.assertIsInstance(dict1, DefinitionDict, "hed_assemble returns a dictionary of definitions") + self.assertEqual(len(dict1.defs), 17, "hed_assemble definition dictionary has the right number of elements.") def test_assemble_hed_no_included_expand(self): - df2, dict2 = assemble_hed(self.input_data, columns_included=None, expand_defs=True) + df2, dict2 = assemble_hed(self.input_data, self.sidecar1, self.schema, + columns_included=None, expand_defs=True) first_str2 = df2.iloc[0]['HED_assembled'] self.assertEqual(first_str2.find('Def/'), -1, "assemble_hed with def expand has no Def tag") self.assertNotEqual(first_str2.find('Def-expand/'), -1, "assemble_hed with def expand has Def-expand tags") def test_assemble_hed_bad_column_no_expand(self): - df3, dict3 = assemble_hed(self.input_data, + df3, dict3 = assemble_hed(self.input_data, self.sidecar1, self.schema, columns_included=["onset", "baloney", "duration", "event_type"], expand_defs=False) columns3 = list(df3.columns) self.assertEqual(len(columns3), 4, @@ -92,27 +96,27 @@ def test_assemble_hed_bad_column_no_expand(self): self.assertNotEqual(first_str2.find('Def/'), -1, "assemble_hed with def expand has no Def tag") self.assertEqual(first_str2.find('Def-expand/'), -1, "assemble_hed with def expand has Def-expand tags") - def test_search_tabular(self): - query1 = "sensory-event" - df1 = 
search_tabular(self.input_data, self.hed_schema, query1, columns_included=None) - self.assertIsInstance(df1, DataFrame, "search_tabular returns a dataframe when the query is satisfied.") - self.assertEqual(len(df1.columns), 2, "search_tabular has the right number of columns when query okay") - self.assertEqual(len(df1.index), 155, "search_tabular has right number of rows when query okay") - query2 = 'data-feature' - df2 = search_tabular(self.input_data, self.hed_schema, query2, columns_included=None) - self.assertFalse(df2, "search_tabular returns None when query is not satisfied.") - - query3 = "sensory-event" - df3 = search_tabular(self.input_data, self.hed_schema, query3, columns_included=['event_type', 'rep_status']) - self.assertIsInstance(df3, DataFrame, "search_tabular returns a DataFrame when extra columns") - self.assertEqual(len(df3.columns), 3, "search_tabular returns right number of columns when extra columns") - self.assertEqual(len(df3.index), 155, "search_tabular has right number of rows when query okay") - - df4 = search_tabular(self.input_data, self.hed_schema, query3, - columns_included=['onset', 'event_type', 'rep_status']) - self.assertIsInstance(df4, DataFrame, "search_tabular returns a DataFrame when extra columns") - self.assertEqual(len(df4.columns), 4, "search_tabular returns right number of columns when extra columns") - self.assertEqual(len(df4.index), 155, "search_tabular has right number of rows when query okay") + # def test_search_tabular(self): + # query1 = "sensory-event" + # df1 = search_tabular(self.input_data, self.schema, query1, columns_included=None) + # self.assertIsInstance(df1, DataFrame, "search_tabular returns a dataframe when the query is satisfied.") + # self.assertEqual(len(df1.columns), 2, "search_tabular has the right number of columns when query okay") + # self.assertEqual(len(df1.index), 155, "search_tabular has right number of rows when query okay") + # query2 = 'data-feature' + # df2 = 
search_tabular(self.input_data, self.hed_schema, query2, columns_included=None) + # self.assertFalse(df2, "search_tabular returns None when query is not satisfied.") + # + # query3 = "sensory-event" + # df3 = search_tabular(self.input_data, self.hed_schema, query3, columns_included=['event_type', 'rep_status']) + # self.assertIsInstance(df3, DataFrame, "search_tabular returns a DataFrame when extra columns") + # self.assertEqual(len(df3.columns), 3, "search_tabular returns right number of columns when extra columns") + # self.assertEqual(len(df3.index), 155, "search_tabular has right number of rows when query okay") + # + # df4 = search_tabular(self.input_data, self.hed_schema, query3, + # columns_included=['onset', 'event_type', 'rep_status']) + # self.assertIsInstance(df4, DataFrame, "search_tabular returns a DataFrame when extra columns") + # self.assertEqual(len(df4.columns), 4, "search_tabular returns right number of columns when extra columns") + # self.assertEqual(len(df4.index), 155, "search_tabular has right number of rows when query okay") if __name__ == '__main__': diff --git a/tests/tools/analysis/test_analysis_util_get_assembled_strings.py b/tests/tools/analysis/test_analysis_util_get_assembled_strings.py index 143db3305..036b4c938 100644 --- a/tests/tools/analysis/test_analysis_util_get_assembled_strings.py +++ b/tests/tools/analysis/test_analysis_util_get_assembled_strings.py @@ -3,7 +3,7 @@ from hed import schema as hedschema from hed.models.hed_string import HedString from hed.models.tabular_input import TabularInput -from hed.tools.analysis.analysis_util import get_assembled_strings +# from hed.tools.analysis.analysis_util import get_assembled_strings # noinspection PyBroadException @@ -26,90 +26,89 @@ def setUpClass(cls): # cls.input_data_no_sidecar = TabularInput(events_path, name="face_sub1_events_no_sidecar") def setUp(self): - self.input_data = TabularInput(self.events_path, hed_schema=self.hed_schema, - sidecar=self.json_path, 
name="face_sub1_events") + self.input_data = TabularInput(self.events_path, sidecar=self.json_path, name="face_sub1_events") - def test_get_assembled_strings_no_schema_no_def_expand(self): - hed_list1 = get_assembled_strings(self.input_data, expand_defs=False) - self.assertIsInstance(hed_list1, list, "get_assembled_groups should return a list when expand defs is False") - self.assertIsInstance(hed_list1[0], HedString) - hed_strings1 = [str(hed) for hed in hed_list1] - self.assertIsInstance(hed_strings1[0], str, "get_assembled_strings can be converted.") - self.assertIsInstance(hed_strings1, list) - hed_strings_joined1 = ",".join(hed_strings1) - self.assertEqual(hed_strings_joined1.find("Def-expand/"), -1, - "get_assembled_strings should not have Def-expand when expand_defs is False") - self.assertNotEqual(hed_strings_joined1.find("Def/"), -1, - "get_assembled_strings should have Def/ when expand_defs is False") - - def test_get_assembled_strings_no_schema_def_expand(self): - hed_list2 = get_assembled_strings(self.input_data, expand_defs=True) - self.assertIsInstance(hed_list2, list, "get_assembled_groups should return a list") - self.assertIsInstance(hed_list2[0], HedString) - hed_strings2 = [str(hed) for hed in hed_list2] - self.assertIsInstance(hed_strings2[0], str, "get_assembled_strings can be converted.") - self.assertIsInstance(hed_strings2, list, "get_assembled") - hed_strings_joined2 = ",".join(hed_strings2) - self.assertNotEqual(hed_strings_joined2.find("Def-expand/"), -1, - "get_assembled_strings should have Def-expand when expand_defs is True") - self.assertEqual(hed_strings_joined2.find("Def/"), -1, - "get_assembled_strings should not have Def/ when expand_defs is True") - - def test_get_assembled_strings_with_schema_no_def_expand(self): - hed_list1 = get_assembled_strings(self. 
input_data, hed_schema=self.hed_schema, expand_defs=False) - self.assertIsInstance(hed_list1, list, "get_assembled_strings returns a list when expand defs is False") - self.assertIsInstance(hed_list1[0], HedString) - hed_strings1 = [str(hed) for hed in hed_list1] - self.assertIsInstance(hed_strings1[0], str, "get_assembled_strings can be converted.") - self.assertIsInstance(hed_strings1, list) - hed_strings_joined1 = ",".join(hed_strings1) - self.assertEqual(hed_strings_joined1.find("Def-expand/"), -1, - "get_assembled_strings does not have Def-expand when expand_defs is False") - self.assertNotEqual(hed_strings_joined1.find("Def/"), -1, - "get_assembled_strings should have Def/ when expand_defs is False") - - def test_get_assembled_strings_with_schema_def_expand(self): - hed_list2 = get_assembled_strings(self.input_data, hed_schema=self.hed_schema, expand_defs=True) - self.assertIsInstance(hed_list2, list, "get_assembled_groups should return a list") - self.assertIsInstance(hed_list2[0], HedString) - hed_strings2 = [str(hed) for hed in hed_list2] - self.assertIsInstance(hed_strings2[0], str, "get_assembled_strings can be converted.") - self.assertIsInstance(hed_strings2, list, "get_assembled") - hed_strings_joined2 = ",".join(hed_strings2) - self.assertNotEqual(hed_strings_joined2.find("Def-expand/"), -1, - "get_assembled_strings should have Def-expand when expand_defs is True") - self.assertEqual(hed_strings_joined2.find("Def/"), -1, - "get_assembled_strings should not have Def/ when expand_defs is True") - - def test_get_assembled_strings_no_sidecar_no_schema(self): - input_data = TabularInput(self.events_path, name="face_sub1_events") - hed_list1 = get_assembled_strings(input_data, expand_defs=False) - self.assertEqual(len(hed_list1), 200, - "get_assembled_strings should have right number of entries when no sidecar") - self.assertIsInstance(hed_list1[0], HedString, - "get_assembled_string should return an HedString when no sidecar") - 
self.assertFalse(hed_list1[0].children, "get_assembled_string returned HedString is empty when no sidecar") - hed_list2 = get_assembled_strings(input_data, expand_defs=True) - self.assertEqual(len(hed_list2), 200, - "get_assembled_strings should have right number of entries when no sidecar") - self.assertIsInstance(hed_list2[0], HedString, - "get_assembled_string should return an HedString when no sidecar") - self.assertFalse(hed_list2[0].children, "get_assembled_string returned HedString is empty when no sidecar") - - def test_get_assembled_strings_no_sidecar_schema(self): - input_data = TabularInput(self.events_path, hed_schema=self.hed_schema, name="face_sub1_events") - hed_list1 = get_assembled_strings(input_data, expand_defs=False) - self.assertEqual(len(hed_list1), 200, - "get_assembled_strings should have right number of entries when no sidecar") - self.assertIsInstance(hed_list1[0], HedString, - "get_assembled_string should return an HedString when no sidecar") - self.assertFalse(hed_list1[0].children, "get_assembled_string returned HedString is empty when no sidecar") - hed_list2 = get_assembled_strings(input_data, expand_defs=True) - self.assertEqual(len(hed_list2), 200, - "get_assembled_strings should have right number of entries when no sidecar") - self.assertIsInstance(hed_list2[0], HedString, - "get_assembled_string should return an HedString when no sidecar") - self.assertFalse(hed_list2[0].children, "get_assembled_string returned HedString is empty when no sidecar") + # def test_get_assembled_strings_no_schema_no_def_expand(self): + # hed_list1 = get_assembled_strings(self.input_data, expand_defs=False) + # self.assertIsInstance(hed_list1, list, "get_assembled_groups should return a list when expand defs is False") + # self.assertIsInstance(hed_list1[0], HedString) + # hed_strings1 = [str(hed) for hed in hed_list1] + # self.assertIsInstance(hed_strings1[0], str, "get_assembled_strings can be converted.") + # self.assertIsInstance(hed_strings1, list) 
+ # hed_strings_joined1 = ",".join(hed_strings1) + # self.assertEqual(hed_strings_joined1.find("Def-expand/"), -1, + # "get_assembled_strings should not have Def-expand when expand_defs is False") + # self.assertNotEqual(hed_strings_joined1.find("Def/"), -1, + # "get_assembled_strings should have Def/ when expand_defs is False") + # + # def test_get_assembled_strings_no_schema_def_expand(self): + # hed_list2 = get_assembled_strings(self.input_data, self.hed_schema, expand_defs=True) + # self.assertIsInstance(hed_list2, list, "get_assembled_groups should return a list") + # self.assertIsInstance(hed_list2[0], HedString) + # hed_strings2 = [str(hed) for hed in hed_list2] + # self.assertIsInstance(hed_strings2[0], str, "get_assembled_strings can be converted.") + # self.assertIsInstance(hed_strings2, list, "get_assembled") + # hed_strings_joined2 = ",".join(hed_strings2) + # self.assertNotEqual(hed_strings_joined2.find("Def-expand/"), -1, + # "get_assembled_strings should have Def-expand when expand_defs is True") + # self.assertEqual(hed_strings_joined2.find("Def/"), -1, + # "get_assembled_strings should not have Def/ when expand_defs is True") + # + # def test_get_assembled_strings_with_schema_no_def_expand(self): + # hed_list1 = get_assembled_strings(self. 
input_data, hed_schema=self.hed_schema, expand_defs=False) + # self.assertIsInstance(hed_list1, list, "get_assembled_strings returns a list when expand defs is False") + # self.assertIsInstance(hed_list1[0], HedString) + # hed_strings1 = [str(hed) for hed in hed_list1] + # self.assertIsInstance(hed_strings1[0], str, "get_assembled_strings can be converted.") + # self.assertIsInstance(hed_strings1, list) + # hed_strings_joined1 = ",".join(hed_strings1) + # self.assertEqual(hed_strings_joined1.find("Def-expand/"), -1, + # "get_assembled_strings does not have Def-expand when expand_defs is False") + # self.assertNotEqual(hed_strings_joined1.find("Def/"), -1, + # "get_assembled_strings should have Def/ when expand_defs is False") + # + # def test_get_assembled_strings_with_schema_def_expand(self): + # hed_list2 = get_assembled_strings(self.input_data, hed_schema=self.hed_schema, expand_defs=True) + # self.assertIsInstance(hed_list2, list, "get_assembled_groups should return a list") + # self.assertIsInstance(hed_list2[0], HedString) + # hed_strings2 = [str(hed) for hed in hed_list2] + # self.assertIsInstance(hed_strings2[0], str, "get_assembled_strings can be converted.") + # self.assertIsInstance(hed_strings2, list, "get_assembled") + # hed_strings_joined2 = ",".join(hed_strings2) + # self.assertNotEqual(hed_strings_joined2.find("Def-expand/"), -1, + # "get_assembled_strings should have Def-expand when expand_defs is True") + # self.assertEqual(hed_strings_joined2.find("Def/"), -1, + # "get_assembled_strings should not have Def/ when expand_defs is True") + # + # def test_get_assembled_strings_no_sidecar_no_schema(self): + # input_data = TabularInput(self.events_path, name="face_sub1_events") + # hed_list1 = get_assembled_strings(input_data, expand_defs=False) + # self.assertEqual(len(hed_list1), 200, + # "get_assembled_strings should have right number of entries when no sidecar") + # self.assertIsInstance(hed_list1[0], HedString, + # "get_assembled_string should 
return an HedString when no sidecar") + # self.assertFalse(hed_list1[0].children, "get_assembled_string returned HedString is empty when no sidecar") + # hed_list2 = get_assembled_strings(input_data, expand_defs=True) + # self.assertEqual(len(hed_list2), 200, + # "get_assembled_strings should have right number of entries when no sidecar") + # self.assertIsInstance(hed_list2[0], HedString, + # "get_assembled_string should return an HedString when no sidecar") + # self.assertFalse(hed_list2[0].children, "get_assembled_string returned HedString is empty when no sidecar") + # + # def test_get_assembled_strings_no_sidecar_schema(self): + # input_data = TabularInput(self.events_path, hed_schema=self.hed_schema, name="face_sub1_events") + # hed_list1 = get_assembled_strings(input_data, expand_defs=False) + # self.assertEqual(len(hed_list1), 200, + # "get_assembled_strings should have right number of entries when no sidecar") + # self.assertIsInstance(hed_list1[0], HedString, + # "get_assembled_string should return an HedString when no sidecar") + # self.assertFalse(hed_list1[0].children, "get_assembled_string returned HedString is empty when no sidecar") + # hed_list2 = get_assembled_strings(input_data, expand_defs=True) + # self.assertEqual(len(hed_list2), 200, + # "get_assembled_strings should have right number of entries when no sidecar") + # self.assertIsInstance(hed_list2[0], HedString, + # "get_assembled_string should return an HedString when no sidecar") + # self.assertFalse(hed_list2[0].children, "get_assembled_string returned HedString is empty when no sidecar") if __name__ == '__main__': diff --git a/tests/tools/analysis/test_annotation_util.py b/tests/tools/analysis/test_annotation_util.py index fcf2ce03a..f54dd1dc8 100644 --- a/tests/tools/analysis/test_annotation_util.py +++ b/tests/tools/analysis/test_annotation_util.py @@ -232,19 +232,19 @@ def test_hed_to_df_with_definitions(self): "hed_to_df should have right description when in parentheses") def 
test_hed_to_df_to_hed(self): - validator = HedValidator(self.hed_schema) + # validator = HedValidator(self.hed_schema) side1 = Sidecar(files=self.json_path, name="sidecar_face.json") - issues1 = side1.validate_entries(validator, check_for_warnings=True) + issues1 = side1.validate(self.hed_schema) self.assertFalse(issues1, "hed_to_df_to_hed is starting with a valid JSON sidecar") df1 = hed_to_df(self.sidecar_face) self.assertIsInstance(df1, DataFrame, "hed_to_df_to_hed starting sidecar can be converted to df") hed2 = df_to_hed(df1, description_tag=True) side2 = Sidecar(files=io.StringIO(json.dumps(hed2)), name='JSON_Sidecar2') - issues2 = side2.validate_entries(validator, check_for_warnings=True) + issues2 = side2.validate(self.hed_schema) self.assertFalse(issues2, "hed_to_df_to_hed is valid after conversion back and forth with description True") hed3 = df_to_hed(df1, description_tag=False) side3 = Sidecar(files=io.StringIO(json.dumps(hed3)), name='JSON_Sidecar2') - issues3 = side3.validate_entries(validator, check_for_warnings=True) + issues3 = side3.validate(self.hed_schema) self.assertFalse(issues3, "hed_to_df_to_hed is valid after conversion back and forth with description False") def test_merge_hed_dict_cat_col(self): diff --git a/tests/tools/analysis/test_event_manager.py b/tests/tools/analysis/test_event_manager.py index dd920256a..09eb17a50 100644 --- a/tests/tools/analysis/test_event_manager.py +++ b/tests/tools/analysis/test_event_manager.py @@ -1,13 +1,9 @@ import os import unittest -from hed.errors.exceptions import HedFileError -from hed.models.hed_group import HedGroup -from hed.models.hed_string import HedString + from hed.models.sidecar import Sidecar from hed.models.tabular_input import TabularInput from hed.schema.hed_schema_io import load_schema_version -from hed.tools.analysis.hed_context_manager import HedContextManager, OnsetGroup -from hed.tools.analysis.analysis_util import get_assembled_strings from hed.tools.analysis.event_manager import 
EventManager from hed.tools.analysis.temporal_event import TemporalEvent @@ -23,7 +19,7 @@ def setUpClass(cls): 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv')) sidecar_path = os.path.realpath(os.path.join(bids_root_path, 'task-FacePerception_events.json')) sidecar1 = Sidecar(sidecar_path, name='face_sub1_json') - cls.input_data = TabularInput(events_path, sidecar=sidecar1, hed_schema=schema, name="face_sub1_events") + cls.input_data = TabularInput(events_path, sidecar=sidecar1, name="face_sub1_events") cls.schema = schema def test_constructor(self): @@ -33,14 +29,13 @@ def test_constructor(self): for index, item in enumerate(manager1.event_list): for event in item: event_count = event_count + 1 - self.assertFalse(event.duration) + self.assertFalse(event.duration) self.assertTrue(event.end_index) self.assertEqual(event.start_index, index) self.assertEqual(event.start_index, index) self.assertEqual(event.start_time, manager1.data.dataframe.loc[index, "onset"]) if not event.end_time: self.assertEqual(event.end_index, len(manager1.data.dataframe)) - print("to here") # def test_constructor(self): @@ -56,7 +51,7 @@ def test_constructor(self): # self.assertEqual(hed, manager1.hed_strings[i]) # self.assertEqual(context, manager1.contexts[i]) # i = i + 1 - # + # def test_constructor_from_assembled(self): # hed_strings = get_assembled_strings(self.input_data, hed_schema=self.schema, expand_defs=False) # manager1 = HedContextManager(hed_strings, self.schema) @@ -64,12 +59,12 @@ def test_constructor(self): # "The constructor for assembled strings has expected # of strings") # self.assertEqual(len(manager1.onset_list), 261, # "The constructor for assembled strings has onset_list of correct length") - # + # def test_constructor_unmatched(self): # with self.assertRaises(HedFileError) as context: # HedContextManager(self.test_strings2, self.schema) # self.assertEqual(context.exception.args[0], 'UnmatchedOffset') - # + # def test_constructor_multiple_values(self): # 
manager = HedContextManager(self.test_strings3, self.schema) # self.assertEqual(len(manager.onset_list), 3, "Constructor should have right number of onsets") diff --git a/tests/tools/analysis/test_hed_context_manager.py b/tests/tools/analysis/test_hed_context_manager.py index 26e0f4e87..2ac042453 100644 --- a/tests/tools/analysis/test_hed_context_manager.py +++ b/tests/tools/analysis/test_hed_context_manager.py @@ -77,8 +77,8 @@ def test_constructor1(self): self.assertEqual(cont.exception.args[0], "ContextRequiresSchema") def test_iter(self): - hed_strings, _ = get_assembled(self.input_data, self.sidecar1, self.schema, extra_def_dicts=None, - join_columns=True, shrink_defs=True, expand_defs=False) + hed_strings, definitions = get_assembled(self.input_data, self.sidecar1, self.schema, extra_def_dicts=None, + join_columns=True, shrink_defs=True, expand_defs=False) manager1 = HedContextManager(hed_strings, self.schema) i = 0 for hed, context in manager1.iter_context(): @@ -87,8 +87,8 @@ def test_iter(self): i = i + 1 def test_constructor_from_assembled(self): - hed_strings, _ = get_assembled(self.input_data, self.sidecar1, self.schema, extra_def_dicts=None, - join_columns=True, shrink_defs=True, expand_defs=False) + hed_strings, definitions = get_assembled(self.input_data, self.sidecar1, self.schema, extra_def_dicts=None, + join_columns=True, shrink_defs=True, expand_defs=False) manager1 = HedContextManager(hed_strings, self.schema) self.assertEqual(len(manager1.hed_strings), 200, "The constructor for assembled strings has expected # of strings") diff --git a/tests/tools/analysis/test_hed_tag_counts.py b/tests/tools/analysis/test_hed_tag_counts.py index 76b0a9eaf..0950ea909 100644 --- a/tests/tools/analysis/test_hed_tag_counts.py +++ b/tests/tools/analysis/test_hed_tag_counts.py @@ -25,7 +25,7 @@ def setUpClass(cls): cls.hed_schema = schema sidecar1 = Sidecar(json_path, name='face_sub1_json') input_data = TabularInput(events_path, sidecar=sidecar1, 
name="face_sub1_events") - input_df, def_dict = assemble_hed(input_data, expand_defs=False) + input_df, def_dict = assemble_hed(input_data, sidecar1, schema, expand_defs=False) cls.input_df = input_df cls.def_dict = def_dict diff --git a/tests/tools/analysis/test_hed_type_counts.py b/tests/tools/analysis/test_hed_type_counts.py index 711b8d4c9..c4fd22cab 100644 --- a/tests/tools/analysis/test_hed_type_counts.py +++ b/tests/tools/analysis/test_hed_type_counts.py @@ -6,7 +6,7 @@ from hed.tools.analysis.hed_context_manager import HedContextManager from hed.tools.analysis.hed_type_values import HedTypeValues from hed.tools.analysis.hed_type_counts import HedTypeCount, HedTypeCounts -from hed.tools.analysis.analysis_util import get_assembled_strings +from hed.models.df_util import get_assembled class Test(unittest.TestCase): @@ -19,10 +19,10 @@ def setUpClass(cls): events_path = os.path.realpath(os.path.join(bids_root_path, 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv')) sidecar_path = os.path.realpath(os.path.join(bids_root_path, 'task-FacePerception_events.json')) - sidecar1 = Sidecar(sidecar_path, hed_schema=schema, name='face_sub1_json') - input_data = TabularInput(events_path, sidecar=sidecar1, hed_schema=schema, name="face_sub1_events") - hed_strings1 = get_assembled_strings(input_data, hed_schema=schema, expand_defs=False) - definitions1 = input_data.get_definitions(as_strings=False).gathered_defs + sidecar1 = Sidecar(sidecar_path, name='face_sub1_json') + input_data = TabularInput(events_path, sidecar=sidecar1, name="face_sub1_events") + hed_strings1, definitions1 = get_assembled(input_data, sidecar1, schema, extra_def_dicts=None, + join_columns=True, shrink_defs=True, expand_defs=False) cls.var_type1 = HedTypeValues(HedContextManager(hed_strings1, schema), definitions1, 'run-01', type_tag='condition-variable') diff --git a/tests/tools/analysis/test_hed_type_definitions.py b/tests/tools/analysis/test_hed_type_definitions.py index 
7a66d7e8e..15cbedce2 100644 --- a/tests/tools/analysis/test_hed_type_definitions.py +++ b/tests/tools/analysis/test_hed_type_definitions.py @@ -42,9 +42,10 @@ def setUpClass(cls): events_path = os.path.realpath(os.path.join(bids_root_path, 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv')) sidecar_path = os.path.realpath(os.path.join(bids_root_path, 'task-FacePerception_events.json')) - sidecar1 = Sidecar(sidecar_path, hed_schema=schema, name='face_sub1_json') - cls.input_data = TabularInput(events_path, hed_schema=schema, sidecar=sidecar1, name="face_sub1_events") + sidecar1 = Sidecar(sidecar_path, name='face_sub1_json') + cls.input_data = TabularInput(events_path, sidecar=sidecar1, name="face_sub1_events") cls.schema = schema + cls.sidecar1 = sidecar1 def test_constructor(self): def_man = HedTypeDefinitions(self.definitions1, self.schema) @@ -54,8 +55,8 @@ def test_constructor(self): self.assertEqual(len(def_man.def_map), len(def_man.definitions), "Constructor condition_map should be the same length as the definitions dictionary") - def test_constructor_from_tabular_input(self): - definitions = self.input_data.get_definitions(as_strings=False).gathered_defs + def test_constructor_from_sidecar(self): + definitions = self.sidecar1.get_def_dict(self.schema) def_man = HedTypeDefinitions(definitions, self.schema) self.assertIsInstance(def_man, HedTypeDefinitions, "Constructor should create a HedTypeDefinitions from a tabular input") diff --git a/tests/tools/analysis/test_hed_type_factors.py b/tests/tools/analysis/test_hed_type_factors.py index 5615453da..5821e2675 100644 --- a/tests/tools/analysis/test_hed_type_factors.py +++ b/tests/tools/analysis/test_hed_type_factors.py @@ -10,7 +10,7 @@ from hed.tools.analysis.hed_context_manager import HedContextManager from hed.tools.analysis.hed_type_values import HedTypeValues from hed.tools.analysis.hed_type_factors import HedTypeFactors -from hed.tools.analysis.analysis_util import get_assembled_strings +from 
hed.models.df_util import get_assembled class Test(unittest.TestCase): @@ -57,8 +57,9 @@ def setUpClass(cls): events_path = os.path.realpath(os.path.join(bids_root_path, 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv')) sidecar_path = os.path.realpath(os.path.join(bids_root_path, 'task-FacePerception_events.json')) - sidecar1 = Sidecar(sidecar_path, hed_schema=schema, name='face_sub1_json') - cls.input_data = TabularInput(events_path, sidecar=sidecar1, hed_schema=schema, name="face_sub1_events") + sidecar1 = Sidecar(sidecar_path, name='face_sub1_json') + cls.input_data = TabularInput(events_path, sidecar=sidecar1, name="face_sub1_events") + cls.sidecar1 = sidecar1 cls.schema = schema def test_with_mixed(self): @@ -73,9 +74,9 @@ def test_with_mixed(self): self.assertIsInstance(summary1, dict) def test_tabular_input(self): - test_strings1 = get_assembled_strings(self.input_data, hed_schema=self.schema, expand_defs=False) - definitions = self.input_data.get_definitions(as_strings=False).gathered_defs - var_manager = HedTypeValues(HedContextManager(test_strings1, self.schema), definitions, 'run-01') + hed_strings, definitions = get_assembled(self.input_data, self.sidecar1, self.schema, extra_def_dicts=None, + join_columns=True, shrink_defs=True, expand_defs=False) + var_manager = HedTypeValues(HedContextManager(hed_strings, self.schema), definitions, 'run-01') self.assertIsInstance(var_manager, HedTypeValues, "Constructor should create a HedTypeManager from a tabular input") var_fact = var_manager.get_type_value_factors('face-type') @@ -154,8 +155,8 @@ def test_count_events(self): self.assertIsNone(max_multiple2, "_count_level_events should not have a max multiple for empty list") def test_get_summary(self): - hed_strings = get_assembled_strings(self.input_data, hed_schema=self.schema, expand_defs=False) - definitions = self.input_data.get_definitions(as_strings=False).gathered_defs + hed_strings, definitions = get_assembled(self.input_data, self.sidecar1, 
self.schema, extra_def_dicts=None, + join_columns=True, shrink_defs=True, expand_defs=False) var_manager = HedTypeValues(HedContextManager(hed_strings, self.schema), definitions, 'run-01') var_key = var_manager.get_type_value_factors('key-assignment') sum_key = var_key.get_summary() diff --git a/tests/tools/analysis/test_hed_type_manager.py b/tests/tools/analysis/test_hed_type_manager.py index 82bdf0e8b..9fd7abce2 100644 --- a/tests/tools/analysis/test_hed_type_manager.py +++ b/tests/tools/analysis/test_hed_type_manager.py @@ -6,7 +6,7 @@ from hed.tools.analysis.hed_type_values import HedTypeValues from hed.tools.analysis.hed_type_factors import HedTypeFactors from hed.tools.analysis.hed_type_manager import HedTypeManager -from hed.tools.analysis.analysis_util import get_assembled_strings +from hed.models.df_util import get_assembled class Test(unittest.TestCase): @@ -18,14 +18,16 @@ def setUp(self): events_path = os.path.realpath(os.path.join(bids_root_path, 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv')) sidecar_path = os.path.realpath(os.path.join(bids_root_path, 'task-FacePerception_events.json')) - sidecar1 = Sidecar(sidecar_path, hed_schema=schema, name='face_sub1_json') - self.input_data = TabularInput(events_path, sidecar=sidecar1, hed_schema=schema, name="face_sub1_events") - self.hed_strings = get_assembled_strings(self.input_data, hed_schema=schema, expand_defs=False) - self.hed_schema = schema - self.definitions = self.input_data.get_definitions() + sidecar1 = Sidecar(sidecar_path, name='face_sub1_json') + self.input_data = TabularInput(events_path, sidecar=sidecar1, name="face_sub1_events") + self.hed_strings, self.definitions = get_assembled(self.input_data, sidecar1, schema, + extra_def_dicts=None, + join_columns=True, shrink_defs=True, expand_defs=False) + self.sidecar1 = sidecar1 + self.schema = schema def test_constructor(self): - var_manager = HedTypeManager(self.hed_strings, self.hed_schema, self.definitions) + var_manager = 
HedTypeManager(self.hed_strings, self.schema, self.definitions) self.assertIsInstance(var_manager, HedTypeManager, "Constructor should create a HedTypeManager from a tabular input") self.assertEqual(len(var_manager.context_manager.hed_strings), len(var_manager.context_manager.contexts), @@ -33,7 +35,7 @@ def test_constructor(self): self.assertFalse(var_manager._type_tag_map, "constructor has empty map") def test_add_type_variable(self): - var_manager = HedTypeManager(self.hed_strings, self.hed_schema, self.definitions) + var_manager = HedTypeManager(self.hed_strings, self.schema, self.definitions) self.assertFalse(var_manager._type_tag_map, "constructor has empty map") var_manager.add_type_variable("Condition-variable") self.assertEqual(len(var_manager._type_tag_map), 1, @@ -48,10 +50,10 @@ def test_add_type_variable(self): "add_type_variable has 2 element map after two types are added") def test_get_factor_vectors(self): - hed_strings = get_assembled_strings(self.input_data, hed_schema=self.hed_schema, expand_defs=False) + hed_strings, definitions = get_assembled(self.input_data, self.sidecar1, self.schema, extra_def_dicts=None, + join_columns=True, shrink_defs=True, expand_defs=False) base_length = len(hed_strings) - def_mapper = self.input_data._def_mapper - var_manager = HedTypeManager(hed_strings, self.hed_schema, def_mapper) + var_manager = HedTypeManager(hed_strings, self.schema, definitions) var_manager.add_type_variable("Condition-variable") var_manager.add_type_variable("task") df_cond = var_manager.get_factor_vectors("condition-variable") @@ -64,9 +66,9 @@ def test_get_factor_vectors(self): self.assertIsNone(df_baloney, "get_factor_vectors returns None if no factors") def test_get_type_variable(self): - hed_strings = get_assembled_strings(self.input_data, hed_schema=self.hed_schema, expand_defs=False) - def_mapper = self.input_data._def_mapper - var_manager = HedTypeManager(hed_strings, self.hed_schema, def_mapper) + hed_strings, definitions = 
get_assembled(self.input_data, self.sidecar1, self.schema, extra_def_dicts=None, + join_columns=True, shrink_defs=True, expand_defs=False) + var_manager = HedTypeManager(hed_strings, self.schema, definitions) var_manager.add_type_variable("Condition-variable") type_var = var_manager.get_type_variable("condition-variable") self.assertIsInstance(type_var, HedTypeValues, @@ -75,9 +77,9 @@ def test_get_type_variable(self): self.assertIsNone(type_var, "get_type_variable returns None if the key does not exist") def test_get_type_variable_def_names(self): - hed_strings = get_assembled_strings(self.input_data, hed_schema=self.hed_schema, expand_defs=False) - def_mapper = self.input_data._def_mapper - var_manager = HedTypeManager(hed_strings, self.hed_schema, def_mapper) + hed_strings, definitions = get_assembled(self.input_data, self.sidecar1, self.schema, extra_def_dicts=None, + join_columns=True, shrink_defs=True, expand_defs=False) + var_manager = HedTypeManager(hed_strings, self.schema, definitions) var_manager.add_type_variable("Condition-variable") def_names = var_manager.get_type_tag_def_names("condition-variable") self.assertEqual(len(def_names), 7, @@ -88,9 +90,9 @@ def test_get_type_variable_def_names(self): self.assertFalse(def_names, "get_type_tag_def_names returns empty if the type does not exist") def test_get_variable_type_map(self): - hed_strings = get_assembled_strings(self.input_data, hed_schema=self.hed_schema, expand_defs=False) - def_mapper = self.input_data._def_mapper - var_manager = HedTypeManager(hed_strings, self.hed_schema, def_mapper) + hed_strings, definitions = get_assembled(self.input_data, self.sidecar1, self.schema, extra_def_dicts=None, + join_columns=True, shrink_defs=True, expand_defs=False) + var_manager = HedTypeManager(hed_strings, self.schema, definitions) var_manager.add_type_variable("Condition-variable") this_var = var_manager.get_type_variable("condition-variable") self.assertIsInstance(this_var, HedTypeValues, @@ -104,9 +106,9 
@@ def test_get_variable_type_map(self): "get_type_variable_map map has right length when key upper case") def test_get_type_variable_factor(self): - hed_strings = get_assembled_strings(self.input_data, hed_schema=self.hed_schema, expand_defs=False) - def_mapper = self.input_data._def_mapper - var_manager = HedTypeManager(hed_strings, self.hed_schema, def_mapper) + hed_strings, definitions = get_assembled(self.input_data, self.sidecar1, self.schema, extra_def_dicts=None, + join_columns=True, shrink_defs=True, expand_defs=False) + var_manager = HedTypeManager(hed_strings, self.schema, definitions) var_manager.add_type_variable("Condition-variable") var_factor1 = var_manager.get_type_tag_factor("condition-variable", "key-assignment") self.assertIsInstance(var_factor1, HedTypeFactors, @@ -117,9 +119,9 @@ def test_get_type_variable_factor(self): self.assertIsNone(var_factor3, "get_type_tag_factor returns None if type variable does not exist") def test_type_variables(self): - hed_strings = get_assembled_strings(self.input_data, hed_schema=self.hed_schema, expand_defs=False) - definitions = self.input_data.get_definitions - var_manager = HedTypeManager(hed_strings, self.hed_schema, definitions) + hed_strings, definitions = get_assembled(self.input_data, self.sidecar1, self.schema, extra_def_dicts=None, + join_columns=True, shrink_defs=True, expand_defs=False) + var_manager = HedTypeManager(hed_strings, self.schema, definitions) vars1 = var_manager.type_variables self.assertFalse(vars1, "type_variables is empty if no types have been added") var_manager.add_type_variable("Condition-variable") @@ -129,9 +131,9 @@ def test_type_variables(self): self.assertEqual(len(vars2), 2, "type_variables return list is right length") def test_summarize_all(self): - hed_strings = get_assembled_strings(self.input_data, hed_schema=self.hed_schema, expand_defs=False) - def_mapper = self.input_data._def_mapper - var_manager = HedTypeManager(hed_strings, self.hed_schema, def_mapper) + 
hed_strings, definitions = get_assembled(self.input_data, self.sidecar1, self.schema, extra_def_dicts=None, + join_columns=True, shrink_defs=True, expand_defs=False) + var_manager = HedTypeManager(hed_strings, self.schema, definitions) summary1 = var_manager.summarize_all() self.assertIsInstance(summary1, dict, "summarize_all returns a dictionary when nothing has been added") self.assertFalse(summary1, "summarize_all return dictionary is empty when nothing has been added") diff --git a/tests/tools/analysis/test_hed_type_values.py b/tests/tools/analysis/test_hed_type_values.py index c5ad5557a..4b3125353 100644 --- a/tests/tools/analysis/test_hed_type_values.py +++ b/tests/tools/analysis/test_hed_type_values.py @@ -10,7 +10,7 @@ from hed.schema.hed_schema_io import load_schema_version from hed.tools.analysis.hed_context_manager import HedContextManager from hed.tools.analysis.hed_type_values import HedTypeValues -from hed.tools.analysis.analysis_util import get_assembled_strings +from hed.models.df_util import get_assembled class Test(unittest.TestCase): @@ -53,12 +53,11 @@ def setUpClass(cls): cls.events_path = os.path.realpath(os.path.join(bids_root_path, 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv')) cls.sidecar_path = os.path.realpath(os.path.join(bids_root_path, 'task-FacePerception_events.json')) - cls.hed_schema = schema + cls.schema = schema def test_constructor(self): - strings1 = [HedString(hed, hed_schema=self.hed_schema) for hed in self.test_strings1] - strings2 = [HedString(hed, hed_schema=self.hed_schema) for hed in self.test_strings1] - con_man = HedContextManager(strings1, hed_schema=self.hed_schema) + strings1 = [HedString(hed, hed_schema=self.schema) for hed in self.test_strings1] + con_man = HedContextManager(strings1, hed_schema=self.schema) type_var = HedTypeValues(con_man, self.defs, 'run-01') self.assertIsInstance(type_var, HedTypeValues, "Constructor should create a HedTypeManager from strings") @@ -66,22 +65,20 @@ def 
test_constructor(self): "Constructor ConditionVariables should have the right length") def test_constructor_from_tabular_input(self): - sidecar1 = Sidecar(self.sidecar_path, hed_schema=self.hed_schema, name='face_sub1_json') - input_data = TabularInput(self.events_path, hed_schema=self.hed_schema, - sidecar=sidecar1, name="face_sub1_events") - test_strings1 = get_assembled_strings(input_data, hed_schema=self.hed_schema, expand_defs=False) - definitions = input_data.get_definitions(as_strings=False).gathered_defs - var_manager = HedTypeValues(HedContextManager(test_strings1, self.hed_schema), definitions, 'run-01') + sidecar1 = Sidecar(self.sidecar_path, name='face_sub1_json') + input_data = TabularInput(self.events_path, sidecar=sidecar1, name="face_sub1_events") + test_strings1, definitions = get_assembled(input_data, sidecar1, self.schema, extra_def_dicts=None, + join_columns=True, shrink_defs=True, expand_defs=False) + var_manager = HedTypeValues(HedContextManager(test_strings1, self.schema), definitions, 'run-01') self.assertIsInstance(var_manager, HedTypeValues, "Constructor should create a HedTypeManager from a tabular input") def test_constructor_variable_caps(self): - sidecar1 = Sidecar(self.sidecar_path, hed_schema=self.hed_schema, name='face_sub1_json') - input_data = TabularInput(self.events_path, sidecar=sidecar1, hed_schema=self.hed_schema, - name="face_sub1_events") - test_strings1 = get_assembled_strings(input_data, hed_schema=self.hed_schema, expand_defs=False) - definitions = input_data.get_definitions(as_strings=False).gathered_defs - var_manager = HedTypeValues(HedContextManager(test_strings1, self.hed_schema), + sidecar1 = Sidecar(self.sidecar_path, name='face_sub1_json') + input_data = TabularInput(self.events_path, sidecar1, name="face_sub1_events") + test_strings1, definitions = get_assembled(input_data, sidecar1, self.schema, extra_def_dicts=None, + join_columns=True, shrink_defs=True, expand_defs=False) + var_manager = 
HedTypeValues(HedContextManager(test_strings1, self.schema), definitions, 'run-01', type_tag="Condition-variable") self.assertIsInstance(var_manager, HedTypeValues, "Constructor should create a HedTypeManager variable caps") @@ -89,34 +86,33 @@ def test_constructor_variable_caps(self): def test_constructor_variable_task(self): sidecar1 = Sidecar(self.sidecar_path, name='face_sub1_json') input_data = TabularInput(self.events_path, sidecar=sidecar1, name="face_sub1_events") - test_strings1 = get_assembled_strings(input_data, hed_schema=self.hed_schema, expand_defs=False) - definitions = input_data.get_definitions(as_strings=False).gathered_defs - var_manager = HedTypeValues(HedContextManager(test_strings1, self.hed_schema), + test_strings1, definitions = get_assembled(input_data, sidecar1, self.schema, extra_def_dicts=None, + join_columns=True, shrink_defs=True, expand_defs=False) + var_manager = HedTypeValues(HedContextManager(test_strings1, self.schema), definitions, 'run-01', type_tag="task") self.assertIsInstance(var_manager, HedTypeValues, "Constructor should create a HedTypeManager variable task") def test_constructor_multiple_values(self): - test_strings1 = [HedString(hed, hed_schema=self.hed_schema) for hed in self.test_strings2] - var_manager = HedTypeValues(HedContextManager(test_strings1, self.hed_schema), self.defs, 'run-01') + hed_strings = [HedString(hed, self.schema) for hed in self.test_strings2] + var_manager = HedTypeValues(HedContextManager(hed_strings, self.schema), self.defs, 'run-01') self.assertIsInstance(var_manager, HedTypeValues, "Constructor should create a HedTypeManager from strings") self.assertEqual(len(var_manager._type_value_map), 3, "Constructor should have right number of type_variables if multiple") def test_constructor_unmatched(self): - test_strings1 = [HedString(hed, hed_schema=self.hed_schema) for hed in self.test_strings3] + hed_strings = [HedString(hed, self.schema) for hed in self.test_strings3] with 
self.assertRaises(HedFileError) as context: - HedTypeValues(HedContextManager(test_strings1, self.hed_schema), self.defs, 'run-01') + HedTypeValues(HedContextManager(hed_strings, self.schema), self.defs, 'run-01') self.assertEqual(context.exception.args[0], 'UnmatchedOffset') def test_get_variable_factors(self): - sidecar1 = Sidecar(self.sidecar_path, hed_schema=self.hed_schema, name='face_sub1_json') - input_data = TabularInput(self.events_path, sidecar=sidecar1, hed_schema=self.hed_schema, - name="face_sub1_events") - test_strings1 = get_assembled_strings(input_data, hed_schema=self.hed_schema, expand_defs=False) - definitions = input_data.get_definitions(as_strings=False).gathered_defs - var_manager = HedTypeValues(HedContextManager(test_strings1, self.hed_schema), definitions, 'run-01') + sidecar1 = Sidecar(self.sidecar_path, name='face_sub1_json') + input_data = TabularInput(self.events_path, sidecar1, name="face_sub1_events") + test_strings1, definitions = get_assembled(input_data, sidecar1, self.schema, extra_def_dicts=None, + join_columns=True, shrink_defs=True, expand_defs=False) + var_manager = HedTypeValues(HedContextManager(test_strings1, self.schema), definitions, 'run-01') df_new1 = var_manager.get_type_factors() self.assertIsInstance(df_new1, DataFrame) self.assertEqual(len(df_new1), 200) @@ -128,47 +124,45 @@ def test_get_variable_factors(self): self.assertIsNone(df_new3) def test_str(self): - sidecar1 = Sidecar(self.sidecar_path, hed_schema=self.hed_schema, name='face_sub1_json') - input_data = TabularInput(self.events_path, hed_schema=self.hed_schema, - sidecar=sidecar1, name="face_sub1_events") - test_strings1 = get_assembled_strings(input_data, hed_schema=self.hed_schema, expand_defs=False) - definitions = input_data.get_definitions(as_strings=False).gathered_defs - var_manager = HedTypeValues(HedContextManager(test_strings1, self.hed_schema), definitions, 'run-01') + sidecar1 = Sidecar(self.sidecar_path, name='face_sub1_json') + input_data = 
TabularInput(self.events_path, sidecar1, name="face_sub1_events") + test_strings1, definitions = get_assembled(input_data, sidecar1, self.schema, extra_def_dicts=None, + join_columns=True, shrink_defs=True, expand_defs=False) + var_manager = HedTypeValues(HedContextManager(test_strings1, self.schema), definitions, 'run-01') new_str = str(var_manager) self.assertIsInstance(new_str, str) def test_summarize_variables(self): - sidecar1 = Sidecar(self.sidecar_path, hed_schema=self.hed_schema, name='face_sub1_json') - input_data = TabularInput(self.events_path, hed_schema=self.hed_schema, - sidecar=sidecar1, name="face_sub1_events") - test_strings1 = get_assembled_strings(input_data, hed_schema=self.hed_schema, expand_defs=False) - definitions = input_data.get_definitions(as_strings=False).gathered_defs - var_manager = HedTypeValues(HedContextManager(test_strings1, self.hed_schema), definitions, 'run-01') + sidecar1 = Sidecar(self.sidecar_path, name='face_sub1_json') + input_data = TabularInput(self.events_path, sidecar1, name="face_sub1_events") + test_strings1, definitions = get_assembled(input_data, sidecar1, self.schema, extra_def_dicts=None, + join_columns=True, shrink_defs=True, expand_defs=False) + var_manager = HedTypeValues(HedContextManager(test_strings1, self.schema), definitions, 'run-01') summary = var_manager.get_summary() self.assertIsInstance(summary, dict, "get_summary produces a dictionary if not json") self.assertEqual(len(summary), 3, "Summarize_variables has right number of condition type_variables") self.assertIn("key-assignment", summary, "get_summary has a correct key") def test_extract_definition_variables(self): - test_strings1 = [HedString(hed, hed_schema=self.hed_schema) for hed in self.test_strings1] - var_manager = HedTypeValues(HedContextManager(test_strings1, self.hed_schema), self.defs, 'run-01') + hed_strings = [HedString(hed, self.schema) for hed in self.test_strings1] + var_manager = HedTypeValues(HedContextManager(hed_strings, 
self.schema), self.defs, 'run-01') var_levels = var_manager._type_value_map['var3'].levels self.assertNotIn('cond3/7', var_levels, "_extract_definition_variables before extraction def/cond3/7 not in levels") - tag = HedTag("Def/Cond3/7", hed_schema=self.hed_schema) + tag = HedTag("Def/Cond3/7", hed_schema=self.schema) var_manager._extract_definition_variables(tag, 5) self.assertIn('cond3/7', var_levels, "_extract_definition_variables after extraction def/cond3/7 not in levels") def test_get_variable_names(self): - test_strings1 = [HedString(hed, hed_schema=self.hed_schema) for hed in self.test_strings1] - conditions1 = HedTypeValues(HedContextManager(test_strings1, self.hed_schema), self.defs, 'run-01') + hed_strings = [HedString(hed, self.schema) for hed in self.test_strings1] + conditions1 = HedTypeValues(HedContextManager(hed_strings, self.schema), self.defs, 'run-01') list1 = conditions1.get_type_value_names() self.assertEqual(len(list1), 8, "get_variable_tags list should have the right length") def test_get_variable_def_names(self): - test_strings1 = [HedString(hed, hed_schema=self.hed_schema) for hed in self.test_strings1] - conditions1 = HedTypeValues(HedContextManager(test_strings1, self.hed_schema), self.defs, 'run-01') + hed_strings = [HedString(hed, self.schema) for hed in self.test_strings1] + conditions1 = HedTypeValues(HedContextManager(hed_strings, self.schema), self.defs, 'run-01') list1 = conditions1.get_type_def_names() self.assertEqual(len(list1), 5, "get_type_def_names list should have the right length") diff --git a/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py b/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py index d82d14ea0..5f5ee41bf 100644 --- a/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py +++ b/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py @@ -2,6 +2,7 @@ import os import unittest import pandas as pd +from hed.models.df_util import get_assembled from 
hed.tools.remodeling.dispatcher import Dispatcher from hed.tools.remodeling.operations.summarize_hed_tags_op import SummarizeHedTagsOp, HedTagSummaryContext @@ -96,16 +97,17 @@ def test_quick3(self): } } my_json_str = json.dumps(my_json) - my_sidecar = Sidecar(StringIO(my_json_str), hed_schema=my_schema) + my_sidecar = Sidecar(StringIO(my_json_str)) data = [[0.5, 0, 'code1', 'Description/This is a test, Label/Temp, (Def/Blech1, Green)'], [0.6, 0, 'code2', 'Sensory-event, ((Description/Animal, Condition-variable/Blech))']] df = pd.DataFrame(data, columns=['onset', 'duration', 'code', 'HED']) - input_data = TabularInput(df, hed_schema=my_schema, sidecar=my_sidecar) + input_data = TabularInput(df, sidecar=my_sidecar) counts = HedTagCounts('myName', 2) summary_dict = {} - for objs in input_data.iter_dataframe(hed_ops=[my_schema], return_string_only=False, - expand_defs=True, remove_definitions=True): - counts.update_event_counts(objs['HED'], 'myName') + hed_strings = get_assembled(input_data, my_sidecar, my_schema, extra_def_dicts=None, join_columns=True, + shrink_defs=False, expand_defs=True) + for hed in hed_strings: + counts.update_event_counts(hed, 'myName') summary_dict['myName'] = counts def test_quick4(self): @@ -117,10 +119,13 @@ def test_quick4(self): data_path = os.path.realpath(os.path.join(path, 'sub-002_task-FacePerception_run-1_events.tsv')) json_path = os.path.realpath(os.path.join(path, 'task-FacePerception_events.json')) my_schema = load_schema_version('8.1.0') - sidecar = Sidecar(json_path, hed_schema=my_schema) - input_data = TabularInput(data_path, hed_schema=my_schema, sidecar=sidecar) + sidecar = Sidecar(json_path,) + input_data = TabularInput(data_path, sidecar=sidecar) counts = HedTagCounts('myName', 2) summary_dict = {} + hed_strings, definitions = get_assembled(input_data, sidecar, my_schema, + extra_def_dicts=None, join_columns=True, + shrink_defs=False, expand_defs=True) for objs in input_data.iter_dataframe(hed_ops=[my_schema], 
return_string_only=False, expand_defs=True, remove_definitions=True): x = objs['HED'] From 84cf4e01679d0be93129e2b7143ca60cd3fa973b Mon Sep 17 00:00:00 2001 From: IanCa Date: Fri, 17 Mar 2023 18:44:20 -0500 Subject: [PATCH 06/19] Add more unit tests. better nan and empty column handling --- hed/models/base_input.py | 58 ++++-- hed/validator/spreadsheet_validator.py | 1 + tests/models/test_base_file_input.py | 103 --------- tests/models/test_base_input.py | 276 +++++++++++++++++++++++++ tests/models/test_df_util.py | 45 +++- 5 files changed, 357 insertions(+), 126 deletions(-) delete mode 100644 tests/models/test_base_file_input.py create mode 100644 tests/models/test_base_input.py diff --git a/hed/models/base_input.py b/hed/models/base_input.py index 869bc4ea6..f50ea5e4c 100644 --- a/hed/models/base_input.py +++ b/hed/models/base_input.py @@ -7,6 +7,7 @@ from hed.models.column_mapper import ColumnMapper from hed.errors.exceptions import HedFileError, HedExceptions from hed.errors.error_reporter import ErrorHandler +import pandas as pd class BaseInput: @@ -66,10 +67,7 @@ def __init__(self, file, file_type=None, worksheet_name=None, has_column_names=T elif not file: raise HedFileError(HedExceptions.FILE_NOT_FOUND, "Empty file passed to BaseInput.", file) elif input_type in self.TEXT_EXTENSION: - self._dataframe = pandas.read_csv(file, delimiter='\t', header=pandas_header, - dtype=str, keep_default_na=True, na_values=None) - # Convert nan values to a known value - self._dataframe = self._dataframe.fillna("n/a") + self._dataframe = pandas.read_csv(file, delimiter='\t', header=pandas_header, dtype=str) elif input_type in self.EXCEL_EXTENSION: self._loaded_workbook = openpyxl.load_workbook(file) loaded_worksheet = self.get_worksheet(self._worksheet_name) @@ -364,7 +362,7 @@ def assemble(self, mapper=None): """ if mapper is None: mapper = self._mapper - import pandas as pd + transformers, need_categorical = mapper.get_transformers() if not transformers: return None @@ 
-374,35 +372,53 @@ def assemble(self, mapper=None): all_columns = all_columns.transform(transformers) - possible_column_references = [f"{column_name}" for column_name in self.columns if - column_name.lower() != "hed"] + return self._insert_columns(all_columns, list(transformers.keys())) + + @staticmethod + def _find_column_refs(df): found_column_references = [] - for column_name in all_columns: - df = all_columns[column_name].str.findall("\[([a-z_\-0-9]+)\]", re.IGNORECASE) - u_vals = pd.Series([j for i in df for j in i], dtype=str) + for column_name in df: + df_temp = df[column_name].str.findall("\[([a-z_\-0-9]+)\]", re.IGNORECASE) + u_vals = pd.Series([j for i in df_temp for j in i], dtype=str) u_vals = u_vals.unique() for val in u_vals: if val not in found_column_references: found_column_references.append(val) + return found_column_references + + @staticmethod + def _insert_columns(df, known_columns=None): + if known_columns is None: + known_columns = list(df.columns) + possible_column_references = [f"{column_name}" for column_name in df.columns if + column_name.lower() != "hed"] + found_column_references = BaseInput._find_column_refs(df) + + invalid_replacements = [col for col in found_column_references if col not in possible_column_references] + if invalid_replacements: + # todo: This check may be moved to validation + raise ValueError(f"Bad column references found(columns do not exist): {invalid_replacements}") valid_replacements = [col for col in found_column_references if col in possible_column_references] - column_names = list(transformers.keys()) + # todo: break this into a sub function(probably) + column_names = known_columns for column_name in valid_replacements: column_names.remove(column_name) - saved_columns = all_columns[valid_replacements] + saved_columns = df[valid_replacements] for column_name in column_names: for replacing_name in valid_replacements: column_name_brackets = f"[{replacing_name}]" - all_columns[column_name] = 
pd.Series(x.replace(column_name_brackets, y) for x, y - in zip(all_columns[column_name], saved_columns[replacing_name])) - all_columns = all_columns[column_names] + df[column_name] = pd.Series(x.replace(column_name_brackets, y) for x, y + in zip(df[column_name], saved_columns[replacing_name])) + df = df[column_names] - return all_columns + return df @staticmethod def combine_dataframe(dataframe): - """ Combines all columns in the given dataframe into a single hed string series. + """ Combines all columns in the given dataframe into a single HED string series, + skipping empty columns and columns with empty strings. Parameters: dataframe(Dataframe): The dataframe to combine @@ -410,8 +426,8 @@ def combine_dataframe(dataframe): Returns: Series: the assembled series """ - dataframe = dataframe.agg(', '.join, axis=1) + dataframe = dataframe.agg( + lambda x: ', '.join(filter(lambda e: pd.notna(e) and e != "", x)), axis=1 + ) - # Potentially better ways to handle removing n/a by never inserting them to begin with. 
- dataframe = dataframe.replace("(, n/a|n/a,)", "", regex=True) - return dataframe + return dataframe \ No newline at end of file diff --git a/hed/validator/spreadsheet_validator.py b/hed/validator/spreadsheet_validator.py index 136b5aa73..ba1f341ac 100644 --- a/hed/validator/spreadsheet_validator.py +++ b/hed/validator/spreadsheet_validator.py @@ -41,6 +41,7 @@ def validate(self, data, def_dicts=None, name=None, error_handler=None): # Check the structure of the input data, if it's a BaseInput if isinstance(data, BaseInput): issues += self._validate_column_structure(data, error_handler) + # todo ian: Add more checks here for column inserters data = data.dataframe_a # Check the rows of the input data diff --git a/tests/models/test_base_file_input.py b/tests/models/test_base_file_input.py deleted file mode 100644 index 8314072bd..000000000 --- a/tests/models/test_base_file_input.py +++ /dev/null @@ -1,103 +0,0 @@ -import unittest -import os -import shutil -from hed import Sidecar -from hed import BaseInput, TabularInput -from hed.models.column_mapper import ColumnMapper -from hed.models import DefinitionDict -from hed import schema - -# TODO: Add tests for base_file_input and include correct handling of 'n/a' - - -class Test(unittest.TestCase): - @classmethod - def setUpClass(cls): - # todo: clean up these unit tests/add more - base_data_dir = os.path.realpath(os.path.join(os.path.dirname(__file__), '../data/')) - cls.base_data_dir = base_data_dir - json_def_filename = os.path.join(base_data_dir, "sidecar_tests/both_types_events_with_defs.json") - # cls.json_def_filename = json_def_filename - json_def_sidecar = Sidecar(json_def_filename) - events_path = os.path.join(base_data_dir, '../data/validator_tests/bids_events_no_index.tsv') - cls.tabular_file = TabularInput(events_path, sidecar=json_def_sidecar) - - base_output = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../data/tests_output/") - cls.base_output_folder = base_output - os.makedirs(base_output, 
exist_ok=True) - - bids_root_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../data/bids_tests/eeg_ds003645s_hed')) - schema_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../data/schema_tests/HED8.0.0.xml')) - cls.bids_root_path = bids_root_path - json_path = os.path.realpath(os.path.join(bids_root_path, 'task-FacePerception_events.json')) - events_path = os.path.realpath(os.path.join(bids_root_path, - 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv')) - - cls.hed_schema = schema.load_schema(schema_path) - sidecar1 = Sidecar(json_path, name='face_sub1_json') - mapper1 = ColumnMapper(sidecar=sidecar1, optional_tag_columns=['HED'], warn_on_missing_column=False) - cls.input_data1 = BaseInput(events_path, file_type='.tsv', has_column_names=True, - name="face_sub1_events", mapper=mapper1, allow_blank_names=False) - cls.input_data2 = BaseInput(events_path, file_type='.tsv', has_column_names=True, name="face_sub2_events") - - @classmethod - def tearDownClass(cls): - shutil.rmtree(cls.base_output_folder) - - def test_gathered_defs(self): - # todo: add unit tests for definitions in tsv file - defs = DefinitionDict.get_as_strings(self.tabular_file._sidecar.extract_definitions(hed_schema=self.hed_schema)) - expected_defs = { - 'jsonfiledef': '(Item/JsonDef1/#,Item/JsonDef1)', - 'jsonfiledef2': '(Item/JsonDef2/#,Item/JsonDef2)', - 'jsonfiledef3': '(Item/JsonDef3/#)', - 'takesvaluedef': '(Age/#)', - 'valueclassdef': '(Acceleration/#)' - } - self.assertEqual(defs, expected_defs) - - # def test_missing_column_name_issue(self): - # schema_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), - # '../data/validator_tests/bids_schema.mediawiki') - # events_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), - # '../data/validator_tests/bids_events_bad_column_name.tsv') - # - # hed_schema = schema.load_schema(schema_path) - # json_path = 
os.path.join(os.path.dirname(os.path.abspath(__file__)), - # "../data/validator_tests/bids_events.json") - # validator = HedValidator(hed_schema=hed_schema) - # sidecar = Sidecar(json_path) - # issues = sidecar.validate_entries(validator) - # self.assertEqual(len(issues), 0) - # input_file = TabularInput(events_path, sidecars=sidecar) - # - # validation_issues = input_file.validate_sidecar(validator) - # self.assertEqual(len(validation_issues), 0) - # validation_issues = input_file.validate_file(validator, check_for_warnings=True) - # self.assertEqual(len(validation_issues), 1) - # - # def test_expand_column_issues(self): - # schema_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), - # '../data/validator_tests/bids_schema.mediawiki') - # events_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), - # '../data/validator_tests/bids_events_bad_category_key.tsv') - # - # hed_schema = schema.load_schema(schema_path) - # json_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), - # "../data/validator_tests/bids_events.json") - # validator = HedValidator(hed_schema=hed_schema) - # sidecar = Sidecar(json_path) - # issues = sidecar.validate_entries(validator) - # self.assertEqual(len(issues), 0) - # input_file = TabularInput(events_path, sidecars=sidecar) - # - # validation_issues = input_file.validate_sidecar(validator) - # self.assertEqual(len(validation_issues), 0) - # validation_issues = input_file.validate_file(validator, check_for_warnings=True) - # self.assertEqual(len(validation_issues), 1) - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/models/test_base_input.py b/tests/models/test_base_input.py new file mode 100644 index 000000000..392599f78 --- /dev/null +++ b/tests/models/test_base_input.py @@ -0,0 +1,276 @@ +import io +import unittest +import os +import shutil +from hed import Sidecar +from hed import BaseInput, TabularInput +from hed.models.column_mapper import ColumnMapper +from hed.models import 
DefinitionDict +from hed import schema +import pandas as pd +import numpy as np + + +class Test(unittest.TestCase): + @classmethod + def setUpClass(cls): + # todo: clean up these unit tests/add more + base_data_dir = os.path.realpath(os.path.join(os.path.dirname(__file__), '../data/')) + cls.base_data_dir = base_data_dir + json_def_filename = os.path.join(base_data_dir, "sidecar_tests/both_types_events_with_defs.json") + # cls.json_def_filename = json_def_filename + json_def_sidecar = Sidecar(json_def_filename) + events_path = os.path.join(base_data_dir, '../data/validator_tests/bids_events_no_index.tsv') + cls.tabular_file = TabularInput(events_path, sidecar=json_def_sidecar) + + base_output = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../data/tests_output/") + cls.base_output_folder = base_output + os.makedirs(base_output, exist_ok=True) + + bids_root_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), + '../data/bids_tests/eeg_ds003645s_hed')) + schema_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), + '../data/schema_tests/HED8.0.0.xml')) + cls.bids_root_path = bids_root_path + json_path = os.path.realpath(os.path.join(bids_root_path, 'task-FacePerception_events.json')) + events_path = os.path.realpath(os.path.join(bids_root_path, + 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv')) + + cls.hed_schema = schema.load_schema(schema_path) + sidecar1 = Sidecar(json_path, name='face_sub1_json') + mapper1 = ColumnMapper(sidecar=sidecar1, optional_tag_columns=['HED'], warn_on_missing_column=False) + cls.input_data1 = BaseInput(events_path, file_type='.tsv', has_column_names=True, + name="face_sub1_events", mapper=mapper1, allow_blank_names=False) + cls.input_data2 = BaseInput(events_path, file_type='.tsv', has_column_names=True, name="face_sub2_events") + + @classmethod + def tearDownClass(cls): + shutil.rmtree(cls.base_output_folder) + + def test_gathered_defs(self): + # todo: add 
unit tests for definitions in tsv file + defs = DefinitionDict.get_as_strings(self.tabular_file._sidecar.extract_definitions(hed_schema=self.hed_schema)) + expected_defs = { + 'jsonfiledef': '(Item/JsonDef1/#,Item/JsonDef1)', + 'jsonfiledef2': '(Item/JsonDef2/#,Item/JsonDef2)', + 'jsonfiledef3': '(Item/JsonDef3/#)', + 'takesvaluedef': '(Age/#)', + 'valueclassdef': '(Acceleration/#)' + } + self.assertEqual(defs, expected_defs) + + # def test_missing_column_name_issue(self): + # schema_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), + # '../data/validator_tests/bids_schema.mediawiki') + # events_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), + # '../data/validator_tests/bids_events_bad_column_name.tsv') + # + # hed_schema = schema.load_schema(schema_path) + # json_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), + # "../data/validator_tests/bids_events.json") + # validator = HedValidator(hed_schema=hed_schema) + # sidecar = Sidecar(json_path) + # issues = sidecar.validate_entries(validator) + # self.assertEqual(len(issues), 0) + # input_file = TabularInput(events_path, sidecars=sidecar) + # + # validation_issues = input_file.validate_sidecar(validator) + # self.assertEqual(len(validation_issues), 0) + # validation_issues = input_file.validate_file(validator, check_for_warnings=True) + # self.assertEqual(len(validation_issues), 1) + # + # def test_expand_column_issues(self): + # schema_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), + # '../data/validator_tests/bids_schema.mediawiki') + # events_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), + # '../data/validator_tests/bids_events_bad_category_key.tsv') + # + # hed_schema = schema.load_schema(schema_path) + # json_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), + # "../data/validator_tests/bids_events.json") + # validator = HedValidator(hed_schema=hed_schema) + # sidecar = Sidecar(json_path) + # issues = 
sidecar.validate_entries(validator) + # self.assertEqual(len(issues), 0) + # input_file = TabularInput(events_path, sidecars=sidecar) + # + # validation_issues = input_file.validate_sidecar(validator) + # self.assertEqual(len(validation_issues), 0) + # validation_issues = input_file.validate_file(validator, check_for_warnings=True) + # self.assertEqual(len(validation_issues), 1) + + +class TestInsertColumns(unittest.TestCase): + + def test_insert_columns_simple(self): + df = pd.DataFrame({ + "column1": ["[column2], Event, Action"], + "column2": ["Item"] + }) + expected_df = pd.DataFrame({ + "column1": ["Item, Event, Action"] + }) + result = BaseInput._insert_columns(df) + pd.testing.assert_frame_equal(result, expected_df) + + def test_insert_columns_multiple_rows(self): + df = pd.DataFrame({ + "column1": ["[column2], Event, Action", "Event, Action"], + "column2": ["Item", "Subject"] + }) + expected_df = pd.DataFrame({ + "column1": ["Item, Event, Action", "Event, Action"] + }) + result = BaseInput._insert_columns(df) + pd.testing.assert_frame_equal(result, expected_df) + + # def test_insert_columns_no_circular_reference(self): + # df = pd.DataFrame({ + # "column1": ["[column2], Event, Action"], + # "column2": ["[column1], Item"] + # }) + # with self.assertRaises(ValueError): + # result = BaseInput._insert_columns(df) + + def test_insert_columns_multiple_columns(self): + df = pd.DataFrame({ + "column1": ["[column2], Event, [column3], Action"], + "column2": ["Item"], + "column3": ["Subject"] + }) + expected_df = pd.DataFrame({ + "column1": ["Item, Event, Subject, Action"] + }) + result = BaseInput._insert_columns(df) + pd.testing.assert_frame_equal(result, expected_df) + + def test_insert_columns_invalid_column_name(self): + df = pd.DataFrame({ + "column1": ["[invalid_column], Event, Action"], + "column2": ["Item"] + }) + with self.assertRaises(ValueError): + result = BaseInput._insert_columns(df) + + def test_insert_columns_four_columns(self): + df = pd.DataFrame({ + 
"column1": ["[column2], Event, [column3], Action"], + "column2": ["Item"], + "column3": ["Subject"], + "column4": ["Data"] + }) + expected_df = pd.DataFrame({ + "column1": ["Item, Event, Subject, Action"], + "column4": ["Data"] + }) + result = BaseInput._insert_columns(df) + pd.testing.assert_frame_equal(result, expected_df) + + # def test_insert_columns_invalid_syntax(self): + # df = pd.DataFrame({ + # "column1": ["column2], Event, Action"], + # "column2": ["Item"] + # }) + # with self.assertRaises(ValueError): + # result = BaseInput._insert_columns(df) + + # def test_insert_columns_no_self_reference(self): + # df = pd.DataFrame({ + # "column1": ["[column1], Event, Action"], + # "column2": ["Item"] + # }) + # with self.assertRaises(ValueError): + # result = BaseInput._insert_columns(df) + + +class TestCombineDataframe(unittest.TestCase): + def test_combine_dataframe_with_strings(self): + data = { + 'A': ['apple', 'banana', 'cherry'], + 'B': ['dog', 'elephant', 'fox'], + 'C': ['guitar', 'harmonica', 'piano'] + } + df = pd.DataFrame(data) + result = BaseInput.combine_dataframe(df) + expected = pd.Series(['apple, dog, guitar', 'banana, elephant, harmonica', 'cherry, fox, piano']) + self.assertTrue(result.equals(expected)) + + def test_combine_dataframe_with_nan_values(self): + data = { + 'A': ['apple', np.nan, 'cherry'], + 'B': [np.nan, 'elephant', 'fox'], + 'C': ['guitar', 'harmonica', np.nan] + } + df = pd.DataFrame(data) + result = BaseInput.combine_dataframe(df) + expected = pd.Series(['apple, guitar', 'elephant, harmonica', 'cherry, fox']) + self.assertTrue(result.equals(expected)) + + def test_combine_dataframe_with_empty_values(self): + data = { + 'A': ['apple', '', 'cherry'], + 'B': ['', 'elephant', 'fox'], + 'C': ['guitar', 'harmonica', ''] + } + df = pd.DataFrame(data) + result = BaseInput.combine_dataframe(df) + expected = pd.Series(['apple, guitar', 'elephant, harmonica', 'cherry, fox']) + self.assertTrue(result.equals(expected)) + + def 
test_combine_dataframe_with_mixed_values(self): + data = { + 'A': ['apple', np.nan, 'cherry', 'n/a', ''], + 'B': [np.nan, 'elephant', 'fox', 'n/a', ''], + 'C': ['guitar', 'harmonica', np.nan, 'n/a', ''] + } + df = pd.DataFrame(data) + csv_buffer = io.StringIO() + df.to_csv(csv_buffer, header=False, index=False) + csv_buffer.seek(0) + + # Use the same loading function we normally use to verify n/a translates right. + loaded_df = pd.read_csv(csv_buffer, header=None) + result = BaseInput.combine_dataframe(loaded_df) + expected = pd.Series(['apple, guitar', 'elephant, harmonica', 'cherry, fox', '', '']) + self.assertTrue(result.equals(expected)) + + +class TestColumnRefs(unittest.TestCase): + def test_simple_column_refs(self): + data1 = { + 'A': ['[col1], [col2]', 'tag1, tag2'], + 'B': ['tag3, tag4', '[col3]'], + } + df1 = pd.DataFrame(data1) + result1 = BaseInput._find_column_refs(df1) + expected1 = ['col1', 'col2', 'col3'] + self.assertEqual(result1, expected1) + + def test_mixed_cases_and_patterns(self): + data2 = { + 'A': ['[Col1], [col2]', 'tag1, [Col3]', 'tag3, [COL4]', '[col5], [col6]'], + } + df2 = pd.DataFrame(data2) + result2 = BaseInput._find_column_refs(df2) + expected2 = ['Col1', 'col2', 'Col3', 'COL4', 'col5', 'col6'] + self.assertEqual(result2, expected2) + + def test_no_column_references(self): + data3 = { + 'A': ['tag1, tag2', 'tag3, tag4'], + 'B': ['tag5, tag6', 'tag7, tag8'], + } + df3 = pd.DataFrame(data3) + result3 = BaseInput._find_column_refs(df3) + expected3 = [] + self.assertEqual(result3, expected3) + + def test_incomplete_square_brackets(self): + data4 = { + 'A': ['[col1, [col2]', 'tag1, [Col3'], + 'B': ['tag3, [COL4', '[col5, col6]'], + } + df4 = pd.DataFrame(data4) + result4 = BaseInput._find_column_refs(df4) + expected4 = ['col2'] + self.assertEqual(result4, expected4) \ No newline at end of file diff --git a/tests/models/test_df_util.py b/tests/models/test_df_util.py index bc9c907b7..e10e2a4a3 100644 --- a/tests/models/test_df_util.py +++ 
b/tests/models/test_df_util.py @@ -3,7 +3,7 @@ from hed import load_schema_version -from hed.models.df_util import shrink_defs, expand_defs +from hed.models.df_util import shrink_defs, expand_defs, convert_to_form from hed import DefinitionDict @@ -111,4 +111,45 @@ def test_expand_defs_series_placeholder(self): series = pd.Series(["Def/TestDefPlaceholder/123,Item/SomeItem"]) expected_series = pd.Series(["(Def-expand/TestDefPlaceholder/123,(Action/TestDef1/123,Action/TestDef2)),Item/SomeItem"]) result = expand_defs(series, self.schema, self.def_dict, None) - pd.testing.assert_series_equal(result, expected_series) \ No newline at end of file + pd.testing.assert_series_equal(result, expected_series) + + +class TestConvertToForm(unittest.TestCase): + def setUp(self): + self.schema = load_schema_version() + + def test_convert_to_form_short_tags(self): + df = pd.DataFrame({"column1": ["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Action/Perceive/See"]}) + expected_df = pd.DataFrame({"column1": ["Azure,See"]}) + result = convert_to_form(df, self.schema, "short_tag", ['column1']) + pd.testing.assert_frame_equal(result, expected_df) + + def test_convert_to_form_long_tags(self): + df = pd.DataFrame({"column1": ["CSS-color/White-color/Azure,Action/Perceive/See"]}) + expected_df = pd.DataFrame({"column1": ["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Action/Perceive/See"]}) + result = convert_to_form(df, self.schema, "long_tag", ['column1']) + pd.testing.assert_frame_equal(result, expected_df) + + def test_convert_to_form_series_short_tags(self): + series = pd.Series(["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Action/Perceive/See"]) + expected_series = pd.Series(["Azure,See"]) + result = convert_to_form(series, self.schema, "short_tag") + pd.testing.assert_series_equal(result, expected_series) + + def 
test_convert_to_form_series_long_tags(self): + series = pd.Series(["CSS-color/White-color/Azure,Action/Perceive/See"]) + expected_series = pd.Series(["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Action/Perceive/See"]) + result = convert_to_form(series, self.schema, "long_tag") + pd.testing.assert_series_equal(result, expected_series) + + def test_convert_to_form_multiple_tags_short(self): + df = pd.DataFrame({"column1": ["Visual-attribute/Color/CSS-color/White-color/Azure,Biological-item/Anatomical-item/Body-part/Head/Face/Nose,Spatiotemporal-value/Rate-of-change/Acceleration/4.5 m-per-s^2"]}) + expected_df = pd.DataFrame({"column1": ["Azure,Nose,4.5 m-per-s^2"]}) + result = convert_to_form(df, self.schema, "short_tag", ['column1']) + pd.testing.assert_frame_equal(result, expected_df) + + def test_convert_to_form_multiple_tags_long(self): + df = pd.DataFrame({"column1": ["CSS-color/White-color/Azure,Anatomical-item/Body-part/Head/Face/Nose,Rate-of-change/Acceleration/4.5 m-per-s^2"]}) + expected_df = pd.DataFrame({"column1": ["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Item/Biological-item/Anatomical-item/Body-part/Head/Face/Nose,Property/Data-property/Data-value/Spatiotemporal-value/Rate-of-change/Acceleration/4.5 m-per-s^2"]}) + result = convert_to_form(df, self.schema, "long_tag", ['column1']) + pd.testing.assert_frame_equal(result, expected_df) \ No newline at end of file From c8db8ba8f511a98078a203f17b7a622c7e3c7170 Mon Sep 17 00:00:00 2001 From: VisLab <1189050+VisLab@users.noreply.github.com> Date: Sun, 19 Mar 2023 07:36:58 -0500 Subject: [PATCH 07/19] Revert "Add more unit tests. 
better nan and empty column handling" --- hed/models/base_input.py | 58 ++---- hed/validator/spreadsheet_validator.py | 1 - tests/models/test_base_file_input.py | 103 +++++++++ tests/models/test_base_input.py | 276 ------------------------- tests/models/test_df_util.py | 45 +--- 5 files changed, 126 insertions(+), 357 deletions(-) create mode 100644 tests/models/test_base_file_input.py delete mode 100644 tests/models/test_base_input.py diff --git a/hed/models/base_input.py b/hed/models/base_input.py index f50ea5e4c..869bc4ea6 100644 --- a/hed/models/base_input.py +++ b/hed/models/base_input.py @@ -7,7 +7,6 @@ from hed.models.column_mapper import ColumnMapper from hed.errors.exceptions import HedFileError, HedExceptions from hed.errors.error_reporter import ErrorHandler -import pandas as pd class BaseInput: @@ -67,7 +66,10 @@ def __init__(self, file, file_type=None, worksheet_name=None, has_column_names=T elif not file: raise HedFileError(HedExceptions.FILE_NOT_FOUND, "Empty file passed to BaseInput.", file) elif input_type in self.TEXT_EXTENSION: - self._dataframe = pandas.read_csv(file, delimiter='\t', header=pandas_header, dtype=str) + self._dataframe = pandas.read_csv(file, delimiter='\t', header=pandas_header, + dtype=str, keep_default_na=True, na_values=None) + # Convert nan values to a known value + self._dataframe = self._dataframe.fillna("n/a") elif input_type in self.EXCEL_EXTENSION: self._loaded_workbook = openpyxl.load_workbook(file) loaded_worksheet = self.get_worksheet(self._worksheet_name) @@ -362,7 +364,7 @@ def assemble(self, mapper=None): """ if mapper is None: mapper = self._mapper - + import pandas as pd transformers, need_categorical = mapper.get_transformers() if not transformers: return None @@ -372,53 +374,35 @@ def assemble(self, mapper=None): all_columns = all_columns.transform(transformers) - return self._insert_columns(all_columns, list(transformers.keys())) - - @staticmethod - def _find_column_refs(df): + possible_column_references = 
[f"{column_name}" for column_name in self.columns if + column_name.lower() != "hed"] found_column_references = [] - for column_name in df: - df_temp = df[column_name].str.findall("\[([a-z_\-0-9]+)\]", re.IGNORECASE) - u_vals = pd.Series([j for i in df_temp for j in i], dtype=str) + for column_name in all_columns: + df = all_columns[column_name].str.findall("\[([a-z_\-0-9]+)\]", re.IGNORECASE) + u_vals = pd.Series([j for i in df for j in i], dtype=str) u_vals = u_vals.unique() for val in u_vals: if val not in found_column_references: found_column_references.append(val) - return found_column_references - - @staticmethod - def _insert_columns(df, known_columns=None): - if known_columns is None: - known_columns = list(df.columns) - possible_column_references = [f"{column_name}" for column_name in df.columns if - column_name.lower() != "hed"] - found_column_references = BaseInput._find_column_refs(df) - - invalid_replacements = [col for col in found_column_references if col not in possible_column_references] - if invalid_replacements: - # todo: This check may be moved to validation - raise ValueError(f"Bad column references found(columns do not exist): {invalid_replacements}") valid_replacements = [col for col in found_column_references if col in possible_column_references] - # todo: break this into a sub function(probably) - column_names = known_columns + column_names = list(transformers.keys()) for column_name in valid_replacements: column_names.remove(column_name) - saved_columns = df[valid_replacements] + saved_columns = all_columns[valid_replacements] for column_name in column_names: for replacing_name in valid_replacements: column_name_brackets = f"[{replacing_name}]" - df[column_name] = pd.Series(x.replace(column_name_brackets, y) for x, y - in zip(df[column_name], saved_columns[replacing_name])) - df = df[column_names] + all_columns[column_name] = pd.Series(x.replace(column_name_brackets, y) for x, y + in zip(all_columns[column_name], 
saved_columns[replacing_name])) + all_columns = all_columns[column_names] - return df + return all_columns @staticmethod def combine_dataframe(dataframe): - """ Combines all columns in the given dataframe into a single HED string series, - skipping empty columns and columns with empty strings. + """ Combines all columns in the given dataframe into a single hed string series. Parameters: dataframe(Dataframe): The dataframe to combine @@ -426,8 +410,8 @@ def combine_dataframe(dataframe): Returns: Series: the assembled series """ - dataframe = dataframe.agg( - lambda x: ', '.join(filter(lambda e: pd.notna(e) and e != "", x)), axis=1 - ) + dataframe = dataframe.agg(', '.join, axis=1) - return dataframe \ No newline at end of file + # Potentially better ways to handle removing n/a by never inserting them to begin with. + dataframe = dataframe.replace("(, n/a|n/a,)", "", regex=True) + return dataframe diff --git a/hed/validator/spreadsheet_validator.py b/hed/validator/spreadsheet_validator.py index ba1f341ac..136b5aa73 100644 --- a/hed/validator/spreadsheet_validator.py +++ b/hed/validator/spreadsheet_validator.py @@ -41,7 +41,6 @@ def validate(self, data, def_dicts=None, name=None, error_handler=None): # Check the structure of the input data, if it's a BaseInput if isinstance(data, BaseInput): issues += self._validate_column_structure(data, error_handler) - # todo ian: Add more checks here for column inserters data = data.dataframe_a # Check the rows of the input data diff --git a/tests/models/test_base_file_input.py b/tests/models/test_base_file_input.py new file mode 100644 index 000000000..8314072bd --- /dev/null +++ b/tests/models/test_base_file_input.py @@ -0,0 +1,103 @@ +import unittest +import os +import shutil +from hed import Sidecar +from hed import BaseInput, TabularInput +from hed.models.column_mapper import ColumnMapper +from hed.models import DefinitionDict +from hed import schema + +# TODO: Add tests for base_file_input and include correct handling of 
'n/a' + + +class Test(unittest.TestCase): + @classmethod + def setUpClass(cls): + # todo: clean up these unit tests/add more + base_data_dir = os.path.realpath(os.path.join(os.path.dirname(__file__), '../data/')) + cls.base_data_dir = base_data_dir + json_def_filename = os.path.join(base_data_dir, "sidecar_tests/both_types_events_with_defs.json") + # cls.json_def_filename = json_def_filename + json_def_sidecar = Sidecar(json_def_filename) + events_path = os.path.join(base_data_dir, '../data/validator_tests/bids_events_no_index.tsv') + cls.tabular_file = TabularInput(events_path, sidecar=json_def_sidecar) + + base_output = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../data/tests_output/") + cls.base_output_folder = base_output + os.makedirs(base_output, exist_ok=True) + + bids_root_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), + '../data/bids_tests/eeg_ds003645s_hed')) + schema_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), + '../data/schema_tests/HED8.0.0.xml')) + cls.bids_root_path = bids_root_path + json_path = os.path.realpath(os.path.join(bids_root_path, 'task-FacePerception_events.json')) + events_path = os.path.realpath(os.path.join(bids_root_path, + 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv')) + + cls.hed_schema = schema.load_schema(schema_path) + sidecar1 = Sidecar(json_path, name='face_sub1_json') + mapper1 = ColumnMapper(sidecar=sidecar1, optional_tag_columns=['HED'], warn_on_missing_column=False) + cls.input_data1 = BaseInput(events_path, file_type='.tsv', has_column_names=True, + name="face_sub1_events", mapper=mapper1, allow_blank_names=False) + cls.input_data2 = BaseInput(events_path, file_type='.tsv', has_column_names=True, name="face_sub2_events") + + @classmethod + def tearDownClass(cls): + shutil.rmtree(cls.base_output_folder) + + def test_gathered_defs(self): + # todo: add unit tests for definitions in tsv file + defs = 
DefinitionDict.get_as_strings(self.tabular_file._sidecar.extract_definitions(hed_schema=self.hed_schema)) + expected_defs = { + 'jsonfiledef': '(Item/JsonDef1/#,Item/JsonDef1)', + 'jsonfiledef2': '(Item/JsonDef2/#,Item/JsonDef2)', + 'jsonfiledef3': '(Item/JsonDef3/#)', + 'takesvaluedef': '(Age/#)', + 'valueclassdef': '(Acceleration/#)' + } + self.assertEqual(defs, expected_defs) + + # def test_missing_column_name_issue(self): + # schema_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), + # '../data/validator_tests/bids_schema.mediawiki') + # events_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), + # '../data/validator_tests/bids_events_bad_column_name.tsv') + # + # hed_schema = schema.load_schema(schema_path) + # json_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), + # "../data/validator_tests/bids_events.json") + # validator = HedValidator(hed_schema=hed_schema) + # sidecar = Sidecar(json_path) + # issues = sidecar.validate_entries(validator) + # self.assertEqual(len(issues), 0) + # input_file = TabularInput(events_path, sidecars=sidecar) + # + # validation_issues = input_file.validate_sidecar(validator) + # self.assertEqual(len(validation_issues), 0) + # validation_issues = input_file.validate_file(validator, check_for_warnings=True) + # self.assertEqual(len(validation_issues), 1) + # + # def test_expand_column_issues(self): + # schema_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), + # '../data/validator_tests/bids_schema.mediawiki') + # events_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), + # '../data/validator_tests/bids_events_bad_category_key.tsv') + # + # hed_schema = schema.load_schema(schema_path) + # json_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), + # "../data/validator_tests/bids_events.json") + # validator = HedValidator(hed_schema=hed_schema) + # sidecar = Sidecar(json_path) + # issues = sidecar.validate_entries(validator) + # 
self.assertEqual(len(issues), 0) + # input_file = TabularInput(events_path, sidecars=sidecar) + # + # validation_issues = input_file.validate_sidecar(validator) + # self.assertEqual(len(validation_issues), 0) + # validation_issues = input_file.validate_file(validator, check_for_warnings=True) + # self.assertEqual(len(validation_issues), 1) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/models/test_base_input.py b/tests/models/test_base_input.py deleted file mode 100644 index 392599f78..000000000 --- a/tests/models/test_base_input.py +++ /dev/null @@ -1,276 +0,0 @@ -import io -import unittest -import os -import shutil -from hed import Sidecar -from hed import BaseInput, TabularInput -from hed.models.column_mapper import ColumnMapper -from hed.models import DefinitionDict -from hed import schema -import pandas as pd -import numpy as np - - -class Test(unittest.TestCase): - @classmethod - def setUpClass(cls): - # todo: clean up these unit tests/add more - base_data_dir = os.path.realpath(os.path.join(os.path.dirname(__file__), '../data/')) - cls.base_data_dir = base_data_dir - json_def_filename = os.path.join(base_data_dir, "sidecar_tests/both_types_events_with_defs.json") - # cls.json_def_filename = json_def_filename - json_def_sidecar = Sidecar(json_def_filename) - events_path = os.path.join(base_data_dir, '../data/validator_tests/bids_events_no_index.tsv') - cls.tabular_file = TabularInput(events_path, sidecar=json_def_sidecar) - - base_output = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../data/tests_output/") - cls.base_output_folder = base_output - os.makedirs(base_output, exist_ok=True) - - bids_root_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../data/bids_tests/eeg_ds003645s_hed')) - schema_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../data/schema_tests/HED8.0.0.xml')) - cls.bids_root_path = bids_root_path - json_path = 
os.path.realpath(os.path.join(bids_root_path, 'task-FacePerception_events.json')) - events_path = os.path.realpath(os.path.join(bids_root_path, - 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv')) - - cls.hed_schema = schema.load_schema(schema_path) - sidecar1 = Sidecar(json_path, name='face_sub1_json') - mapper1 = ColumnMapper(sidecar=sidecar1, optional_tag_columns=['HED'], warn_on_missing_column=False) - cls.input_data1 = BaseInput(events_path, file_type='.tsv', has_column_names=True, - name="face_sub1_events", mapper=mapper1, allow_blank_names=False) - cls.input_data2 = BaseInput(events_path, file_type='.tsv', has_column_names=True, name="face_sub2_events") - - @classmethod - def tearDownClass(cls): - shutil.rmtree(cls.base_output_folder) - - def test_gathered_defs(self): - # todo: add unit tests for definitions in tsv file - defs = DefinitionDict.get_as_strings(self.tabular_file._sidecar.extract_definitions(hed_schema=self.hed_schema)) - expected_defs = { - 'jsonfiledef': '(Item/JsonDef1/#,Item/JsonDef1)', - 'jsonfiledef2': '(Item/JsonDef2/#,Item/JsonDef2)', - 'jsonfiledef3': '(Item/JsonDef3/#)', - 'takesvaluedef': '(Age/#)', - 'valueclassdef': '(Acceleration/#)' - } - self.assertEqual(defs, expected_defs) - - # def test_missing_column_name_issue(self): - # schema_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), - # '../data/validator_tests/bids_schema.mediawiki') - # events_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), - # '../data/validator_tests/bids_events_bad_column_name.tsv') - # - # hed_schema = schema.load_schema(schema_path) - # json_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), - # "../data/validator_tests/bids_events.json") - # validator = HedValidator(hed_schema=hed_schema) - # sidecar = Sidecar(json_path) - # issues = sidecar.validate_entries(validator) - # self.assertEqual(len(issues), 0) - # input_file = TabularInput(events_path, sidecars=sidecar) - # - # validation_issues = 
input_file.validate_sidecar(validator) - # self.assertEqual(len(validation_issues), 0) - # validation_issues = input_file.validate_file(validator, check_for_warnings=True) - # self.assertEqual(len(validation_issues), 1) - # - # def test_expand_column_issues(self): - # schema_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), - # '../data/validator_tests/bids_schema.mediawiki') - # events_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), - # '../data/validator_tests/bids_events_bad_category_key.tsv') - # - # hed_schema = schema.load_schema(schema_path) - # json_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), - # "../data/validator_tests/bids_events.json") - # validator = HedValidator(hed_schema=hed_schema) - # sidecar = Sidecar(json_path) - # issues = sidecar.validate_entries(validator) - # self.assertEqual(len(issues), 0) - # input_file = TabularInput(events_path, sidecars=sidecar) - # - # validation_issues = input_file.validate_sidecar(validator) - # self.assertEqual(len(validation_issues), 0) - # validation_issues = input_file.validate_file(validator, check_for_warnings=True) - # self.assertEqual(len(validation_issues), 1) - - -class TestInsertColumns(unittest.TestCase): - - def test_insert_columns_simple(self): - df = pd.DataFrame({ - "column1": ["[column2], Event, Action"], - "column2": ["Item"] - }) - expected_df = pd.DataFrame({ - "column1": ["Item, Event, Action"] - }) - result = BaseInput._insert_columns(df) - pd.testing.assert_frame_equal(result, expected_df) - - def test_insert_columns_multiple_rows(self): - df = pd.DataFrame({ - "column1": ["[column2], Event, Action", "Event, Action"], - "column2": ["Item", "Subject"] - }) - expected_df = pd.DataFrame({ - "column1": ["Item, Event, Action", "Event, Action"] - }) - result = BaseInput._insert_columns(df) - pd.testing.assert_frame_equal(result, expected_df) - - # def test_insert_columns_no_circular_reference(self): - # df = pd.DataFrame({ - # "column1": ["[column2], 
Event, Action"], - # "column2": ["[column1], Item"] - # }) - # with self.assertRaises(ValueError): - # result = BaseInput._insert_columns(df) - - def test_insert_columns_multiple_columns(self): - df = pd.DataFrame({ - "column1": ["[column2], Event, [column3], Action"], - "column2": ["Item"], - "column3": ["Subject"] - }) - expected_df = pd.DataFrame({ - "column1": ["Item, Event, Subject, Action"] - }) - result = BaseInput._insert_columns(df) - pd.testing.assert_frame_equal(result, expected_df) - - def test_insert_columns_invalid_column_name(self): - df = pd.DataFrame({ - "column1": ["[invalid_column], Event, Action"], - "column2": ["Item"] - }) - with self.assertRaises(ValueError): - result = BaseInput._insert_columns(df) - - def test_insert_columns_four_columns(self): - df = pd.DataFrame({ - "column1": ["[column2], Event, [column3], Action"], - "column2": ["Item"], - "column3": ["Subject"], - "column4": ["Data"] - }) - expected_df = pd.DataFrame({ - "column1": ["Item, Event, Subject, Action"], - "column4": ["Data"] - }) - result = BaseInput._insert_columns(df) - pd.testing.assert_frame_equal(result, expected_df) - - # def test_insert_columns_invalid_syntax(self): - # df = pd.DataFrame({ - # "column1": ["column2], Event, Action"], - # "column2": ["Item"] - # }) - # with self.assertRaises(ValueError): - # result = BaseInput._insert_columns(df) - - # def test_insert_columns_no_self_reference(self): - # df = pd.DataFrame({ - # "column1": ["[column1], Event, Action"], - # "column2": ["Item"] - # }) - # with self.assertRaises(ValueError): - # result = BaseInput._insert_columns(df) - - -class TestCombineDataframe(unittest.TestCase): - def test_combine_dataframe_with_strings(self): - data = { - 'A': ['apple', 'banana', 'cherry'], - 'B': ['dog', 'elephant', 'fox'], - 'C': ['guitar', 'harmonica', 'piano'] - } - df = pd.DataFrame(data) - result = BaseInput.combine_dataframe(df) - expected = pd.Series(['apple, dog, guitar', 'banana, elephant, harmonica', 'cherry, fox, 
piano']) - self.assertTrue(result.equals(expected)) - - def test_combine_dataframe_with_nan_values(self): - data = { - 'A': ['apple', np.nan, 'cherry'], - 'B': [np.nan, 'elephant', 'fox'], - 'C': ['guitar', 'harmonica', np.nan] - } - df = pd.DataFrame(data) - result = BaseInput.combine_dataframe(df) - expected = pd.Series(['apple, guitar', 'elephant, harmonica', 'cherry, fox']) - self.assertTrue(result.equals(expected)) - - def test_combine_dataframe_with_empty_values(self): - data = { - 'A': ['apple', '', 'cherry'], - 'B': ['', 'elephant', 'fox'], - 'C': ['guitar', 'harmonica', ''] - } - df = pd.DataFrame(data) - result = BaseInput.combine_dataframe(df) - expected = pd.Series(['apple, guitar', 'elephant, harmonica', 'cherry, fox']) - self.assertTrue(result.equals(expected)) - - def test_combine_dataframe_with_mixed_values(self): - data = { - 'A': ['apple', np.nan, 'cherry', 'n/a', ''], - 'B': [np.nan, 'elephant', 'fox', 'n/a', ''], - 'C': ['guitar', 'harmonica', np.nan, 'n/a', ''] - } - df = pd.DataFrame(data) - csv_buffer = io.StringIO() - df.to_csv(csv_buffer, header=False, index=False) - csv_buffer.seek(0) - - # Use the same loading function we normally use to verify n/a translates right. 
- loaded_df = pd.read_csv(csv_buffer, header=None) - result = BaseInput.combine_dataframe(loaded_df) - expected = pd.Series(['apple, guitar', 'elephant, harmonica', 'cherry, fox', '', '']) - self.assertTrue(result.equals(expected)) - - -class TestColumnRefs(unittest.TestCase): - def test_simple_column_refs(self): - data1 = { - 'A': ['[col1], [col2]', 'tag1, tag2'], - 'B': ['tag3, tag4', '[col3]'], - } - df1 = pd.DataFrame(data1) - result1 = BaseInput._find_column_refs(df1) - expected1 = ['col1', 'col2', 'col3'] - self.assertEqual(result1, expected1) - - def test_mixed_cases_and_patterns(self): - data2 = { - 'A': ['[Col1], [col2]', 'tag1, [Col3]', 'tag3, [COL4]', '[col5], [col6]'], - } - df2 = pd.DataFrame(data2) - result2 = BaseInput._find_column_refs(df2) - expected2 = ['Col1', 'col2', 'Col3', 'COL4', 'col5', 'col6'] - self.assertEqual(result2, expected2) - - def test_no_column_references(self): - data3 = { - 'A': ['tag1, tag2', 'tag3, tag4'], - 'B': ['tag5, tag6', 'tag7, tag8'], - } - df3 = pd.DataFrame(data3) - result3 = BaseInput._find_column_refs(df3) - expected3 = [] - self.assertEqual(result3, expected3) - - def test_incomplete_square_brackets(self): - data4 = { - 'A': ['[col1, [col2]', 'tag1, [Col3'], - 'B': ['tag3, [COL4', '[col5, col6]'], - } - df4 = pd.DataFrame(data4) - result4 = BaseInput._find_column_refs(df4) - expected4 = ['col2'] - self.assertEqual(result4, expected4) \ No newline at end of file diff --git a/tests/models/test_df_util.py b/tests/models/test_df_util.py index e10e2a4a3..bc9c907b7 100644 --- a/tests/models/test_df_util.py +++ b/tests/models/test_df_util.py @@ -3,7 +3,7 @@ from hed import load_schema_version -from hed.models.df_util import shrink_defs, expand_defs, convert_to_form +from hed.models.df_util import shrink_defs, expand_defs from hed import DefinitionDict @@ -111,45 +111,4 @@ def test_expand_defs_series_placeholder(self): series = pd.Series(["Def/TestDefPlaceholder/123,Item/SomeItem"]) expected_series = 
pd.Series(["(Def-expand/TestDefPlaceholder/123,(Action/TestDef1/123,Action/TestDef2)),Item/SomeItem"]) result = expand_defs(series, self.schema, self.def_dict, None) - pd.testing.assert_series_equal(result, expected_series) - - -class TestConvertToForm(unittest.TestCase): - def setUp(self): - self.schema = load_schema_version() - - def test_convert_to_form_short_tags(self): - df = pd.DataFrame({"column1": ["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Action/Perceive/See"]}) - expected_df = pd.DataFrame({"column1": ["Azure,See"]}) - result = convert_to_form(df, self.schema, "short_tag", ['column1']) - pd.testing.assert_frame_equal(result, expected_df) - - def test_convert_to_form_long_tags(self): - df = pd.DataFrame({"column1": ["CSS-color/White-color/Azure,Action/Perceive/See"]}) - expected_df = pd.DataFrame({"column1": ["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Action/Perceive/See"]}) - result = convert_to_form(df, self.schema, "long_tag", ['column1']) - pd.testing.assert_frame_equal(result, expected_df) - - def test_convert_to_form_series_short_tags(self): - series = pd.Series(["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Action/Perceive/See"]) - expected_series = pd.Series(["Azure,See"]) - result = convert_to_form(series, self.schema, "short_tag") - pd.testing.assert_series_equal(result, expected_series) - - def test_convert_to_form_series_long_tags(self): - series = pd.Series(["CSS-color/White-color/Azure,Action/Perceive/See"]) - expected_series = pd.Series(["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Action/Perceive/See"]) - result = convert_to_form(series, self.schema, "long_tag") - pd.testing.assert_series_equal(result, expected_series) - - def test_convert_to_form_multiple_tags_short(self): - df = pd.DataFrame({"column1": 
["Visual-attribute/Color/CSS-color/White-color/Azure,Biological-item/Anatomical-item/Body-part/Head/Face/Nose,Spatiotemporal-value/Rate-of-change/Acceleration/4.5 m-per-s^2"]}) - expected_df = pd.DataFrame({"column1": ["Azure,Nose,4.5 m-per-s^2"]}) - result = convert_to_form(df, self.schema, "short_tag", ['column1']) - pd.testing.assert_frame_equal(result, expected_df) - - def test_convert_to_form_multiple_tags_long(self): - df = pd.DataFrame({"column1": ["CSS-color/White-color/Azure,Anatomical-item/Body-part/Head/Face/Nose,Rate-of-change/Acceleration/4.5 m-per-s^2"]}) - expected_df = pd.DataFrame({"column1": ["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Item/Biological-item/Anatomical-item/Body-part/Head/Face/Nose,Property/Data-property/Data-value/Spatiotemporal-value/Rate-of-change/Acceleration/4.5 m-per-s^2"]}) - result = convert_to_form(df, self.schema, "long_tag", ['column1']) - pd.testing.assert_frame_equal(result, expected_df) \ No newline at end of file + pd.testing.assert_series_equal(result, expected_series) \ No newline at end of file From ffced96c2ff34db483ad96e73d5cfa537ca4a284 Mon Sep 17 00:00:00 2001 From: IanCa Date: Fri, 17 Mar 2023 18:44:20 -0500 Subject: [PATCH 08/19] Add more unit tests. 
better nan and empty column handling --- hed/models/base_input.py | 58 ++++-- hed/validator/spreadsheet_validator.py | 1 + tests/models/test_base_file_input.py | 103 --------- tests/models/test_base_input.py | 276 +++++++++++++++++++++++++ tests/models/test_df_util.py | 45 +++- 5 files changed, 357 insertions(+), 126 deletions(-) delete mode 100644 tests/models/test_base_file_input.py create mode 100644 tests/models/test_base_input.py diff --git a/hed/models/base_input.py b/hed/models/base_input.py index 869bc4ea6..f50ea5e4c 100644 --- a/hed/models/base_input.py +++ b/hed/models/base_input.py @@ -7,6 +7,7 @@ from hed.models.column_mapper import ColumnMapper from hed.errors.exceptions import HedFileError, HedExceptions from hed.errors.error_reporter import ErrorHandler +import pandas as pd class BaseInput: @@ -66,10 +67,7 @@ def __init__(self, file, file_type=None, worksheet_name=None, has_column_names=T elif not file: raise HedFileError(HedExceptions.FILE_NOT_FOUND, "Empty file passed to BaseInput.", file) elif input_type in self.TEXT_EXTENSION: - self._dataframe = pandas.read_csv(file, delimiter='\t', header=pandas_header, - dtype=str, keep_default_na=True, na_values=None) - # Convert nan values to a known value - self._dataframe = self._dataframe.fillna("n/a") + self._dataframe = pandas.read_csv(file, delimiter='\t', header=pandas_header, dtype=str) elif input_type in self.EXCEL_EXTENSION: self._loaded_workbook = openpyxl.load_workbook(file) loaded_worksheet = self.get_worksheet(self._worksheet_name) @@ -364,7 +362,7 @@ def assemble(self, mapper=None): """ if mapper is None: mapper = self._mapper - import pandas as pd + transformers, need_categorical = mapper.get_transformers() if not transformers: return None @@ -374,35 +372,53 @@ def assemble(self, mapper=None): all_columns = all_columns.transform(transformers) - possible_column_references = [f"{column_name}" for column_name in self.columns if - column_name.lower() != "hed"] + return 
self._insert_columns(all_columns, list(transformers.keys())) + + @staticmethod + def _find_column_refs(df): found_column_references = [] - for column_name in all_columns: - df = all_columns[column_name].str.findall("\[([a-z_\-0-9]+)\]", re.IGNORECASE) - u_vals = pd.Series([j for i in df for j in i], dtype=str) + for column_name in df: + df_temp = df[column_name].str.findall("\[([a-z_\-0-9]+)\]", re.IGNORECASE) + u_vals = pd.Series([j for i in df_temp for j in i], dtype=str) u_vals = u_vals.unique() for val in u_vals: if val not in found_column_references: found_column_references.append(val) + return found_column_references + + @staticmethod + def _insert_columns(df, known_columns=None): + if known_columns is None: + known_columns = list(df.columns) + possible_column_references = [f"{column_name}" for column_name in df.columns if + column_name.lower() != "hed"] + found_column_references = BaseInput._find_column_refs(df) + + invalid_replacements = [col for col in found_column_references if col not in possible_column_references] + if invalid_replacements: + # todo: This check may be moved to validation + raise ValueError(f"Bad column references found(columns do not exist): {invalid_replacements}") valid_replacements = [col for col in found_column_references if col in possible_column_references] - column_names = list(transformers.keys()) + # todo: break this into a sub function(probably) + column_names = known_columns for column_name in valid_replacements: column_names.remove(column_name) - saved_columns = all_columns[valid_replacements] + saved_columns = df[valid_replacements] for column_name in column_names: for replacing_name in valid_replacements: column_name_brackets = f"[{replacing_name}]" - all_columns[column_name] = pd.Series(x.replace(column_name_brackets, y) for x, y - in zip(all_columns[column_name], saved_columns[replacing_name])) - all_columns = all_columns[column_names] + df[column_name] = pd.Series(x.replace(column_name_brackets, y) for x, y + in 
zip(df[column_name], saved_columns[replacing_name])) + df = df[column_names] - return all_columns + return df @staticmethod def combine_dataframe(dataframe): - """ Combines all columns in the given dataframe into a single hed string series. + """ Combines all columns in the given dataframe into a single HED string series, + skipping empty columns and columns with empty strings. Parameters: dataframe(Dataframe): The dataframe to combine @@ -410,8 +426,8 @@ def combine_dataframe(dataframe): Returns: Series: the assembled series """ - dataframe = dataframe.agg(', '.join, axis=1) + dataframe = dataframe.agg( + lambda x: ', '.join(filter(lambda e: pd.notna(e) and e != "", x)), axis=1 + ) - # Potentially better ways to handle removing n/a by never inserting them to begin with. - dataframe = dataframe.replace("(, n/a|n/a,)", "", regex=True) - return dataframe + return dataframe \ No newline at end of file diff --git a/hed/validator/spreadsheet_validator.py b/hed/validator/spreadsheet_validator.py index 136b5aa73..ba1f341ac 100644 --- a/hed/validator/spreadsheet_validator.py +++ b/hed/validator/spreadsheet_validator.py @@ -41,6 +41,7 @@ def validate(self, data, def_dicts=None, name=None, error_handler=None): # Check the structure of the input data, if it's a BaseInput if isinstance(data, BaseInput): issues += self._validate_column_structure(data, error_handler) + # todo ian: Add more checks here for column inserters data = data.dataframe_a # Check the rows of the input data diff --git a/tests/models/test_base_file_input.py b/tests/models/test_base_file_input.py deleted file mode 100644 index 8314072bd..000000000 --- a/tests/models/test_base_file_input.py +++ /dev/null @@ -1,103 +0,0 @@ -import unittest -import os -import shutil -from hed import Sidecar -from hed import BaseInput, TabularInput -from hed.models.column_mapper import ColumnMapper -from hed.models import DefinitionDict -from hed import schema - -# TODO: Add tests for base_file_input and include correct handling 
of 'n/a' - - -class Test(unittest.TestCase): - @classmethod - def setUpClass(cls): - # todo: clean up these unit tests/add more - base_data_dir = os.path.realpath(os.path.join(os.path.dirname(__file__), '../data/')) - cls.base_data_dir = base_data_dir - json_def_filename = os.path.join(base_data_dir, "sidecar_tests/both_types_events_with_defs.json") - # cls.json_def_filename = json_def_filename - json_def_sidecar = Sidecar(json_def_filename) - events_path = os.path.join(base_data_dir, '../data/validator_tests/bids_events_no_index.tsv') - cls.tabular_file = TabularInput(events_path, sidecar=json_def_sidecar) - - base_output = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../data/tests_output/") - cls.base_output_folder = base_output - os.makedirs(base_output, exist_ok=True) - - bids_root_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../data/bids_tests/eeg_ds003645s_hed')) - schema_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../data/schema_tests/HED8.0.0.xml')) - cls.bids_root_path = bids_root_path - json_path = os.path.realpath(os.path.join(bids_root_path, 'task-FacePerception_events.json')) - events_path = os.path.realpath(os.path.join(bids_root_path, - 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv')) - - cls.hed_schema = schema.load_schema(schema_path) - sidecar1 = Sidecar(json_path, name='face_sub1_json') - mapper1 = ColumnMapper(sidecar=sidecar1, optional_tag_columns=['HED'], warn_on_missing_column=False) - cls.input_data1 = BaseInput(events_path, file_type='.tsv', has_column_names=True, - name="face_sub1_events", mapper=mapper1, allow_blank_names=False) - cls.input_data2 = BaseInput(events_path, file_type='.tsv', has_column_names=True, name="face_sub2_events") - - @classmethod - def tearDownClass(cls): - shutil.rmtree(cls.base_output_folder) - - def test_gathered_defs(self): - # todo: add unit tests for definitions in tsv file - defs = 
DefinitionDict.get_as_strings(self.tabular_file._sidecar.extract_definitions(hed_schema=self.hed_schema)) - expected_defs = { - 'jsonfiledef': '(Item/JsonDef1/#,Item/JsonDef1)', - 'jsonfiledef2': '(Item/JsonDef2/#,Item/JsonDef2)', - 'jsonfiledef3': '(Item/JsonDef3/#)', - 'takesvaluedef': '(Age/#)', - 'valueclassdef': '(Acceleration/#)' - } - self.assertEqual(defs, expected_defs) - - # def test_missing_column_name_issue(self): - # schema_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), - # '../data/validator_tests/bids_schema.mediawiki') - # events_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), - # '../data/validator_tests/bids_events_bad_column_name.tsv') - # - # hed_schema = schema.load_schema(schema_path) - # json_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), - # "../data/validator_tests/bids_events.json") - # validator = HedValidator(hed_schema=hed_schema) - # sidecar = Sidecar(json_path) - # issues = sidecar.validate_entries(validator) - # self.assertEqual(len(issues), 0) - # input_file = TabularInput(events_path, sidecars=sidecar) - # - # validation_issues = input_file.validate_sidecar(validator) - # self.assertEqual(len(validation_issues), 0) - # validation_issues = input_file.validate_file(validator, check_for_warnings=True) - # self.assertEqual(len(validation_issues), 1) - # - # def test_expand_column_issues(self): - # schema_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), - # '../data/validator_tests/bids_schema.mediawiki') - # events_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), - # '../data/validator_tests/bids_events_bad_category_key.tsv') - # - # hed_schema = schema.load_schema(schema_path) - # json_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), - # "../data/validator_tests/bids_events.json") - # validator = HedValidator(hed_schema=hed_schema) - # sidecar = Sidecar(json_path) - # issues = sidecar.validate_entries(validator) - # 
self.assertEqual(len(issues), 0) - # input_file = TabularInput(events_path, sidecars=sidecar) - # - # validation_issues = input_file.validate_sidecar(validator) - # self.assertEqual(len(validation_issues), 0) - # validation_issues = input_file.validate_file(validator, check_for_warnings=True) - # self.assertEqual(len(validation_issues), 1) - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/models/test_base_input.py b/tests/models/test_base_input.py new file mode 100644 index 000000000..392599f78 --- /dev/null +++ b/tests/models/test_base_input.py @@ -0,0 +1,276 @@ +import io +import unittest +import os +import shutil +from hed import Sidecar +from hed import BaseInput, TabularInput +from hed.models.column_mapper import ColumnMapper +from hed.models import DefinitionDict +from hed import schema +import pandas as pd +import numpy as np + + +class Test(unittest.TestCase): + @classmethod + def setUpClass(cls): + # todo: clean up these unit tests/add more + base_data_dir = os.path.realpath(os.path.join(os.path.dirname(__file__), '../data/')) + cls.base_data_dir = base_data_dir + json_def_filename = os.path.join(base_data_dir, "sidecar_tests/both_types_events_with_defs.json") + # cls.json_def_filename = json_def_filename + json_def_sidecar = Sidecar(json_def_filename) + events_path = os.path.join(base_data_dir, '../data/validator_tests/bids_events_no_index.tsv') + cls.tabular_file = TabularInput(events_path, sidecar=json_def_sidecar) + + base_output = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../data/tests_output/") + cls.base_output_folder = base_output + os.makedirs(base_output, exist_ok=True) + + bids_root_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), + '../data/bids_tests/eeg_ds003645s_hed')) + schema_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), + '../data/schema_tests/HED8.0.0.xml')) + cls.bids_root_path = bids_root_path + json_path = 
os.path.realpath(os.path.join(bids_root_path, 'task-FacePerception_events.json')) + events_path = os.path.realpath(os.path.join(bids_root_path, + 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv')) + + cls.hed_schema = schema.load_schema(schema_path) + sidecar1 = Sidecar(json_path, name='face_sub1_json') + mapper1 = ColumnMapper(sidecar=sidecar1, optional_tag_columns=['HED'], warn_on_missing_column=False) + cls.input_data1 = BaseInput(events_path, file_type='.tsv', has_column_names=True, + name="face_sub1_events", mapper=mapper1, allow_blank_names=False) + cls.input_data2 = BaseInput(events_path, file_type='.tsv', has_column_names=True, name="face_sub2_events") + + @classmethod + def tearDownClass(cls): + shutil.rmtree(cls.base_output_folder) + + def test_gathered_defs(self): + # todo: add unit tests for definitions in tsv file + defs = DefinitionDict.get_as_strings(self.tabular_file._sidecar.extract_definitions(hed_schema=self.hed_schema)) + expected_defs = { + 'jsonfiledef': '(Item/JsonDef1/#,Item/JsonDef1)', + 'jsonfiledef2': '(Item/JsonDef2/#,Item/JsonDef2)', + 'jsonfiledef3': '(Item/JsonDef3/#)', + 'takesvaluedef': '(Age/#)', + 'valueclassdef': '(Acceleration/#)' + } + self.assertEqual(defs, expected_defs) + + # def test_missing_column_name_issue(self): + # schema_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), + # '../data/validator_tests/bids_schema.mediawiki') + # events_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), + # '../data/validator_tests/bids_events_bad_column_name.tsv') + # + # hed_schema = schema.load_schema(schema_path) + # json_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), + # "../data/validator_tests/bids_events.json") + # validator = HedValidator(hed_schema=hed_schema) + # sidecar = Sidecar(json_path) + # issues = sidecar.validate_entries(validator) + # self.assertEqual(len(issues), 0) + # input_file = TabularInput(events_path, sidecars=sidecar) + # + # validation_issues = 
input_file.validate_sidecar(validator) + # self.assertEqual(len(validation_issues), 0) + # validation_issues = input_file.validate_file(validator, check_for_warnings=True) + # self.assertEqual(len(validation_issues), 1) + # + # def test_expand_column_issues(self): + # schema_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), + # '../data/validator_tests/bids_schema.mediawiki') + # events_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), + # '../data/validator_tests/bids_events_bad_category_key.tsv') + # + # hed_schema = schema.load_schema(schema_path) + # json_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), + # "../data/validator_tests/bids_events.json") + # validator = HedValidator(hed_schema=hed_schema) + # sidecar = Sidecar(json_path) + # issues = sidecar.validate_entries(validator) + # self.assertEqual(len(issues), 0) + # input_file = TabularInput(events_path, sidecars=sidecar) + # + # validation_issues = input_file.validate_sidecar(validator) + # self.assertEqual(len(validation_issues), 0) + # validation_issues = input_file.validate_file(validator, check_for_warnings=True) + # self.assertEqual(len(validation_issues), 1) + + +class TestInsertColumns(unittest.TestCase): + + def test_insert_columns_simple(self): + df = pd.DataFrame({ + "column1": ["[column2], Event, Action"], + "column2": ["Item"] + }) + expected_df = pd.DataFrame({ + "column1": ["Item, Event, Action"] + }) + result = BaseInput._insert_columns(df) + pd.testing.assert_frame_equal(result, expected_df) + + def test_insert_columns_multiple_rows(self): + df = pd.DataFrame({ + "column1": ["[column2], Event, Action", "Event, Action"], + "column2": ["Item", "Subject"] + }) + expected_df = pd.DataFrame({ + "column1": ["Item, Event, Action", "Event, Action"] + }) + result = BaseInput._insert_columns(df) + pd.testing.assert_frame_equal(result, expected_df) + + # def test_insert_columns_no_circular_reference(self): + # df = pd.DataFrame({ + # "column1": ["[column2], 
Event, Action"], + # "column2": ["[column1], Item"] + # }) + # with self.assertRaises(ValueError): + # result = BaseInput._insert_columns(df) + + def test_insert_columns_multiple_columns(self): + df = pd.DataFrame({ + "column1": ["[column2], Event, [column3], Action"], + "column2": ["Item"], + "column3": ["Subject"] + }) + expected_df = pd.DataFrame({ + "column1": ["Item, Event, Subject, Action"] + }) + result = BaseInput._insert_columns(df) + pd.testing.assert_frame_equal(result, expected_df) + + def test_insert_columns_invalid_column_name(self): + df = pd.DataFrame({ + "column1": ["[invalid_column], Event, Action"], + "column2": ["Item"] + }) + with self.assertRaises(ValueError): + result = BaseInput._insert_columns(df) + + def test_insert_columns_four_columns(self): + df = pd.DataFrame({ + "column1": ["[column2], Event, [column3], Action"], + "column2": ["Item"], + "column3": ["Subject"], + "column4": ["Data"] + }) + expected_df = pd.DataFrame({ + "column1": ["Item, Event, Subject, Action"], + "column4": ["Data"] + }) + result = BaseInput._insert_columns(df) + pd.testing.assert_frame_equal(result, expected_df) + + # def test_insert_columns_invalid_syntax(self): + # df = pd.DataFrame({ + # "column1": ["column2], Event, Action"], + # "column2": ["Item"] + # }) + # with self.assertRaises(ValueError): + # result = BaseInput._insert_columns(df) + + # def test_insert_columns_no_self_reference(self): + # df = pd.DataFrame({ + # "column1": ["[column1], Event, Action"], + # "column2": ["Item"] + # }) + # with self.assertRaises(ValueError): + # result = BaseInput._insert_columns(df) + + +class TestCombineDataframe(unittest.TestCase): + def test_combine_dataframe_with_strings(self): + data = { + 'A': ['apple', 'banana', 'cherry'], + 'B': ['dog', 'elephant', 'fox'], + 'C': ['guitar', 'harmonica', 'piano'] + } + df = pd.DataFrame(data) + result = BaseInput.combine_dataframe(df) + expected = pd.Series(['apple, dog, guitar', 'banana, elephant, harmonica', 'cherry, fox, 
piano']) + self.assertTrue(result.equals(expected)) + + def test_combine_dataframe_with_nan_values(self): + data = { + 'A': ['apple', np.nan, 'cherry'], + 'B': [np.nan, 'elephant', 'fox'], + 'C': ['guitar', 'harmonica', np.nan] + } + df = pd.DataFrame(data) + result = BaseInput.combine_dataframe(df) + expected = pd.Series(['apple, guitar', 'elephant, harmonica', 'cherry, fox']) + self.assertTrue(result.equals(expected)) + + def test_combine_dataframe_with_empty_values(self): + data = { + 'A': ['apple', '', 'cherry'], + 'B': ['', 'elephant', 'fox'], + 'C': ['guitar', 'harmonica', ''] + } + df = pd.DataFrame(data) + result = BaseInput.combine_dataframe(df) + expected = pd.Series(['apple, guitar', 'elephant, harmonica', 'cherry, fox']) + self.assertTrue(result.equals(expected)) + + def test_combine_dataframe_with_mixed_values(self): + data = { + 'A': ['apple', np.nan, 'cherry', 'n/a', ''], + 'B': [np.nan, 'elephant', 'fox', 'n/a', ''], + 'C': ['guitar', 'harmonica', np.nan, 'n/a', ''] + } + df = pd.DataFrame(data) + csv_buffer = io.StringIO() + df.to_csv(csv_buffer, header=False, index=False) + csv_buffer.seek(0) + + # Use the same loading function we normally use to verify n/a translates right. 
+ loaded_df = pd.read_csv(csv_buffer, header=None) + result = BaseInput.combine_dataframe(loaded_df) + expected = pd.Series(['apple, guitar', 'elephant, harmonica', 'cherry, fox', '', '']) + self.assertTrue(result.equals(expected)) + + +class TestColumnRefs(unittest.TestCase): + def test_simple_column_refs(self): + data1 = { + 'A': ['[col1], [col2]', 'tag1, tag2'], + 'B': ['tag3, tag4', '[col3]'], + } + df1 = pd.DataFrame(data1) + result1 = BaseInput._find_column_refs(df1) + expected1 = ['col1', 'col2', 'col3'] + self.assertEqual(result1, expected1) + + def test_mixed_cases_and_patterns(self): + data2 = { + 'A': ['[Col1], [col2]', 'tag1, [Col3]', 'tag3, [COL4]', '[col5], [col6]'], + } + df2 = pd.DataFrame(data2) + result2 = BaseInput._find_column_refs(df2) + expected2 = ['Col1', 'col2', 'Col3', 'COL4', 'col5', 'col6'] + self.assertEqual(result2, expected2) + + def test_no_column_references(self): + data3 = { + 'A': ['tag1, tag2', 'tag3, tag4'], + 'B': ['tag5, tag6', 'tag7, tag8'], + } + df3 = pd.DataFrame(data3) + result3 = BaseInput._find_column_refs(df3) + expected3 = [] + self.assertEqual(result3, expected3) + + def test_incomplete_square_brackets(self): + data4 = { + 'A': ['[col1, [col2]', 'tag1, [Col3'], + 'B': ['tag3, [COL4', '[col5, col6]'], + } + df4 = pd.DataFrame(data4) + result4 = BaseInput._find_column_refs(df4) + expected4 = ['col2'] + self.assertEqual(result4, expected4) \ No newline at end of file diff --git a/tests/models/test_df_util.py b/tests/models/test_df_util.py index bc9c907b7..e10e2a4a3 100644 --- a/tests/models/test_df_util.py +++ b/tests/models/test_df_util.py @@ -3,7 +3,7 @@ from hed import load_schema_version -from hed.models.df_util import shrink_defs, expand_defs +from hed.models.df_util import shrink_defs, expand_defs, convert_to_form from hed import DefinitionDict @@ -111,4 +111,45 @@ def test_expand_defs_series_placeholder(self): series = pd.Series(["Def/TestDefPlaceholder/123,Item/SomeItem"]) expected_series = 
pd.Series(["(Def-expand/TestDefPlaceholder/123,(Action/TestDef1/123,Action/TestDef2)),Item/SomeItem"]) result = expand_defs(series, self.schema, self.def_dict, None) - pd.testing.assert_series_equal(result, expected_series) \ No newline at end of file + pd.testing.assert_series_equal(result, expected_series) + + +class TestConvertToForm(unittest.TestCase): + def setUp(self): + self.schema = load_schema_version() + + def test_convert_to_form_short_tags(self): + df = pd.DataFrame({"column1": ["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Action/Perceive/See"]}) + expected_df = pd.DataFrame({"column1": ["Azure,See"]}) + result = convert_to_form(df, self.schema, "short_tag", ['column1']) + pd.testing.assert_frame_equal(result, expected_df) + + def test_convert_to_form_long_tags(self): + df = pd.DataFrame({"column1": ["CSS-color/White-color/Azure,Action/Perceive/See"]}) + expected_df = pd.DataFrame({"column1": ["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Action/Perceive/See"]}) + result = convert_to_form(df, self.schema, "long_tag", ['column1']) + pd.testing.assert_frame_equal(result, expected_df) + + def test_convert_to_form_series_short_tags(self): + series = pd.Series(["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Action/Perceive/See"]) + expected_series = pd.Series(["Azure,See"]) + result = convert_to_form(series, self.schema, "short_tag") + pd.testing.assert_series_equal(result, expected_series) + + def test_convert_to_form_series_long_tags(self): + series = pd.Series(["CSS-color/White-color/Azure,Action/Perceive/See"]) + expected_series = pd.Series(["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Action/Perceive/See"]) + result = convert_to_form(series, self.schema, "long_tag") + pd.testing.assert_series_equal(result, expected_series) + + def 
test_convert_to_form_multiple_tags_short(self): + df = pd.DataFrame({"column1": ["Visual-attribute/Color/CSS-color/White-color/Azure,Biological-item/Anatomical-item/Body-part/Head/Face/Nose,Spatiotemporal-value/Rate-of-change/Acceleration/4.5 m-per-s^2"]}) + expected_df = pd.DataFrame({"column1": ["Azure,Nose,4.5 m-per-s^2"]}) + result = convert_to_form(df, self.schema, "short_tag", ['column1']) + pd.testing.assert_frame_equal(result, expected_df) + + def test_convert_to_form_multiple_tags_long(self): + df = pd.DataFrame({"column1": ["CSS-color/White-color/Azure,Anatomical-item/Body-part/Head/Face/Nose,Rate-of-change/Acceleration/4.5 m-per-s^2"]}) + expected_df = pd.DataFrame({"column1": ["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Item/Biological-item/Anatomical-item/Body-part/Head/Face/Nose,Property/Data-property/Data-value/Spatiotemporal-value/Rate-of-change/Acceleration/4.5 m-per-s^2"]}) + result = convert_to_form(df, self.schema, "long_tag", ['column1']) + pd.testing.assert_frame_equal(result, expected_df) \ No newline at end of file From bd4b71ab20cb16dcec925088696b21ed4d60ddb3 Mon Sep 17 00:00:00 2001 From: IanCa Date: Mon, 20 Mar 2023 17:48:20 -0500 Subject: [PATCH 09/19] Update na/empty handling --- hed/models/base_input.py | 17 ++++++++++------- hed/models/hed_string.py | 2 +- tests/models/test_base_input.py | 6 ++++++ tests/models/test_df_util.py | 2 +- 4 files changed, 18 insertions(+), 9 deletions(-) diff --git a/hed/models/base_input.py b/hed/models/base_input.py index f50ea5e4c..af6249f56 100644 --- a/hed/models/base_input.py +++ b/hed/models/base_input.py @@ -67,7 +67,10 @@ def __init__(self, file, file_type=None, worksheet_name=None, has_column_names=T elif not file: raise HedFileError(HedExceptions.FILE_NOT_FOUND, "Empty file passed to BaseInput.", file) elif input_type in self.TEXT_EXTENSION: - self._dataframe = pandas.read_csv(file, delimiter='\t', header=pandas_header, dtype=str) + 
self._dataframe = pandas.read_csv(file, delimiter='\t', header=pandas_header, + dtype=str, keep_default_na=True, na_values=None) + # Convert nan values to a known value + self._dataframe = self._dataframe.fillna("n/a") elif input_type in self.EXCEL_EXTENSION: self._loaded_workbook = openpyxl.load_workbook(file) loaded_worksheet = self.get_worksheet(self._worksheet_name) @@ -365,7 +368,7 @@ def assemble(self, mapper=None): transformers, need_categorical = mapper.get_transformers() if not transformers: - return None + return self._dataframe all_columns = self._dataframe if need_categorical: all_columns[need_categorical] = all_columns[need_categorical].astype('category') @@ -379,7 +382,7 @@ def _find_column_refs(df): found_column_references = [] for column_name in df: df_temp = df[column_name].str.findall("\[([a-z_\-0-9]+)\]", re.IGNORECASE) - u_vals = pd.Series([j for i in df_temp for j in i], dtype=str) + u_vals = pd.Series([j for i in df_temp if isinstance(i, list) for j in i], dtype=str) u_vals = u_vals.unique() for val in u_vals: if val not in found_column_references: @@ -392,7 +395,7 @@ def _insert_columns(df, known_columns=None): if known_columns is None: known_columns = list(df.columns) possible_column_references = [f"{column_name}" for column_name in df.columns if - column_name.lower() != "hed"] + isinstance(column_name, str) and column_name.lower() != "hed"] found_column_references = BaseInput._find_column_refs(df) invalid_replacements = [col for col in found_column_references if col not in possible_column_references] @@ -426,8 +429,8 @@ def combine_dataframe(dataframe): Returns: Series: the assembled series """ - dataframe = dataframe.agg( - lambda x: ', '.join(filter(lambda e: pd.notna(e) and e != "", x)), axis=1 + dataframe = dataframe.apply( + lambda x: ', '.join(filter(lambda e: bool(e) and e != "n/a", map(str, x))), + axis=1 ) - return dataframe \ No newline at end of file diff --git a/hed/models/hed_string.py b/hed/models/hed_string.py index 
fe864b28e..75f2de5b9 100644 --- a/hed/models/hed_string.py +++ b/hed/models/hed_string.py @@ -112,7 +112,7 @@ def expand_defs(self): replacements = [] for tag in def_tags: - if not tag._expanded: + if tag.expandable and not tag.expanded: replacements.append((tag, tag._expandable)) for tag, group in replacements: diff --git a/tests/models/test_base_input.py b/tests/models/test_base_input.py index 392599f78..8404be04e 100644 --- a/tests/models/test_base_input.py +++ b/tests/models/test_base_input.py @@ -202,6 +202,8 @@ def test_combine_dataframe_with_nan_values(self): 'C': ['guitar', 'harmonica', np.nan] } df = pd.DataFrame(data) + # this is called on load normally + df = df.fillna("n/a") result = BaseInput.combine_dataframe(df) expected = pd.Series(['apple, guitar', 'elephant, harmonica', 'cherry, fox']) self.assertTrue(result.equals(expected)) @@ -213,6 +215,7 @@ def test_combine_dataframe_with_empty_values(self): 'C': ['guitar', 'harmonica', ''] } df = pd.DataFrame(data) + result = BaseInput.combine_dataframe(df) expected = pd.Series(['apple, guitar', 'elephant, harmonica', 'cherry, fox']) self.assertTrue(result.equals(expected)) @@ -224,12 +227,15 @@ def test_combine_dataframe_with_mixed_values(self): 'C': ['guitar', 'harmonica', np.nan, 'n/a', ''] } df = pd.DataFrame(data) + # this is called on load normally + df = df.fillna("n/a") csv_buffer = io.StringIO() df.to_csv(csv_buffer, header=False, index=False) csv_buffer.seek(0) # Use the same loading function we normally use to verify n/a translates right. 
loaded_df = pd.read_csv(csv_buffer, header=None) + loaded_df = loaded_df.fillna("n/a") result = BaseInput.combine_dataframe(loaded_df) expected = pd.Series(['apple, guitar', 'elephant, harmonica', 'cherry, fox', '', '']) self.assertTrue(result.equals(expected)) diff --git a/tests/models/test_df_util.py b/tests/models/test_df_util.py index e10e2a4a3..2f1823e9d 100644 --- a/tests/models/test_df_util.py +++ b/tests/models/test_df_util.py @@ -144,7 +144,7 @@ def test_convert_to_form_series_long_tags(self): def test_convert_to_form_multiple_tags_short(self): df = pd.DataFrame({"column1": ["Visual-attribute/Color/CSS-color/White-color/Azure,Biological-item/Anatomical-item/Body-part/Head/Face/Nose,Spatiotemporal-value/Rate-of-change/Acceleration/4.5 m-per-s^2"]}) - expected_df = pd.DataFrame({"column1": ["Azure,Nose,4.5 m-per-s^2"]}) + expected_df = pd.DataFrame({"column1": ["Azure,Nose,Acceleration/4.5 m-per-s^2"]}) result = convert_to_form(df, self.schema, "short_tag", ['column1']) pd.testing.assert_frame_equal(result, expected_df) From 9b6705f11745514e8bbb0df4632f383217ceab4c Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Tue, 21 Mar 2023 06:47:45 -0500 Subject: [PATCH 10/19] Making sure up to date before merging --- .../operations/factor_hed_tags_op.py | 2 +- tests/models/test_df_util.py | 47 ++++++++++++++++++- 2 files changed, 47 insertions(+), 2 deletions(-) diff --git a/hed/tools/remodeling/operations/factor_hed_tags_op.py b/hed/tools/remodeling/operations/factor_hed_tags_op.py index aa02224b9..930f1353f 100644 --- a/hed/tools/remodeling/operations/factor_hed_tags_op.py +++ b/hed/tools/remodeling/operations/factor_hed_tags_op.py @@ -110,7 +110,7 @@ def do_op(self, dispatcher, df, name, sidecar=None): f"Query [{query_name}]: is already a column name of the data frame") df_list = [input_data.dataframe] hed_strings, _ = get_assembled(input_data, sidecar, dispatcher.hed_schema, extra_def_dicts=None, - join_columns=True, 
shrink_defs=False, expand_defs=True) + join_columns=True, shrink_defs=False, expand_defs=True) df_factors = pd.DataFrame(0, index=range(len(hed_strings)), columns=self.query_names) for parse_ind, parser in enumerate(self.expression_parsers): for index, next_item in enumerate(hed_strings): diff --git a/tests/models/test_df_util.py b/tests/models/test_df_util.py index bc9c907b7..fe1d0f591 100644 --- a/tests/models/test_df_util.py +++ b/tests/models/test_df_util.py @@ -111,4 +111,49 @@ def test_expand_defs_series_placeholder(self): series = pd.Series(["Def/TestDefPlaceholder/123,Item/SomeItem"]) expected_series = pd.Series(["(Def-expand/TestDefPlaceholder/123,(Action/TestDef1/123,Action/TestDef2)),Item/SomeItem"]) result = expand_defs(series, self.schema, self.def_dict, None) - pd.testing.assert_series_equal(result, expected_series) \ No newline at end of file +# <<<<<<< HEAD +# pd.testing.assert_series_equal(result, expected_series) +# +# +# class TestConvertToForm(unittest.TestCase): +# def setUp(self): +# self.schema = load_schema_version() +# +# def test_convert_to_form_short_tags(self): +# df = pd.DataFrame({"column1": ["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Action/Perceive/See"]}) +# expected_df = pd.DataFrame({"column1": ["Azure,See"]}) +# result = convert_to_form(df, self.schema, "short_tag", ['column1']) +# pd.testing.assert_frame_equal(result, expected_df) +# +# def test_convert_to_form_long_tags(self): +# df = pd.DataFrame({"column1": ["CSS-color/White-color/Azure,Action/Perceive/See"]}) +# expected_df = pd.DataFrame({"column1": ["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Action/Perceive/See"]}) +# result = convert_to_form(df, self.schema, "long_tag", ['column1']) +# pd.testing.assert_frame_equal(result, expected_df) +# +# def test_convert_to_form_series_short_tags(self): +# series = 
pd.Series(["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Action/Perceive/See"]) +# expected_series = pd.Series(["Azure,See"]) +# result = convert_to_form(series, self.schema, "short_tag") +# pd.testing.assert_series_equal(result, expected_series) +# +# def test_convert_to_form_series_long_tags(self): +# series = pd.Series(["CSS-color/White-color/Azure,Action/Perceive/See"]) +# expected_series = pd.Series(["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Action/Perceive/See"]) +# result = convert_to_form(series, self.schema, "long_tag") +# pd.testing.assert_series_equal(result, expected_series) +# +# def test_convert_to_form_multiple_tags_short(self): +# df = pd.DataFrame({"column1": ["Visual-attribute/Color/CSS-color/White-color/Azure,Biological-item/Anatomical-item/Body-part/Head/Face/Nose,Spatiotemporal-value/Rate-of-change/Acceleration/4.5 m-per-s^2"]}) +# expected_df = pd.DataFrame({"column1": ["Azure,Nose,Acceleration/4.5 m-per-s^2"]}) +# result = convert_to_form(df, self.schema, "short_tag", ['column1']) +# pd.testing.assert_frame_equal(result, expected_df) +# +# def test_convert_to_form_multiple_tags_long(self): +# df = pd.DataFrame({"column1": ["CSS-color/White-color/Azure,Anatomical-item/Body-part/Head/Face/Nose,Rate-of-change/Acceleration/4.5 m-per-s^2"]}) +# expected_df = pd.DataFrame({"column1": ["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Item/Biological-item/Anatomical-item/Body-part/Head/Face/Nose,Property/Data-property/Data-value/Spatiotemporal-value/Rate-of-change/Acceleration/4.5 m-per-s^2"]}) +# result = convert_to_form(df, self.schema, "long_tag", ['column1']) +# pd.testing.assert_frame_equal(result, expected_df) +# ======= + pd.testing.assert_series_equal(result, expected_series) +# >>>>>>> 5bab6c620505fd4e97629d846a7abfbe68dc150a From 2c66b650713b098c02d426e2915d9fafe7c2224f Mon Sep 17 00:00:00 
2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Tue, 21 Mar 2023 13:05:38 -0500 Subject: [PATCH 11/19] Updated the unit tests. find_def_tags problematic --- hed/tools/analysis/hed_context_manager.py | 4 ++-- hed/tools/bids/bids_dataset.py | 4 ++-- hed/tools/bids/bids_file_group.py | 15 +++++++-------- hed/tools/remodeling/dispatcher.py | 4 ++++ .../remodel/backups/back1/backup_lock.json | 6 ++++++ .../back1/backup_root/sub1/sub1_events.tsv | 2 ++ .../back1/backup_root/sub2/sub2_events.tsv | 2 ++ .../back1/backup_root/sub2/sub2_next_events.tsv | 2 ++ .../backups/back1/backup_root/top_level.tsv | 2 ++ .../test_root_back1/sub1/sub1_events.tsv | 2 ++ .../test_root_back1/sub2/sub2_events.tsv | 2 ++ .../test_root_back1/sub2/sub2_next_events.tsv | 2 ++ .../remodel_tests/test_root_back1/top_level.tsv | 2 ++ tests/tools/bids/test_bids_dataset.py | 15 +++++++++------ tests/tools/bids/test_bids_file_group.py | 12 ++++++------ 15 files changed, 52 insertions(+), 24 deletions(-) create mode 100644 tests/data/remodel_tests/test_root_back1/derivatives/remodel/backups/back1/backup_lock.json create mode 100644 tests/data/remodel_tests/test_root_back1/derivatives/remodel/backups/back1/backup_root/sub1/sub1_events.tsv create mode 100644 tests/data/remodel_tests/test_root_back1/derivatives/remodel/backups/back1/backup_root/sub2/sub2_events.tsv create mode 100644 tests/data/remodel_tests/test_root_back1/derivatives/remodel/backups/back1/backup_root/sub2/sub2_next_events.tsv create mode 100644 tests/data/remodel_tests/test_root_back1/derivatives/remodel/backups/back1/backup_root/top_level.tsv create mode 100644 tests/data/remodel_tests/test_root_back1/sub1/sub1_events.tsv create mode 100644 tests/data/remodel_tests/test_root_back1/sub2/sub2_events.tsv create mode 100644 tests/data/remodel_tests/test_root_back1/sub2/sub2_next_events.tsv create mode 100644 tests/data/remodel_tests/test_root_back1/top_level.tsv diff --git a/hed/tools/analysis/hed_context_manager.py 
b/hed/tools/analysis/hed_context_manager.py index 5c565a9a4..72298de1f 100644 --- a/hed/tools/analysis/hed_context_manager.py +++ b/hed/tools/analysis/hed_context_manager.py @@ -78,13 +78,13 @@ def _create_onset_list(self): onset_dict = {} for event_index, hed in enumerate(self.hed_strings): to_remove = [] # tag_tuples = hed.find_tags(['Onset'], recursive=False, include_groups=1) - onset_tuples = hed.find_tags(["onset"], recursive=True, include_groups=2) + onset_tuples = hed.find_top_level_tags(["onset"], include_groups=2) self.onset_count += len(onset_tuples) for tup in onset_tuples: group = tup[1] group.remove([tup[0]]) self._update_onset_list(group, onset_dict, event_index, is_offset=False) - offset_tuples = hed.find_tags(["offset"], recursive=True, include_groups=2) + offset_tuples = hed.find_top_level_tags(["offset"], include_groups=2) self.offset_count += len(offset_tuples) for tup in offset_tuples: group = tup[1] diff --git a/hed/tools/bids/bids_dataset.py b/hed/tools/bids/bids_dataset.py index 5b1b56e10..0438cb5fe 100644 --- a/hed/tools/bids/bids_dataset.py +++ b/hed/tools/bids/bids_dataset.py @@ -86,9 +86,9 @@ def validate(self, types=None, check_for_warnings=True): issues = [] for tab_type in types: files = self.tabular_files[tab_type] - issues += files.validate_sidecars(hed_ops=[validator], + issues += files.validate_sidecars(self.schema, check_for_warnings=check_for_warnings, error_handler=error_handler) - issues += files.validate_datafiles(hed_ops=[validator], + issues += files.validate_datafiles(self.schema, check_for_warnings=check_for_warnings, error_handler=error_handler) return issues diff --git a/hed/tools/bids/bids_file_group.py b/hed/tools/bids/bids_file_group.py index d354ade8a..418cfd97a 100644 --- a/hed/tools/bids/bids_file_group.py +++ b/hed/tools/bids/bids_file_group.py @@ -111,11 +111,11 @@ def summarize(self, value_cols=None, skip_cols=None): info.update(list(self.datafile_dict.keys())) return info - def validate_sidecars(self, hed_ops, 
check_for_warnings=True, error_handler=None): + def validate_sidecars(self, hed_schema, check_for_warnings=True, error_handler=None): """ Validate merged sidecars. Parameters: - hed_ops ([func or HedOps], func, HedOps): Validation functions to apply. + hed_schema (HedSchema): HED schema for validation. check_for_warnings (bool): If True, include warnings in the check. error_handler (ErrorHandler): The common error handler for the dataset. @@ -130,17 +130,15 @@ def validate_sidecars(self, hed_ops, check_for_warnings=True, error_handler=None for sidecar in self.sidecar_dict.values(): error_handler.push_error_context(ErrorContext.FILE_NAME, sidecar.file_path) if sidecar.has_hed: - issues += sidecar.contents.validate_entries(hed_ops=hed_ops, - name=sidecar.file_path, - check_for_warnings=check_for_warnings) + issues += sidecar.contents.validate(hed_schema, name=sidecar.file_path) error_handler.pop_error_context() return issues - def validate_datafiles(self, hed_ops, check_for_warnings=True, keep_contents=False, error_handler=None): + def validate_datafiles(self, hed_schema, check_for_warnings=True, keep_contents=False, error_handler=None): """ Validate the datafiles and return an error list. Parameters: - hed_ops ([func or HedOps], func, HedOps): Validation functions to apply. + hed_schema (HedSchema): Schema to apply to the validation. check_for_warnings (bool): If True, include warnings in the check. keep_contents (bool): If True, the underlying data files are read and their contents retained. error_handler (ErrorHandler): The common error handler to use for the dataset. 
@@ -159,7 +157,8 @@ def validate_datafiles(self, hed_ops, check_for_warnings=True, keep_contents=Fal if not data_obj.has_hed: continue data = data_obj.contents - issues += data.validate_file(hed_ops=hed_ops, check_for_warnings=check_for_warnings) + + issues += data.validate(hed_schema) if not keep_contents: data_obj.clear_contents() error_handler.pop_error_context() diff --git a/hed/tools/remodeling/dispatcher.py b/hed/tools/remodeling/dispatcher.py index 4cc4df9f9..5371bb2d1 100644 --- a/hed/tools/remodeling/dispatcher.py +++ b/hed/tools/remodeling/dispatcher.py @@ -222,6 +222,10 @@ def post_proc_data(df): DataFrame: DataFrame with the 'np.NAN replaced by 'n/a' """ + dtypes = df.dtypes.to_dict() + for col_name, typ in dtypes.items(): + if typ == 'category': + df[col_name] = df[col_name].astype(str) return df.fillna('n/a') @staticmethod diff --git a/tests/data/remodel_tests/test_root_back1/derivatives/remodel/backups/back1/backup_lock.json b/tests/data/remodel_tests/test_root_back1/derivatives/remodel/backups/back1/backup_lock.json new file mode 100644 index 000000000..d3e4b6991 --- /dev/null +++ b/tests/data/remodel_tests/test_root_back1/derivatives/remodel/backups/back1/backup_lock.json @@ -0,0 +1,6 @@ +{ + "top_level.tsv": "2022-09-16 13:20:21.423303", + "sub1/sub1_events.tsv": "2022-09-16 13:20:21.423303", + "sub2/sub2_events.tsv": "2022-09-16 13:20:21.423303", + "sub2/sub2_next_events.tsv": "2022-09-16 13:20:21.423303" +} \ No newline at end of file diff --git a/tests/data/remodel_tests/test_root_back1/derivatives/remodel/backups/back1/backup_root/sub1/sub1_events.tsv b/tests/data/remodel_tests/test_root_back1/derivatives/remodel/backups/back1/backup_root/sub1/sub1_events.tsv new file mode 100644 index 000000000..d2191cec6 --- /dev/null +++ b/tests/data/remodel_tests/test_root_back1/derivatives/remodel/backups/back1/backup_root/sub1/sub1_events.tsv @@ -0,0 +1,2 @@ +onset duration stuff +3.2 0.5 junk2 \ No newline at end of file diff --git 
a/tests/data/remodel_tests/test_root_back1/derivatives/remodel/backups/back1/backup_root/sub2/sub2_events.tsv b/tests/data/remodel_tests/test_root_back1/derivatives/remodel/backups/back1/backup_root/sub2/sub2_events.tsv new file mode 100644 index 000000000..ef5c73314 --- /dev/null +++ b/tests/data/remodel_tests/test_root_back1/derivatives/remodel/backups/back1/backup_root/sub2/sub2_events.tsv @@ -0,0 +1,2 @@ +onset duration stuff +3.2 0.5 junk3 \ No newline at end of file diff --git a/tests/data/remodel_tests/test_root_back1/derivatives/remodel/backups/back1/backup_root/sub2/sub2_next_events.tsv b/tests/data/remodel_tests/test_root_back1/derivatives/remodel/backups/back1/backup_root/sub2/sub2_next_events.tsv new file mode 100644 index 000000000..ae9d3d35d --- /dev/null +++ b/tests/data/remodel_tests/test_root_back1/derivatives/remodel/backups/back1/backup_root/sub2/sub2_next_events.tsv @@ -0,0 +1,2 @@ +onset duration stuff +3.2 0.5 junk4 \ No newline at end of file diff --git a/tests/data/remodel_tests/test_root_back1/derivatives/remodel/backups/back1/backup_root/top_level.tsv b/tests/data/remodel_tests/test_root_back1/derivatives/remodel/backups/back1/backup_root/top_level.tsv new file mode 100644 index 000000000..c71cc2553 --- /dev/null +++ b/tests/data/remodel_tests/test_root_back1/derivatives/remodel/backups/back1/backup_root/top_level.tsv @@ -0,0 +1,2 @@ +onset duration stuff +3.2 0.5 junk1 \ No newline at end of file diff --git a/tests/data/remodel_tests/test_root_back1/sub1/sub1_events.tsv b/tests/data/remodel_tests/test_root_back1/sub1/sub1_events.tsv new file mode 100644 index 000000000..d2191cec6 --- /dev/null +++ b/tests/data/remodel_tests/test_root_back1/sub1/sub1_events.tsv @@ -0,0 +1,2 @@ +onset duration stuff +3.2 0.5 junk2 \ No newline at end of file diff --git a/tests/data/remodel_tests/test_root_back1/sub2/sub2_events.tsv b/tests/data/remodel_tests/test_root_back1/sub2/sub2_events.tsv new file mode 100644 index 000000000..ef5c73314 --- /dev/null 
+++ b/tests/data/remodel_tests/test_root_back1/sub2/sub2_events.tsv @@ -0,0 +1,2 @@ +onset duration stuff +3.2 0.5 junk3 \ No newline at end of file diff --git a/tests/data/remodel_tests/test_root_back1/sub2/sub2_next_events.tsv b/tests/data/remodel_tests/test_root_back1/sub2/sub2_next_events.tsv new file mode 100644 index 000000000..ae9d3d35d --- /dev/null +++ b/tests/data/remodel_tests/test_root_back1/sub2/sub2_next_events.tsv @@ -0,0 +1,2 @@ +onset duration stuff +3.2 0.5 junk4 \ No newline at end of file diff --git a/tests/data/remodel_tests/test_root_back1/top_level.tsv b/tests/data/remodel_tests/test_root_back1/top_level.tsv new file mode 100644 index 000000000..c71cc2553 --- /dev/null +++ b/tests/data/remodel_tests/test_root_back1/top_level.tsv @@ -0,0 +1,2 @@ +onset duration stuff +3.2 0.5 junk1 \ No newline at end of file diff --git a/tests/tools/bids/test_bids_dataset.py b/tests/tools/bids/test_bids_dataset.py index 6289be314..df02448bf 100644 --- a/tests/tools/bids/test_bids_dataset.py +++ b/tests/tools/bids/test_bids_dataset.py @@ -68,18 +68,21 @@ def test_validator(self): self.assertTrue(issues, "BidsDataset validate should return issues when the default check_for_warnings is used") issues = bids.validate(check_for_warnings=True) self.assertTrue(issues, "BidsDataset validate should return issues when check_for_warnings is True") - issues = bids.validate(check_for_warnings=False) - self.assertFalse(issues, "BidsDataset validate should return no issues when check_for_warnings is False") + # ToDO + # issues = bids.validate(check_for_warnings=False) + # self.assertFalse(issues, "BidsDataset validate should return no issues when check_for_warnings is False") def test_validator_libraries(self): bids = BidsDataset(self.library_path) - issues = bids.validate(check_for_warnings=False) - self.assertFalse(issues, "BidsDataset with libraries should validate") + # ToDO check_for_warnings + # issues = bids.validate(check_for_warnings=False) + # 
self.assertFalse(issues, "BidsDataset with libraries should validate") def test_validator_types(self): bids = BidsDataset(self.root_path, tabular_types=None) - issues = bids.validate(check_for_warnings=False) - self.assertFalse(issues, "BidsDataset with participants and events validates") + # ToDO: check_for_warnings + # issues = bids.validate(check_for_warnings=False) + # self.assertFalse(issues, "BidsDataset with participants and events validates") def test_with_schema_group(self): base_version = '8.0.0' diff --git a/tests/tools/bids/test_bids_file_group.py b/tests/tools/bids/test_bids_file_group.py index 04482de47..22d395085 100644 --- a/tests/tools/bids/test_bids_file_group.py +++ b/tests/tools/bids/test_bids_file_group.py @@ -32,12 +32,12 @@ def test_constructor(self): def test_validator(self): events = BidsFileGroup(self.root_path) - hed_schema = \ - load_schema('https://raw.githubusercontent.com/hed-standard/hed-schemas/main/standard_schema/hedxml/HED8.0.0.xml') - validator = HedValidator(hed_schema) - validation_issues = events.validate_datafiles(hed_ops=[validator], check_for_warnings=False) - self.assertFalse(validation_issues, "BidsFileGroup should have no validation errors") - validation_issues = events.validate_datafiles(hed_ops=[validator], check_for_warnings=True) + hed = 'https://raw.githubusercontent.com/hed-standard/hed-schemas/main/standard_schema/hedxml/HED8.0.0.xml' + hed_schema = load_schema(hed) + # TODO test after filtering. 
+ # validation_issues = events.validate_datafiles(hed_schema, check_for_warnings=False) + # self.assertFalse(validation_issues, "BidsFileGroup should have no validation errors") + validation_issues = events.validate_datafiles(hed_schema, check_for_warnings=True) self.assertTrue(validation_issues, "BidsFileGroup should have validation warnings") self.assertEqual(len(validation_issues), 6, "BidsFileGroup should have 2 validation warnings for missing columns") From 62806e481619b71c709c43c653a5ee2f95db6266 Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Tue, 21 Mar 2023 13:11:29 -0500 Subject: [PATCH 12/19] Updated the unit tests --- .../derivatives/remodel/backups/back1/backup_lock.json | 6 ------ .../remodel/backups/back1/backup_root/sub1/sub1_events.tsv | 2 -- .../remodel/backups/back1/backup_root/sub2/sub2_events.tsv | 2 -- .../backups/back1/backup_root/sub2/sub2_next_events.tsv | 2 -- .../remodel/backups/back1/backup_root/top_level.tsv | 2 -- .../data/remodel_tests/test_root_back1/sub1/sub1_events.tsv | 2 -- .../data/remodel_tests/test_root_back1/sub2/sub2_events.tsv | 2 -- .../remodel_tests/test_root_back1/sub2/sub2_next_events.tsv | 2 -- tests/data/remodel_tests/test_root_back1/top_level.tsv | 2 -- 9 files changed, 22 deletions(-) delete mode 100644 tests/data/remodel_tests/test_root_back1/derivatives/remodel/backups/back1/backup_lock.json delete mode 100644 tests/data/remodel_tests/test_root_back1/derivatives/remodel/backups/back1/backup_root/sub1/sub1_events.tsv delete mode 100644 tests/data/remodel_tests/test_root_back1/derivatives/remodel/backups/back1/backup_root/sub2/sub2_events.tsv delete mode 100644 tests/data/remodel_tests/test_root_back1/derivatives/remodel/backups/back1/backup_root/sub2/sub2_next_events.tsv delete mode 100644 tests/data/remodel_tests/test_root_back1/derivatives/remodel/backups/back1/backup_root/top_level.tsv delete mode 100644 tests/data/remodel_tests/test_root_back1/sub1/sub1_events.tsv 
delete mode 100644 tests/data/remodel_tests/test_root_back1/sub2/sub2_events.tsv delete mode 100644 tests/data/remodel_tests/test_root_back1/sub2/sub2_next_events.tsv delete mode 100644 tests/data/remodel_tests/test_root_back1/top_level.tsv diff --git a/tests/data/remodel_tests/test_root_back1/derivatives/remodel/backups/back1/backup_lock.json b/tests/data/remodel_tests/test_root_back1/derivatives/remodel/backups/back1/backup_lock.json deleted file mode 100644 index d3e4b6991..000000000 --- a/tests/data/remodel_tests/test_root_back1/derivatives/remodel/backups/back1/backup_lock.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "top_level.tsv": "2022-09-16 13:20:21.423303", - "sub1/sub1_events.tsv": "2022-09-16 13:20:21.423303", - "sub2/sub2_events.tsv": "2022-09-16 13:20:21.423303", - "sub2/sub2_next_events.tsv": "2022-09-16 13:20:21.423303" -} \ No newline at end of file diff --git a/tests/data/remodel_tests/test_root_back1/derivatives/remodel/backups/back1/backup_root/sub1/sub1_events.tsv b/tests/data/remodel_tests/test_root_back1/derivatives/remodel/backups/back1/backup_root/sub1/sub1_events.tsv deleted file mode 100644 index d2191cec6..000000000 --- a/tests/data/remodel_tests/test_root_back1/derivatives/remodel/backups/back1/backup_root/sub1/sub1_events.tsv +++ /dev/null @@ -1,2 +0,0 @@ -onset duration stuff -3.2 0.5 junk2 \ No newline at end of file diff --git a/tests/data/remodel_tests/test_root_back1/derivatives/remodel/backups/back1/backup_root/sub2/sub2_events.tsv b/tests/data/remodel_tests/test_root_back1/derivatives/remodel/backups/back1/backup_root/sub2/sub2_events.tsv deleted file mode 100644 index ef5c73314..000000000 --- a/tests/data/remodel_tests/test_root_back1/derivatives/remodel/backups/back1/backup_root/sub2/sub2_events.tsv +++ /dev/null @@ -1,2 +0,0 @@ -onset duration stuff -3.2 0.5 junk3 \ No newline at end of file diff --git a/tests/data/remodel_tests/test_root_back1/derivatives/remodel/backups/back1/backup_root/sub2/sub2_next_events.tsv 
b/tests/data/remodel_tests/test_root_back1/derivatives/remodel/backups/back1/backup_root/sub2/sub2_next_events.tsv deleted file mode 100644 index ae9d3d35d..000000000 --- a/tests/data/remodel_tests/test_root_back1/derivatives/remodel/backups/back1/backup_root/sub2/sub2_next_events.tsv +++ /dev/null @@ -1,2 +0,0 @@ -onset duration stuff -3.2 0.5 junk4 \ No newline at end of file diff --git a/tests/data/remodel_tests/test_root_back1/derivatives/remodel/backups/back1/backup_root/top_level.tsv b/tests/data/remodel_tests/test_root_back1/derivatives/remodel/backups/back1/backup_root/top_level.tsv deleted file mode 100644 index c71cc2553..000000000 --- a/tests/data/remodel_tests/test_root_back1/derivatives/remodel/backups/back1/backup_root/top_level.tsv +++ /dev/null @@ -1,2 +0,0 @@ -onset duration stuff -3.2 0.5 junk1 \ No newline at end of file diff --git a/tests/data/remodel_tests/test_root_back1/sub1/sub1_events.tsv b/tests/data/remodel_tests/test_root_back1/sub1/sub1_events.tsv deleted file mode 100644 index d2191cec6..000000000 --- a/tests/data/remodel_tests/test_root_back1/sub1/sub1_events.tsv +++ /dev/null @@ -1,2 +0,0 @@ -onset duration stuff -3.2 0.5 junk2 \ No newline at end of file diff --git a/tests/data/remodel_tests/test_root_back1/sub2/sub2_events.tsv b/tests/data/remodel_tests/test_root_back1/sub2/sub2_events.tsv deleted file mode 100644 index ef5c73314..000000000 --- a/tests/data/remodel_tests/test_root_back1/sub2/sub2_events.tsv +++ /dev/null @@ -1,2 +0,0 @@ -onset duration stuff -3.2 0.5 junk3 \ No newline at end of file diff --git a/tests/data/remodel_tests/test_root_back1/sub2/sub2_next_events.tsv b/tests/data/remodel_tests/test_root_back1/sub2/sub2_next_events.tsv deleted file mode 100644 index ae9d3d35d..000000000 --- a/tests/data/remodel_tests/test_root_back1/sub2/sub2_next_events.tsv +++ /dev/null @@ -1,2 +0,0 @@ -onset duration stuff -3.2 0.5 junk4 \ No newline at end of file diff --git a/tests/data/remodel_tests/test_root_back1/top_level.tsv 
b/tests/data/remodel_tests/test_root_back1/top_level.tsv deleted file mode 100644 index c71cc2553..000000000 --- a/tests/data/remodel_tests/test_root_back1/top_level.tsv +++ /dev/null @@ -1,2 +0,0 @@ -onset duration stuff -3.2 0.5 junk1 \ No newline at end of file From 3bfbd3bba2e8fdf524bde65b051a5afaa35dbb7e Mon Sep 17 00:00:00 2001 From: IanCa Date: Tue, 21 Mar 2023 16:53:05 -0500 Subject: [PATCH 13/19] Fix hed_string.expand_defs issue --- hed/models/hed_string.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hed/models/hed_string.py b/hed/models/hed_string.py index 75f2de5b9..db16833f1 100644 --- a/hed/models/hed_string.py +++ b/hed/models/hed_string.py @@ -116,7 +116,8 @@ def expand_defs(self): replacements.append((tag, tag._expandable)) for tag, group in replacements: - self.replace(tag, group) + tag_parent = tag._parent + tag_parent.replace(tag, group) tag.short_base_tag = DefTagNames.DEF_EXPAND_KEY return self From 3af84e9927224c132af07431de8e499ddd5a9f27 Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Thu, 23 Mar 2023 16:26:35 -0500 Subject: [PATCH 14/19] Corrected some of the refactored unit tests --- hed/tools/analysis/hed_type_counts.py | 2 +- hed/tools/bids/bids_file_group.py | 34 ++++++++----------- .../operations/factor_hed_type_op.py | 2 +- .../operations/summarize_hed_type_op.py | 5 ++- tests/tools/bids/test_bids_file_group.py | 5 ++- .../operations/test_summarize_hed_tags_op.py | 8 ++--- .../operations/test_summarize_hed_type_op.py | 20 +++++++++-- 7 files changed, 42 insertions(+), 34 deletions(-) diff --git a/hed/tools/analysis/hed_type_counts.py b/hed/tools/analysis/hed_type_counts.py index 056bd63d7..e68f2064e 100644 --- a/hed/tools/analysis/hed_type_counts.py +++ b/hed/tools/analysis/hed_type_counts.py @@ -147,4 +147,4 @@ def get_summary(self): for type_value, count in self.type_dict.items(): details[type_value] = count.get_summary() return {'name': str(self.name), 'type_tag': 
self.type_tag, 'files': list(self.files.keys()), - 'total_events': self.total_events, 'details': details} + 'total_events': self.total_events, 'details': details} \ No newline at end of file diff --git a/hed/tools/bids/bids_file_group.py b/hed/tools/bids/bids_file_group.py index 418cfd97a..dfb3439af 100644 --- a/hed/tools/bids/bids_file_group.py +++ b/hed/tools/bids/bids_file_group.py @@ -2,6 +2,8 @@ import os from hed.errors.error_reporter import ErrorContext, ErrorHandler +from hed.validator.sidecar_validator import SidecarValidator +from hed.validator.spreadsheet_validator import SpreadsheetValidator from hed.tools.analysis.tabular_summary import TabularSummary from hed.tools.bids.bids_tabular_file import BidsTabularFile from hed.tools.bids.bids_sidecar_file import BidsSidecarFile @@ -111,57 +113,51 @@ def summarize(self, value_cols=None, skip_cols=None): info.update(list(self.datafile_dict.keys())) return info - def validate_sidecars(self, hed_schema, check_for_warnings=True, error_handler=None): + def validate_sidecars(self, hed_schema, extra_def_dicts=None, check_for_warnings=True): """ Validate merged sidecars. Parameters: hed_schema (HedSchema): HED schema for validation. + extra_def_dicts (DefinitionDict): Extra definitions check_for_warnings (bool): If True, include warnings in the check. - error_handler (ErrorHandler): The common error handler for the dataset. Returns: list: A list of validation issues found. Each issue is a dictionary. 
""" - if not error_handler: - error_handler = ErrorHandler() + error_handler = ErrorHandler(check_for_warnings) issues = [] + validator = SidecarValidator(hed_schema) + for sidecar in self.sidecar_dict.values(): - error_handler.push_error_context(ErrorContext.FILE_NAME, sidecar.file_path) - if sidecar.has_hed: - issues += sidecar.contents.validate(hed_schema, name=sidecar.file_path) - error_handler.pop_error_context() + name = os.path.basename(sidecar.file_path) + issues += validator.validate(extra_def_dicts=extra_def_dicts, name=name, error_handler=error_handler) return issues - def validate_datafiles(self, hed_schema, check_for_warnings=True, keep_contents=False, error_handler=None): + def validate_datafiles(self, hed_schema, extra_def_dicts=None, check_for_warnings=True, keep_contents=False): """ Validate the datafiles and return an error list. Parameters: hed_schema (HedSchema): Schema to apply to the validation. + extra_def_dicts (DefinitionDict): Extra definitions that come from outside. check_for_warnings (bool): If True, include warnings in the check. keep_contents (bool): If True, the underlying data files are read and their contents retained. - error_handler (ErrorHandler): The common error handler to use for the dataset. Returns: list: A list of validation issues found. Each issue is a dictionary. 
""" - if not error_handler: - error_handler = ErrorHandler() + error_handler = ErrorHandler(check_for_warnings) issues = [] for data_obj in self.datafile_dict.values(): - error_handler.push_error_context(ErrorContext.FILE_NAME, data_obj.file_path) data_obj.set_contents(overwrite=False) - if not data_obj.has_hed: - continue - data = data_obj.contents - - issues += data.validate(hed_schema) + name = os.path.basename(data_obj.file_path) + issues += data_obj.contents.validate(data_obj.contents, extra_def_dicts=None, name=name, + error_handler=error_handler) if not keep_contents: data_obj.clear_contents() - error_handler.pop_error_context() return issues def _make_datafile_dict(self): diff --git a/hed/tools/remodeling/operations/factor_hed_type_op.py b/hed/tools/remodeling/operations/factor_hed_type_op.py index 668886c88..0a61974ed 100644 --- a/hed/tools/remodeling/operations/factor_hed_type_op.py +++ b/hed/tools/remodeling/operations/factor_hed_type_op.py @@ -74,7 +74,7 @@ def do_op(self, dispatcher, df, name, sidecar=None): df_list = [input_data.dataframe.copy()] hed_strings, definitions = get_assembled(input_data, sidecar, dispatcher.hed_schema, extra_def_dicts=None, join_columns=True, - shrink_defs=False, expand_defs=True) + shrink_defs=True, expand_defs=False) var_manager = HedTypeManager(hed_strings, dispatcher.hed_schema, definitions) var_manager.add_type_variable(self.type_tag.lower()) diff --git a/hed/tools/remodeling/operations/summarize_hed_type_op.py b/hed/tools/remodeling/operations/summarize_hed_type_op.py index 0e2664698..85ea41d7d 100644 --- a/hed/tools/remodeling/operations/summarize_hed_type_op.py +++ b/hed/tools/remodeling/operations/summarize_hed_type_op.py @@ -93,8 +93,7 @@ def update_context(self, new_context): sidecar = Sidecar(sidecar) input_data = TabularInput(new_context['df'], sidecar=sidecar, name=new_context['name']) hed_strings, definitions = get_assembled(input_data, sidecar, new_context['schema'], - extra_def_dicts=None, 
join_columns=True, - shrink_defs=False, expand_defs=True) + extra_def_dicts=None, join_columns=True, expand_defs=False) context_manager = HedContextManager(hed_strings, new_context['schema']) type_values = HedTypeValues(context_manager, definitions, new_context['name'], type_tag=self.type_tag) @@ -176,4 +175,4 @@ def _level_details(level_counts, offset="", indent=""): level_list.append(f"{offset}{indent*3}Tags: {str(details['tags'])}") if details['description']: level_list.append(f"{offset}{indent*3}Description: {details['description']}") - return level_list + return level_list \ No newline at end of file diff --git a/tests/tools/bids/test_bids_file_group.py b/tests/tools/bids/test_bids_file_group.py index 22d395085..4d4302b72 100644 --- a/tests/tools/bids/test_bids_file_group.py +++ b/tests/tools/bids/test_bids_file_group.py @@ -34,9 +34,8 @@ def test_validator(self): events = BidsFileGroup(self.root_path) hed = 'https://raw.githubusercontent.com/hed-standard/hed-schemas/main/standard_schema/hedxml/HED8.0.0.xml' hed_schema = load_schema(hed) - # TODO test after filtering. 
- # validation_issues = events.validate_datafiles(hed_schema, check_for_warnings=False) - # self.assertFalse(validation_issues, "BidsFileGroup should have no validation errors") + validation_issues = events.validate_datafiles(hed_schema, check_for_warnings=False) + self.assertFalse(validation_issues, "BidsFileGroup should have no validation errors") validation_issues = events.validate_datafiles(hed_schema, check_for_warnings=True) self.assertTrue(validation_issues, "BidsFileGroup should have validation warnings") self.assertEqual(len(validation_issues), 6, diff --git a/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py b/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py index 5f5ee41bf..aa3bd4b9c 100644 --- a/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py +++ b/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py @@ -104,7 +104,7 @@ def test_quick3(self): input_data = TabularInput(df, sidecar=my_sidecar) counts = HedTagCounts('myName', 2) summary_dict = {} - hed_strings = get_assembled(input_data, my_sidecar, my_schema, extra_def_dicts=None, join_columns=True, + hed_strings, definitions = get_assembled(input_data, my_sidecar, my_schema, extra_def_dicts=None, join_columns=True, shrink_defs=False, expand_defs=True) for hed in hed_strings: counts.update_event_counts(hed, 'myName') @@ -126,10 +126,8 @@ def test_quick4(self): hed_strings, definitions = get_assembled(input_data, sidecar, my_schema, extra_def_dicts=None, join_columns=True, shrink_defs=False, expand_defs=True) - for objs in input_data.iter_dataframe(hed_ops=[my_schema], return_string_only=False, - expand_defs=True, remove_definitions=True): - x = objs['HED'] - counts.update_event_counts(objs['HED'], 'myName') + for hed in hed_strings: + counts.update_event_counts(hed, 'myName') summary_dict['myName'] = counts def test_get_summary_details(self): diff --git a/tests/tools/remodeling/operations/test_summarize_hed_type_op.py 
b/tests/tools/remodeling/operations/test_summarize_hed_type_op.py index df72c65ee..c7b18ad90 100644 --- a/tests/tools/remodeling/operations/test_summarize_hed_type_op.py +++ b/tests/tools/remodeling/operations/test_summarize_hed_type_op.py @@ -40,6 +40,10 @@ def setUpClass(cls): cls.summary_path = \ os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../../data/remodel_tests/aomic_sub-0013_summary_all_rmdl.json')) + rel_path = '../../../data/remodel_tests/sub-002_task-FacePerception_run-1_events.tsv' + cls.events_wh = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), rel_path)) + rel_side = '../../../data/remodel_tests/task-FacePerception_events.json' + cls.sidecar_path_wh = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), rel_side)) @classmethod def tearDownClass(cls): @@ -75,9 +79,21 @@ def test_summary(self): self.assertEqual(len(summary2['Dataset']['Overall summary']['files']), 2) summary2a = context2.get_summary(individual_summaries="separate") self.assertIsInstance(summary2a["Individual files"]["run-02"], dict) + + def test_text_summary_with_levels(self): + with open(self.summary_path, 'r') as fp: + parms = json.load(fp) + dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions=['8.1.0']) + df = dispatch.get_data_file(self.events_wh) + parsed_commands, errors = Dispatcher.parse_operations(parms) + sum_op = parsed_commands[2] + sum_op.do_op(dispatch, dispatch.prep_data(df), 'run-01', sidecar=self.sidecar_path_wh) + context1 = dispatch.context_dict['AOMIC_condition_variables'] + text_summary1 = context1.get_text_summary() + self.assertIsInstance(text_summary1, dict) def test_text_summary(self): - sidecar = Sidecar(self.sidecar_path, 'aomic_sidecar', hed_schema=self.hed_schema) + sidecar = Sidecar(self.sidecar_path, name='aomic_sidecar') with open(self.summary_path, 'r') as fp: parms = json.load(fp) @@ -104,4 +120,4 @@ def test_text_summary(self): if __name__ == 
'__main__': - unittest.main() + unittest.main() \ No newline at end of file From bc5c94915f39e8aab3708aead895893ffa357eac Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Thu, 23 Mar 2023 16:49:35 -0500 Subject: [PATCH 15/19] Updated bids tests --- hed/tools/bids/bids_dataset.py | 10 +++------- hed/tools/bids/bids_file_group.py | 5 +++-- tests/tools/analysis/test_event_manager.py | 1 - tests/tools/bids/test_bids_dataset.py | 15 ++++++--------- tests/tools/remodeling/cli/test_run_remodel.py | 4 +--- 5 files changed, 13 insertions(+), 22 deletions(-) diff --git a/hed/tools/bids/bids_dataset.py b/hed/tools/bids/bids_dataset.py index 0438cb5fe..bbb06ae7b 100644 --- a/hed/tools/bids/bids_dataset.py +++ b/hed/tools/bids/bids_dataset.py @@ -79,18 +79,14 @@ def validate(self, types=None, check_for_warnings=True): list: List of issues encountered during validation. Each issue is a dictionary. """ - validator = HedValidator(hed_schema=self.schema) - error_handler = ErrorHandler() + if not types: types = list(self.tabular_files.keys()) issues = [] for tab_type in types: files = self.tabular_files[tab_type] - issues += files.validate_sidecars(self.schema, - check_for_warnings=check_for_warnings, error_handler=error_handler) - issues += files.validate_datafiles(self.schema, - check_for_warnings=check_for_warnings, - error_handler=error_handler) + issues += files.validate_sidecars(self.schema, check_for_warnings=check_for_warnings) + issues += files.validate_datafiles(self.schema, check_for_warnings=check_for_warnings) return issues def get_summary(self): diff --git a/hed/tools/bids/bids_file_group.py b/hed/tools/bids/bids_file_group.py index dfb3439af..44f3f1a21 100644 --- a/hed/tools/bids/bids_file_group.py +++ b/hed/tools/bids/bids_file_group.py @@ -132,7 +132,8 @@ def validate_sidecars(self, hed_schema, extra_def_dicts=None, check_for_warnings for sidecar in self.sidecar_dict.values(): name = os.path.basename(sidecar.file_path) - 
issues += validator.validate(extra_def_dicts=extra_def_dicts, name=name, error_handler=error_handler) + issues += validator.validate(sidecar.contents, extra_def_dicts=extra_def_dicts, name=name, + error_handler=error_handler) return issues def validate_datafiles(self, hed_schema, extra_def_dicts=None, check_for_warnings=True, keep_contents=False): @@ -154,7 +155,7 @@ def validate_datafiles(self, hed_schema, extra_def_dicts=None, check_for_warning for data_obj in self.datafile_dict.values(): data_obj.set_contents(overwrite=False) name = os.path.basename(data_obj.file_path) - issues += data_obj.contents.validate(data_obj.contents, extra_def_dicts=None, name=name, + issues += data_obj.contents.validate(hed_schema, extra_def_dicts=None, name=name, error_handler=error_handler) if not keep_contents: data_obj.clear_contents() diff --git a/tests/tools/analysis/test_event_manager.py b/tests/tools/analysis/test_event_manager.py index 09eb17a50..8f84549d1 100644 --- a/tests/tools/analysis/test_event_manager.py +++ b/tests/tools/analysis/test_event_manager.py @@ -36,7 +36,6 @@ def test_constructor(self): self.assertEqual(event.start_time, manager1.data.dataframe.loc[index, "onset"]) if not event.end_time: self.assertEqual(event.end_index, len(manager1.data.dataframe)) - print("to here") # def test_constructor(self): # with self.assertRaises(ValueError) as cont: diff --git a/tests/tools/bids/test_bids_dataset.py b/tests/tools/bids/test_bids_dataset.py index df02448bf..6289be314 100644 --- a/tests/tools/bids/test_bids_dataset.py +++ b/tests/tools/bids/test_bids_dataset.py @@ -68,21 +68,18 @@ def test_validator(self): self.assertTrue(issues, "BidsDataset validate should return issues when the default check_for_warnings is used") issues = bids.validate(check_for_warnings=True) self.assertTrue(issues, "BidsDataset validate should return issues when check_for_warnings is True") - # ToDO - # issues = bids.validate(check_for_warnings=False) - # self.assertFalse(issues, "BidsDataset 
validate should return no issues when check_for_warnings is False") + issues = bids.validate(check_for_warnings=False) + self.assertFalse(issues, "BidsDataset validate should return no issues when check_for_warnings is False") def test_validator_libraries(self): bids = BidsDataset(self.library_path) - # ToDO check_for_warnings - # issues = bids.validate(check_for_warnings=False) - # self.assertFalse(issues, "BidsDataset with libraries should validate") + issues = bids.validate(check_for_warnings=False) + self.assertFalse(issues, "BidsDataset with libraries should validate") def test_validator_types(self): bids = BidsDataset(self.root_path, tabular_types=None) - # ToDO: check_for_warnings - # issues = bids.validate(check_for_warnings=False) - # self.assertFalse(issues, "BidsDataset with participants and events validates") + issues = bids.validate(check_for_warnings=False) + self.assertFalse(issues, "BidsDataset with participants and events validates") def test_with_schema_group(self): base_version = '8.0.0' diff --git a/tests/tools/remodeling/cli/test_run_remodel.py b/tests/tools/remodeling/cli/test_run_remodel.py index d0611058e..099f80252 100644 --- a/tests/tools/remodeling/cli/test_run_remodel.py +++ b/tests/tools/remodeling/cli/test_run_remodel.py @@ -97,9 +97,7 @@ def test_main_bids_no_sidecar_with_hed(self): os.remove(self.sidecar_path) with patch('sys.stdout', new=io.StringIO()) as fp: main(arg_list) - a = fp.getvalue() - print("to here") - #self.assertFalse(fp.getvalue()) + self.assertFalse(fp.getvalue()) def test_main_direct_no_sidecar(self): arg_list = [self.data_root, self.model_path, '-x', 'derivatives', 'stimuli'] From 697791c680b45cbe2c69dc2315fc3945ac9d5c95 Mon Sep 17 00:00:00 2001 From: IanCa <30812436+IanCa@users.noreply.github.com> Date: Thu, 23 Mar 2023 17:25:01 -0500 Subject: [PATCH 16/19] =?UTF-8?q?Add=20squre=20bracket=20in=20column=20val?= =?UTF-8?q?idation=20for=20spreadsheets.=20=20Update=20erro=E2=80=A6=20(#6?= =?UTF-8?q?32)?= MIME-Version: 
1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add squre bracket in column validation for spreadsheets. Update error handling slightly(error list is now sorted by context always) * Further fix shrinking/expanding(with some test cases). Start updating errors to spec names. --- hed/errors/__init__.py | 2 +- hed/errors/error_messages.py | 60 +++++- hed/errors/error_reporter.py | 71 +++++-- hed/errors/error_types.py | 27 ++- hed/models/base_input.py | 56 +++--- hed/models/definition_dict.py | 2 + hed/models/definition_entry.py | 5 +- hed/models/hed_group.py | 3 - hed/models/hed_string.py | 2 + hed/models/hed_tag.py | 13 ++ hed/models/sidecar.py | 3 +- hed/schema/schema_compliance.py | 2 +- hed/validator/def_validator.py | 24 ++- hed/validator/sidecar_validator.py | 5 +- hed/validator/spreadsheet_validator.py | 99 +++++++++- hed/validator/tag_validator.py | 6 +- tests/errors/test_error_reporter.py | 8 +- tests/models/test_base_input.py | 48 +---- tests/schema/test_convert_tags.py | 2 +- tests/validator/test_def_validator.py | 177 ++++++++++++++++++ tests/validator/test_onset_validator.py | 4 +- tests/validator/test_spreadsheet_validator.py | 57 ++++++ tests/validator/test_tag_validator.py | 14 +- tests/validator/test_tag_validator_base.py | 2 +- 24 files changed, 564 insertions(+), 128 deletions(-) create mode 100644 tests/validator/test_spreadsheet_validator.py diff --git a/hed/errors/__init__.py b/hed/errors/__init__.py index 0583dd562..c2f58a07c 100644 --- a/hed/errors/__init__.py +++ b/hed/errors/__init__.py @@ -1,4 +1,4 @@ -from .error_reporter import ErrorHandler, get_exception_issue_string, get_printable_issue_string +from .error_reporter import ErrorHandler, get_exception_issue_string, get_printable_issue_string, sort_issues from .error_types import DefinitionErrors, OnsetErrors, SchemaErrors, SchemaWarnings, SidecarErrors, ValidationErrors from .error_types import ErrorContext, ErrorSeverity from .exceptions import HedExceptions, 
HedFileError diff --git a/hed/errors/error_messages.py b/hed/errors/error_messages.py index 9ae9557f3..ca379992f 100644 --- a/hed/errors/error_messages.py +++ b/hed/errors/error_messages.py @@ -6,7 +6,7 @@ from hed.errors.error_reporter import hed_error, hed_tag_error from hed.errors.error_types import ValidationErrors, SchemaErrors, \ - SidecarErrors, SchemaWarnings, ErrorSeverity, DefinitionErrors, OnsetErrors + SidecarErrors, SchemaWarnings, ErrorSeverity, DefinitionErrors, OnsetErrors, ColumnErrors @hed_tag_error(ValidationErrors.HED_UNITS_INVALID) @@ -31,14 +31,14 @@ def val_error_tag_extended(tag, problem_tag): return f"Hed tag is extended. '{problem_tag}' in {tag}" -@hed_error(ValidationErrors.HED_CHARACTER_INVALID) +@hed_error(ValidationErrors.CHARACTER_INVALID) def val_error_invalid_char(source_string, char_index): character = source_string[char_index] return f'Invalid character "{character}" at index {char_index}"' @hed_tag_error(ValidationErrors.INVALID_TAG_CHARACTER, has_sub_tag=True, - actual_code=ValidationErrors.HED_CHARACTER_INVALID) + actual_code=ValidationErrors.CHARACTER_INVALID) def val_error_invalid_tag_character(tag, problem_tag): return f"Invalid character '{problem_tag}' in {tag}" @@ -49,7 +49,7 @@ def val_error_tildes_not_supported(source_string, char_index): return f"Tildes not supported. Replace (a ~ b ~ c) with (a, (b, c)). '{character}' at index {char_index}'" -@hed_error(ValidationErrors.HED_COMMA_MISSING) +@hed_error(ValidationErrors.COMMA_MISSING) def val_error_comma_missing(tag): return f"Comma missing after - '{tag}'" @@ -143,27 +143,44 @@ def val_error_sidecar_key_missing(invalid_key, category_keys): return f"Category key '{invalid_key}' does not exist in column. Valid keys are: {category_keys}" -@hed_tag_error(ValidationErrors.HED_DEF_UNMATCHED) -def val_error_def_unmatched(tag): - return f"A data-recording’s Def tag cannot be matched to definition. 
Tag: '{tag}'" -@hed_tag_error(ValidationErrors.HED_DEF_EXPAND_INVALID) +@hed_tag_error(ValidationErrors.HED_DEF_EXPAND_INVALID, actual_code=ValidationErrors.DEF_EXPAND_INVALID) def val_error_bad_def_expand(tag, actual_def, found_def): return f"A data-recording’s Def-expand tag does not match the given definition." + \ f"Tag: '{tag}'. Actual Def: {actual_def}. Found Def: {found_def}" -@hed_tag_error(ValidationErrors.HED_DEF_VALUE_MISSING, actual_code=ValidationErrors.HED_DEF_VALUE_INVALID) +@hed_tag_error(ValidationErrors.HED_DEF_UNMATCHED, actual_code=ValidationErrors.DEF_INVALID) +def val_error_def_unmatched(tag): + return f"A data-recording’s Def tag cannot be matched to definition. Tag: '{tag}'" + + +@hed_tag_error(ValidationErrors.HED_DEF_VALUE_MISSING, actual_code=ValidationErrors.DEF_INVALID) def val_error_def_value_missing(tag): return f"A def tag requires a placeholder value, but was not given one. Definition: '{tag}'" -@hed_tag_error(ValidationErrors.HED_DEF_VALUE_EXTRA, actual_code=ValidationErrors.HED_DEF_VALUE_INVALID) +@hed_tag_error(ValidationErrors.HED_DEF_VALUE_EXTRA, actual_code=ValidationErrors.DEF_INVALID) def val_error_def_value_extra(tag): return f"A def tag does not take a placeholder value, but was given one. Definition: '{tag}" +@hed_tag_error(ValidationErrors.HED_DEF_EXPAND_UNMATCHED, actual_code=ValidationErrors.DEF_EXPAND_INVALID) +def val_error_def_expand_unmatched(tag): + return f"A data-recording’s Def-expand tag cannot be matched to definition. Tag: '{tag}'" + + +@hed_tag_error(ValidationErrors.HED_DEF_EXPAND_VALUE_MISSING, actual_code=ValidationErrors.DEF_EXPAND_INVALID) +def val_error_def_expand_value_missing(tag): + return f"A Def-expand tag requires a placeholder value, but was not given one. 
Definition: '{tag}'" + + +@hed_tag_error(ValidationErrors.HED_DEF_EXPAND_VALUE_EXTRA, actual_code=ValidationErrors.DEF_EXPAND_INVALID) +def val_error_def_expand_value_extra(tag): + return f"A Def-expand tag does not take a placeholder value, but was given one. Definition: '{tag}" + + @hed_tag_error(ValidationErrors.HED_TOP_LEVEL_TAG, actual_code=ValidationErrors.HED_TAG_GROUP_ERROR) def val_error_top_level_tag(tag): return f"A tag that must be in a top level group was found in another location. {str(tag)}" @@ -342,3 +359,26 @@ def onset_wrong_placeholder(tag, has_placeholder): if has_placeholder: return f"Onset/offset def tag {tag} expects a placeholder value, but does not have one." return f"Onset/offset def tag {tag} should not have a placeholder, but has one." + + +@hed_error(ColumnErrors.INVALID_COLUMN_REF) +def invalid_column_ref(bad_refs): + return f"Bad column references found(columns do not exist): {bad_refs}" + + +@hed_error(ColumnErrors.SELF_COLUMN_REF) +def self_column_ref(self_ref): + return f"Column references itself: {self_ref}" + + +@hed_error(ColumnErrors.NESTED_COLUMN_REF) +def nested_column_ref(column_name, ref_column): + return f"Column {column_name} has a nested reference to {ref_column}. " \ + f"Column reference columns cannot contain other column references." + + +@hed_error(ColumnErrors.MALFORMED_COLUMN_REF) +def nested_column_ref(column_name, index, symbol): + return f"Column {column_name} has a malformed column reference. Improper symbol {symbol} found at index {index}." 
+ + diff --git a/hed/errors/error_reporter.py b/hed/errors/error_reporter.py index 4a7fd91a9..cb1a959d5 100644 --- a/hed/errors/error_reporter.py +++ b/hed/errors/error_reporter.py @@ -10,6 +10,27 @@ error_functions = {} +# Controls if the default issue printing skips adding indentation for this context +no_tab_context = {ErrorContext.HED_STRING, ErrorContext.SCHEMA_ATTRIBUTE} + +# Default sort ordering for issues list +default_sort_list = [ + ErrorContext.CUSTOM_TITLE, + ErrorContext.FILE_NAME, + ErrorContext.SIDECAR_COLUMN_NAME, + ErrorContext.SIDECAR_KEY_NAME, + ErrorContext.ROW, + ErrorContext.COLUMN, + ErrorContext.HED_STRING, + ErrorContext.SCHEMA_SECTION, + ErrorContext.SCHEMA_TAG, + ErrorContext.SCHEMA_ATTRIBUTE, +] + +# ErrorContext which is expected to be int based. +int_sort_list = [ + ErrorContext.ROW, +] def _register_error_function(error_type, wrapper_func): if error_type in error_functions: @@ -153,19 +174,23 @@ def __init__(self, check_for_warnings=True): self.error_context = [] self._check_for_warnings = check_for_warnings - def push_error_context(self, context_type, context, increment_depth_after=True): + def push_error_context(self, context_type, context): """ Push a new error context to narrow down error scope. Parameters: context_type (ErrorContext): A value from ErrorContext representing the type of scope. context (str, int, or HedString): The main value for the context_type. - increment_depth_after (bool): If True, add an extra tab to any subsequent errors in the scope. Notes: The context depends on the context_type. For ErrorContext.FILE_NAME this would be the actual filename. """ - self.error_context.append((context_type, context, increment_depth_after)) + if context is None: + if context_type in int_sort_list: + context = 0 + else: + context_type = "" + self.error_context.append((context_type, context)) def pop_error_context(self): """ Remove the last scope from the error context. 
@@ -292,8 +317,8 @@ def _add_context_to_errors(error_object, error_context_to_add): """ if error_object is None: error_object = {} - for (context_type, context, increment_count) in error_context_to_add: - error_object[context_type] = (context, increment_count) + for (context_type, context) in error_context_to_add: + error_object[context_type] = context return error_object @@ -330,7 +355,7 @@ def _get_tag_span_to_error_object(error_object): else: return None, None - hed_string = error_object[ErrorContext.HED_STRING][0] + hed_string = error_object[ErrorContext.HED_STRING] span = hed_string._get_org_span(source_tag) return span @@ -385,6 +410,7 @@ def filter_issues_by_severity(issues_list, severity): def get_exception_issue_string(issues, title=None): """ Return a string with issues list flatted into single string, one issue per line. + Possibly being deprecated. Parameters: issues (list): A list of strings containing issues to print. @@ -410,6 +436,29 @@ def get_exception_issue_string(issues, title=None): return issue_str +def sort_issues(issues, reverse=False): + """Sorts a list of issues by the error context values. + + Parameters: + issues (list): A list of dictionaries representing the issues to be sorted. + reverse (bool, optional): If True, sorts the list in descending order. Default is False. + + Returns: + list: The sorted list of issues.""" + def _get_keys(d): + result = [] + for key in default_sort_list: + if key in int_sort_list: + result.append(d.get(key, -1)) + else: + result.append(d.get(key, "")) + return tuple(result) + + issues = sorted(issues, key=_get_keys, reverse=reverse) + + return issues + + def get_printable_issue_string(issues, title=None, severity=None, skip_filename=True): """ Return a string with issues list flatted into single string, one per line. 
@@ -471,7 +520,7 @@ def _get_context_from_issue(val_issue, skip_filename=True): if skip_filename and key == ErrorContext.FILE_NAME: continue if key.startswith("ec_"): - single_issue_context.append((key, *val_issue[key])) + single_issue_context.append((key, val_issue[key])) return single_issue_context @@ -512,7 +561,7 @@ def _get_context_string(single_issue_context, last_used_context): """ Convert a single context list into the final human readable output form. Parameters: - single_issue_context (list): A list of tuples containing the context(context_type, context, increment_tab) + single_issue_context (list): A list of tuples containing the context(context_type, context) last_used_context (list): A list of tuples containing the last drawn context. Returns: @@ -528,18 +577,18 @@ def _get_context_string(single_issue_context, last_used_context): tab_count = 0 found_difference = False for i, context_tuple in enumerate(single_issue_context): - (context_type, context, increment_tab) = context_tuple + (context_type, context) = context_tuple if len(last_used_context) > i and not found_difference: last_drawn = last_used_context[i] # Was drawn, and hasn't changed. 
if last_drawn == context_tuple: - if increment_tab: + if context_type not in no_tab_context: tab_count += 1 continue context_string += _format_single_context_string(context_type, context, tab_count) found_difference = True - if increment_tab: + if context_type not in no_tab_context: tab_count += 1 tab_string = '\t' * tab_count diff --git a/hed/errors/error_types.py b/hed/errors/error_types.py index ac76f6992..c4fb5df5f 100644 --- a/hed/errors/error_types.py +++ b/hed/errors/error_types.py @@ -21,11 +21,22 @@ class ErrorContext: class ValidationErrors: # General validation errors - HED_CHARACTER_INVALID = 'HED_CHARACTER_INVALID' - HED_COMMA_MISSING = 'HED_COMMA_MISSING' + CHARACTER_INVALID = 'CHARACTER_INVALID' + COMMA_MISSING = 'COMMA_MISSING' + DEF_EXPAND_INVALID = "DEF_EXPAND_INVALID" + DEF_INVALID = "DEF_INVALID" + + # NOT OFFICIAL HED_DEF_UNMATCHED = "HED_DEF_UNMATCHED" + HED_DEF_VALUE_MISSING = "HED_DEF_VALUE_MISSING" + HED_DEF_VALUE_EXTRA = "HED_DEF_VALUE_EXTRA" + HED_DEF_EXPAND_INVALID = "HED_DEF_EXPAND_INVALID" - HED_DEF_VALUE_INVALID = "HED_DEF_VALUE_INVALID" + HED_DEF_EXPAND_UNMATCHED = "HED_DEF_EXPAND_UNMATCHED" + HED_DEF_EXPAND_VALUE_MISSING = "HED_DEF_EXPAND_VALUE_MISSING" + HED_DEF_EXPAND_VALUE_EXTRA = "HED_DEF_EXPAND_VALUE_EXTRA" + # END NOT OFFICIAL + HED_DEFINITION_INVALID = "HED_DEFINITION_INVALID" HED_NODE_NAME_EMPTY = 'HED_NODE_NAME_EMPTY' HED_ONSET_OFFSET_ERROR = 'HED_ONSET_OFFSET_ERROR' @@ -70,8 +81,7 @@ class ValidationErrors: HED_MULTIPLE_TOP_TAGS = "HED_MULTIPLE_TOP_TAGS" HED_TAG_GROUP_TAG = "HED_TAG_GROUP_TAG" - HED_DEF_VALUE_MISSING = "HED_DEF_VALUE_MISSING" - HED_DEF_VALUE_EXTRA = "HED_DEF_VALUE_EXTRA" + class SidecarErrors: @@ -117,3 +127,10 @@ class OnsetErrors: ONSET_PLACEHOLDER_WRONG = "ONSET_PLACEHOLDER_WRONG" ONSET_TOO_MANY_DEFS = "ONSET_TOO_MANY_DEFS" ONSET_TAG_OUTSIDE_OF_GROUP = "ONSET_TAG_OUTSIDE_OF_GROUP" + + +class ColumnErrors: + INVALID_COLUMN_REF = "INVALID_COLUMN_REF" + SELF_COLUMN_REF = "SELF_COLUMN_REF" + 
NESTED_COLUMN_REF = "NESTED_COLUMN_REF" + MALFORMED_COLUMN_REF = "MALFORMED_COLUMN_REF" diff --git a/hed/models/base_input.py b/hed/models/base_input.py index af6249f56..f0e4209c2 100644 --- a/hed/models/base_input.py +++ b/hed/models/base_input.py @@ -354,33 +354,45 @@ def _dataframe_has_names(dataframe): return True return False - def assemble(self, mapper=None): + def assemble(self, mapper=None, skip_square_brackets=False): """ Assembles the hed strings Parameters: mapper(ColumnMapper or None): Generally pass none here unless you want special behavior. - + skip_square_brackets (bool): If True, don't plug in square bracket values into columns. Returns: Dataframe: the assembled dataframe """ if mapper is None: mapper = self._mapper + all_columns = self._handle_transforms(mapper) + if skip_square_brackets: + return all_columns + transformers, _ = mapper.get_transformers() + + return self._handle_square_brackets(all_columns, list(transformers)) + + def _handle_transforms(self, mapper): transformers, need_categorical = mapper.get_transformers() - if not transformers: - return self._dataframe - all_columns = self._dataframe - if need_categorical: - all_columns[need_categorical] = all_columns[need_categorical].astype('category') + if transformers: + all_columns = self._dataframe + if need_categorical: + all_columns[need_categorical] = all_columns[need_categorical].astype('category') - all_columns = all_columns.transform(transformers) + all_columns = all_columns.transform(transformers) + + if need_categorical: + all_columns[need_categorical] = all_columns[need_categorical].astype('str') + else: + all_columns = self._dataframe - return self._insert_columns(all_columns, list(transformers.keys())) + return all_columns @staticmethod - def _find_column_refs(df): + def _find_column_refs(df, column_names): found_column_references = [] - for column_name in df: + for column_name in column_names: df_temp = df[column_name].str.findall("\[([a-z_\-0-9]+)\]", re.IGNORECASE) u_vals = 
pd.Series([j for i in df_temp if isinstance(i, list) for j in i], dtype=str) u_vals = u_vals.unique() @@ -391,21 +403,23 @@ def _find_column_refs(df): return found_column_references @staticmethod - def _insert_columns(df, known_columns=None): - if known_columns is None: - known_columns = list(df.columns) - possible_column_references = [f"{column_name}" for column_name in df.columns if + def _handle_square_brackets(df, known_columns=None): + """ + Plug in square brackets with other columns + + If known columns is passed, only use those columns to find or replace references. + """ + if known_columns is not None: + column_names = list(known_columns) + else: + column_names = list(df.columns) + possible_column_references = [f"{column_name}" for column_name in column_names if isinstance(column_name, str) and column_name.lower() != "hed"] - found_column_references = BaseInput._find_column_refs(df) + found_column_references = BaseInput._find_column_refs(df, column_names) - invalid_replacements = [col for col in found_column_references if col not in possible_column_references] - if invalid_replacements: - # todo: This check may be moved to validation - raise ValueError(f"Bad column references found(columns do not exist): {invalid_replacements}") valid_replacements = [col for col in found_column_references if col in possible_column_references] # todo: break this into a sub function(probably) - column_names = known_columns for column_name in valid_replacements: column_names.remove(column_name) saved_columns = df[valid_replacements] diff --git a/hed/models/definition_dict.py b/hed/models/definition_dict.py index ca3b06b34..04cbfc440 100644 --- a/hed/models/definition_dict.py +++ b/hed/models/definition_dict.py @@ -184,7 +184,9 @@ def construct_def_tag(self, hed_tag): hed_tag(HedTag): The hed tag to identify definition contents in """ if hed_tag.short_base_tag in {DefTagNames.DEF_ORG_KEY, DefTagNames.DEF_EXPAND_ORG_KEY}: + save_parent = hed_tag._parent def_contents = 
self._get_definition_contents(hed_tag) + hed_tag._parent = save_parent if def_contents is not None: hed_tag._expandable = def_contents hed_tag._expanded = hed_tag.short_base_tag == DefTagNames.DEF_EXPAND_ORG_KEY diff --git a/hed/models/definition_entry.py b/hed/models/definition_entry.py index cb7581fa3..27c89d33b 100644 --- a/hed/models/definition_entry.py +++ b/hed/models/definition_entry.py @@ -26,13 +26,14 @@ def __init__(self, name, contents, takes_value, source_context): if contents: add_group_to_dict(contents, self.tag_dict) - def get_definition(self, replace_tag, placeholder_value=None): + def get_definition(self, replace_tag, placeholder_value=None, return_copy_of_tag=False): """ Return a copy of the definition with the tag expanded and the placeholder plugged in. Parameters: replace_tag (HedTag): The def hed tag to replace with an expanded version placeholder_value (str or None): If present and required, will replace any pound signs in the definition contents. + return_copy_of_tag(bool): Set to true for validation Returns: str: The expanded def tag name @@ -45,6 +46,8 @@ def get_definition(self, replace_tag, placeholder_value=None): if self.takes_value == (placeholder_value is None): return None, [] + if return_copy_of_tag: + replace_tag = replace_tag.copy() output_contents = [replace_tag] name = self.name if self.contents: diff --git a/hed/models/hed_group.py b/hed/models/hed_group.py index 6df911801..7273d956c 100644 --- a/hed/models/hed_group.py +++ b/hed/models/hed_group.py @@ -132,9 +132,6 @@ def copy(self): Returns: HedGroup: The copied group. - Notes: - - The parent tag is removed. 
- """ save_parent = self._parent self._parent = None diff --git a/hed/models/hed_string.py b/hed/models/hed_string.py index db16833f1..7be20fb5d 100644 --- a/hed/models/hed_string.py +++ b/hed/models/hed_string.py @@ -96,6 +96,7 @@ def shrink_defs(self): expanded_parent = def_expand_group._parent if expanded_parent: def_expand_tag.short_base_tag = DefTagNames.DEF_ORG_KEY + def_expand_tag._parent = expanded_parent expanded_parent.replace(def_expand_group, def_expand_tag) return self @@ -118,6 +119,7 @@ def expand_defs(self): for tag, group in replacements: tag_parent = tag._parent tag_parent.replace(tag, group) + tag._parent = group tag.short_base_tag = DefTagNames.DEF_EXPAND_KEY return self diff --git a/hed/models/hed_tag.py b/hed/models/hed_tag.py index 29bcf8cf6..689eeac1d 100644 --- a/hed/models/hed_tag.py +++ b/hed/models/hed_tag.py @@ -54,6 +54,19 @@ def __init__(self, hed_string, span=None, hed_schema=None, def_dict=None): if def_dict: def_dict.construct_def_tag(self) + def copy(self): + """ Return a deep copy of this tag. + + Returns: + HedTag: The copied group. + + """ + save_parent = self._parent + self._parent = None + return_copy = copy.deepcopy(self) + self._parent = save_parent + return return_copy + @property def schema_prefix(self): """ Library prefix for this tag if one exists. 
diff --git a/hed/models/sidecar.py b/hed/models/sidecar.py index 280eba77d..958cadfba 100644 --- a/hed/models/sidecar.py +++ b/hed/models/sidecar.py @@ -255,8 +255,7 @@ def extract_definitions(self, hed_schema=None, error_handler=None): if hed_schema: for hed_string, column_data, _ in self.hed_string_iter(error_handler): hed_string_obj = HedString(hed_string, hed_schema) - error_handler.push_error_context(ErrorContext.HED_STRING, hed_string_obj, - increment_depth_after=False) + error_handler.push_error_context(ErrorContext.HED_STRING, hed_string_obj) self._extract_definition_issues += def_dict.check_for_definitions(hed_string_obj, error_handler) error_handler.pop_error_context() diff --git a/hed/schema/schema_compliance.py b/hed/schema/schema_compliance.py index 84c2accbf..c0b821723 100644 --- a/hed/schema/schema_compliance.py +++ b/hed/schema/schema_compliance.py @@ -60,7 +60,7 @@ def check_compliance(hed_schema, check_for_warnings=True, name=None, error_handl for attribute_name in tag_entry.attributes: validator = schema_attribute_validators.get(attribute_name) if validator: - error_handler.push_error_context(ErrorContext.SCHEMA_ATTRIBUTE, attribute_name, False) + error_handler.push_error_context(ErrorContext.SCHEMA_ATTRIBUTE, attribute_name) new_issues = validator(hed_schema, tag_entry, tag_entry.attributes[attribute_name]) error_handler.add_context_and_filter(new_issues) issues_list += new_issues diff --git a/hed/validator/def_validator.py b/hed/validator/def_validator.py index 24a3d8e5b..5b18cd466 100644 --- a/hed/validator/def_validator.py +++ b/hed/validator/def_validator.py @@ -51,7 +51,7 @@ def _validate_def_contents(self, def_tag, def_expand_group): issues """ def_issues = [] - + is_def_tag = def_expand_group is not def_tag is_label_tag = def_tag.extension_or_value_portion placeholder = None found_slash = is_label_tag.find("/") @@ -62,17 +62,27 @@ def _validate_def_contents(self, def_tag, def_expand_group): label_tag_lower = is_label_tag.lower() def_entry 
= self.defs.get(label_tag_lower) if def_entry is None: - def_issues += ErrorHandler.format_error(ValidationErrors.HED_DEF_UNMATCHED, tag=def_tag) + error_code = ValidationErrors.HED_DEF_UNMATCHED + if is_def_tag: + error_code = ValidationErrors.HED_DEF_EXPAND_UNMATCHED + def_issues += ErrorHandler.format_error(error_code, tag=def_tag) else: - def_tag_name, def_contents = def_entry.get_definition(def_tag, placeholder_value=placeholder) + def_tag_name, def_contents = def_entry.get_definition(def_tag, placeholder_value=placeholder, + return_copy_of_tag=True) if def_tag_name: - if def_expand_group is not def_tag and def_expand_group != def_contents: + if is_def_tag and def_expand_group != def_contents: def_issues += ErrorHandler.format_error(ValidationErrors.HED_DEF_EXPAND_INVALID, tag=def_tag, actual_def=def_contents, found_def=def_expand_group) elif def_entry.takes_value: - def_issues += ErrorHandler.format_error(ValidationErrors.HED_DEF_VALUE_MISSING, tag=def_tag) + error_code = ValidationErrors.HED_DEF_VALUE_MISSING + if is_def_tag: + error_code = ValidationErrors.HED_DEF_EXPAND_VALUE_MISSING + def_issues += ErrorHandler.format_error(error_code, tag=def_tag) else: - def_issues += ErrorHandler.format_error(ValidationErrors.HED_DEF_VALUE_EXTRA, tag=def_tag) + error_code = ValidationErrors.HED_DEF_VALUE_EXTRA + if is_def_tag: + error_code = ValidationErrors.HED_DEF_EXPAND_VALUE_EXTRA + def_issues += ErrorHandler.format_error(error_code, tag=def_tag) - return def_issues + return def_issues \ No newline at end of file diff --git a/hed/validator/sidecar_validator.py b/hed/validator/sidecar_validator.py index af12005b1..daa71fb07 100644 --- a/hed/validator/sidecar_validator.py +++ b/hed/validator/sidecar_validator.py @@ -4,6 +4,7 @@ from hed import HedString from hed import Sidecar from hed.models.column_metadata import ColumnMetadata +from hed.errors.error_reporter import sort_issues class SidecarValidator: @@ -49,8 +50,7 @@ def validate(self, sidecar, 
extra_def_dicts=None, name=None, error_handler=None) for hed_string, column_data, position in sidecar.hed_string_iter(error_handler): hed_string_obj = HedString(hed_string, hed_schema=self._schema, def_dict=sidecar_def_dict) - error_handler.push_error_context(ErrorContext.HED_STRING, hed_string_obj, - increment_depth_after=False) + error_handler.push_error_context(ErrorContext.HED_STRING, hed_string_obj) new_issues = hed_validator.run_basic_checks(hed_string_obj, allow_placeholders=True) if not new_issues: new_issues = hed_validator.run_full_string_checks(hed_string_obj) @@ -61,6 +61,7 @@ def validate(self, sidecar, extra_def_dicts=None, name=None, error_handler=None) error_handler.pop_error_context() error_handler.pop_error_context() + issues = sort_issues(issues) return issues def validate_structure(self, sidecar, error_handler): diff --git a/hed/validator/spreadsheet_validator.py b/hed/validator/spreadsheet_validator.py index ba1f341ac..8b8aa9b1f 100644 --- a/hed/validator/spreadsheet_validator.py +++ b/hed/validator/spreadsheet_validator.py @@ -1,9 +1,12 @@ import pandas as pd +import re from hed import BaseInput from hed.errors import ErrorHandler, ValidationErrors, ErrorContext +from hed.errors.error_types import ColumnErrors from hed.models import ColumnType from hed import HedString from hed.models.hed_string_group import HedStringGroup +from hed.errors.error_reporter import sort_issues PANDAS_COLUMN_PREFIX_TO_IGNORE = "Unnamed: " @@ -25,6 +28,7 @@ def validate(self, data, def_dicts=None, name=None, error_handler=None): Parameters: data (BaseInput or pd.DataFrame): Input data to be validated. + If a dataframe, it is assumed to be assembled already. def_dicts(list of DefDict or DefDict): all definitions to use for validation name(str): The name to report errors from this file as error_handler (ErrorHandler): Error context to use. 
Creates a new one if None @@ -41,31 +45,32 @@ def validate(self, data, def_dicts=None, name=None, error_handler=None): # Check the structure of the input data, if it's a BaseInput if isinstance(data, BaseInput): issues += self._validate_column_structure(data, error_handler) - # todo ian: Add more checks here for column inserters + issues += self._validate_square_brackets(data.assemble(skip_square_brackets=True), error_handler) data = data.dataframe_a # Check the rows of the input data issues += self._run_checks(data, error_handler) error_handler.pop_error_context() + + issues = sort_issues(issues) return issues def _run_checks(self, data, error_handler): issues = [] + columns = list(data.columns) for row_number, text_file_row in enumerate(data.itertuples(index=False)): error_handler.push_error_context(ErrorContext.ROW, row_number) row_strings = [] new_column_issues = [] - # todo: make this report the correct column numbers(somehow - it almost surely doesn't right now) for column_number, cell in enumerate(text_file_row): if not cell or cell == "n/a": continue - error_handler.push_error_context(ErrorContext.COLUMN, column_number) + error_handler.push_error_context(ErrorContext.COLUMN, columns[column_number]) column_hed_string = HedString(cell) row_strings.append(column_hed_string) - error_handler.push_error_context(ErrorContext.HED_STRING, column_hed_string, - increment_depth_after=False) + error_handler.push_error_context(ErrorContext.HED_STRING, column_hed_string) new_column_issues = self._hed_validator.run_basic_checks(column_hed_string, allow_placeholders=False) error_handler.add_context_and_filter(new_column_issues) @@ -77,7 +82,7 @@ def _run_checks(self, data, error_handler): continue else: row_string = HedStringGroup(row_strings) - error_handler.push_error_context(ErrorContext.HED_STRING, row_string, increment_depth_after=False) + error_handler.push_error_context(ErrorContext.HED_STRING, row_string) new_column_issues = 
self._hed_validator.run_full_string_checks(row_string) error_handler.add_context_and_filter(new_column_issues) @@ -113,3 +118,85 @@ def _validate_column_structure(self, base_input, error_handler): error_handler.pop_error_context() return issues + + @staticmethod + def _validate_column_refs(df, error_handler): + possible_column_references = [f"{column_name}" for column_name in df.columns if + isinstance(column_name, str) and column_name.lower() != "hed"] + + issues = [] + found_column_references = {} + for column_name in df: + matches = df[column_name].str.findall("\[([a-z_\-\s0-9]+)(? Date: Thu, 23 Mar 2023 18:32:37 -0500 Subject: [PATCH 17/19] Block HED from appearing in sidecars (#635) * Block HED from appearing in sidecars --- hed/errors/error_messages.py | 7 ++++++- hed/errors/error_types.py | 3 ++- hed/validator/sidecar_validator.py | 23 ++++++++++++++++++++++- spec_tests/test_errors.py | 24 ++++++++++++++++++++---- 4 files changed, 50 insertions(+), 7 deletions(-) diff --git a/hed/errors/error_messages.py b/hed/errors/error_messages.py index ca379992f..7fd609a64 100644 --- a/hed/errors/error_messages.py +++ b/hed/errors/error_messages.py @@ -277,7 +277,12 @@ def sidecar_error_unknown_column(column_name): @hed_error(SidecarErrors.SIDECAR_HED_USED, actual_code=SidecarErrors.SIDECAR_INVALID) -def sidecar_hed_used(): +def SIDECAR_HED_USED(): + return "'HED' is a reserved name and cannot be used as a sidecar except in expected places." 
+ + +@hed_error(SidecarErrors.SIDECAR_HED_USED_COLUMN, actual_code=SidecarErrors.SIDECAR_INVALID) +def SIDECAR_HED_USED_COLUMN(): return "'HED' is a reserved name and cannot be used as a sidecar column name" diff --git a/hed/errors/error_types.py b/hed/errors/error_types.py index c4fb5df5f..272bfe299 100644 --- a/hed/errors/error_types.py +++ b/hed/errors/error_types.py @@ -92,8 +92,9 @@ class SidecarErrors: INVALID_POUND_SIGNS_VALUE = 'invalidNumberPoundSigns' INVALID_POUND_SIGNS_CATEGORY = 'tooManyPoundSigns' UNKNOWN_COLUMN_TYPE = 'sidecarUnknownColumn' - SIDECAR_HED_USED = 'SIDECAR_HED_USED' + SIDECAR_HED_USED_COLUMN = 'SIDECAR_HED_USED_COLUMN' SIDECAR_NA_USED = 'SIDECAR_NA_USED' + SIDECAR_HED_USED = 'SIDECAR_HED_USED' class SchemaErrors: HED_SCHEMA_DUPLICATE_NODE = 'HED_SCHEMA_DUPLICATE_NODE' diff --git a/hed/validator/sidecar_validator.py b/hed/validator/sidecar_validator.py index daa71fb07..8c68808e8 100644 --- a/hed/validator/sidecar_validator.py +++ b/hed/validator/sidecar_validator.py @@ -81,6 +81,23 @@ def validate_structure(self, sidecar, error_handler): error_handler.pop_error_context() return all_validation_issues + @staticmethod + def _check_for_key(key, data): + if isinstance(data, dict): + if key in data: + return bool(data[key]) + else: + for sub_data in data.values(): + result = SidecarValidator._check_for_key(key, sub_data) + if result is not None: + return result + elif isinstance(data, list): + for sub_data in data: + result = SidecarValidator._check_for_key(key, sub_data) + if result is not None: + return result + return None + def _validate_column_structure(self, column_name, dict_for_entry, error_handler): """ Checks primarily for type errors such as expecting a string and getting a list in a json sidecar. 
@@ -93,13 +110,17 @@ def _validate_column_structure(self, column_name, dict_for_entry, error_handler) """ val_issues = [] if column_name in self.reserved_column_names: - val_issues += error_handler.format_error_with_context(SidecarErrors.SIDECAR_HED_USED) + val_issues += error_handler.format_error_with_context(SidecarErrors.SIDECAR_HED_USED_COLUMN) return val_issues column_type = Sidecar._detect_column_type(dict_for_entry=dict_for_entry) if column_type is None: val_issues += error_handler.format_error_with_context(SidecarErrors.UNKNOWN_COLUMN_TYPE, column_name=column_name) + elif column_type == ColumnType.Ignore: + found_hed = self._check_for_key("HED", dict_for_entry) + if found_hed: + val_issues += error_handler.format_error_with_context(SidecarErrors.SIDECAR_HED_USED) elif column_type == ColumnType.Categorical: raw_hed_dict = dict_for_entry["HED"] if not raw_hed_dict: diff --git a/spec_tests/test_errors.py b/spec_tests/test_errors.py index 9c80d4d98..81942a915 100644 --- a/spec_tests/test_errors.py +++ b/spec_tests/test_errors.py @@ -11,6 +11,13 @@ from hed.errors import ErrorHandler, get_printable_issue_string +known_errors = [ + 'SIDECAR_INVALID', + 'CHARACTER_INVALID', + 'COMMA_MISSING', + "DEF_EXPAND_INVALID", + "DEF_INVALID", +] skip_tests = ["VERSION_DEPRECATED", "CHARACTER_INVALID", "STYLE_WARNING"] @@ -30,6 +37,12 @@ def run_single_test(self, test_file): test_info = json.load(fp) for info in test_info: error_code = info['error_code'] + verify_code = False + if error_code in known_errors: + verify_code = True + + # To be deprecated once we add this to all tests + self._verify_code = verify_code if error_code in skip_tests: print(f"Skipping {error_code} test") continue @@ -62,6 +75,13 @@ def report_result(self, expected_result, issues, error_code, description, name, print(f"Passed '{test_type}' (which should fail) '{name}': {test}") print(get_printable_issue_string(issues)) self.fail_count.append(name) + elif self._verify_code: + if any(issue['code'] == 
error_code for issue in issues): + return + print(f"{error_code}: {description}") + print(f"Failed '{test_type}' (unexpected errors found) '{name}': {test}") + print(get_printable_issue_string(issues)) + self.fail_count.append(name) else: if issues: print(f"{error_code}: {description}") @@ -75,9 +95,6 @@ def _run_single_string_test(self, info, schema, def_dict, error_code, descriptio for test in tests: test_string = HedString(test, schema) - # This expand should not be required here. - def_dict.expand_def_tags(test_string) - issues = string_validator.run_basic_checks(test_string, False) issues += string_validator.run_full_string_checks(test_string) error_handler.add_context_and_filter(issues) @@ -86,7 +103,6 @@ def _run_single_string_test(self, info, schema, def_dict, error_code, descriptio def _run_single_sidecar_test(self, info, schema, def_dict, error_code, description, name, error_handler): for result, tests in info.items(): for test in tests: - # Well this is a disaster buffer = io.BytesIO(json.dumps(test).encode("utf-8")) sidecar = Sidecar(buffer) issues = sidecar.validate(hed_schema=schema, extra_def_dicts=def_dict, error_handler=error_handler) From 69f320d4eb91d6a1b8700140699f9100f7be6469 Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Mon, 27 Mar 2023 14:48:41 -0500 Subject: [PATCH 18/19] Updated the search in analysis tools --- hed/models/df_util.py | 6 +- hed/tools/analysis/analysis_util.py | 68 ++++++++++++++++++- .../operations/factor_hed_tags_op.py | 25 ++----- .../test_analysis_util_assemble_hed.py | 50 +++++++------- 4 files changed, 98 insertions(+), 51 deletions(-) diff --git a/hed/models/df_util.py b/hed/models/df_util.py index f9fa19dcc..989299d2f 100644 --- a/hed/models/df_util.py +++ b/hed/models/df_util.py @@ -26,7 +26,7 @@ def get_assembled(tabular_file, sidecar, hed_schema, extra_def_dicts=None, join_ expand_defs: bool Expand any def tags found Returns: - tuple: A list of HedStrings, or a list of 
lists of HedStrings, DefinitionDict + tuple: A list of HedStrings or a list of lists of HedStrings, DefinitionDict """ if isinstance(sidecar, str): @@ -76,13 +76,13 @@ def convert_to_form(df, hed_schema, tag_form, columns=None): def shrink_defs(df, hed_schema, columns=None): - """ Shrinks any def-expand tags found in the dataframe. + """ Shrinks any def-expand tags found in the specified columns in the dataframe. Converts in place Parameters: df (pd.Dataframe or pd.Series): The dataframe or series to modify hed_schema (HedSchema or None): The schema to use to identify defs. - columns (list or None): The columns to modify on the dataframe + columns (list or None): The columns to modify on the dataframe. """ if isinstance(df, pd.Series): mask = df.str.contains('Def-expand/', case=False) diff --git a/hed/tools/analysis/analysis_util.py b/hed/tools/analysis/analysis_util.py index a4c57c9f6..aa13f288d 100644 --- a/hed/tools/analysis/analysis_util.py +++ b/hed/tools/analysis/analysis_util.py @@ -6,6 +6,7 @@ from hed.models.hed_tag import HedTag from hed.models.hed_group import HedGroup from hed.models import df_util +from hed.models import QueryParser def assemble_hed(data_input, sidecar, schema, columns_included=None, expand_defs=False): @@ -44,6 +45,68 @@ def assemble_hed(data_input, sidecar, schema, columns_included=None, expand_defs return df, definitions +def get_expression_parsers(queries, query_names=None): + """ Returns a list of expression parsers and query_names. + + Parameters: + queries (list): A list of query strings or QueryParser objects + query_names (list): A list of column names for results of queries. If missing --- query_1, query_2, etc. + + Returns: + DataFrame - containing the search strings + + Raises: + ValueError - if query names are invalid or duplicated. 
+ + """ + expression_parsers = [] + if not query_names: + query_names = [f"query_{index}" for index in range(len(queries))] + elif len(queries) != len(query_names): + raise ValueError("QueryNamesLengthBad", + f"The query_names length {len(query_names)} must be empty or equal" + + f"to the queries length {len(queries)}.") + elif len(set(query_names)) != len(query_names): + raise ValueError("DuplicateQueryNames", f"The query names {str(query_names)} list has duplicates") + for index, query in enumerate(queries): + if not query: + raise ValueError("BadQuery", f"Query [{index}]: {query} cannot be empty") + elif isinstance(query, str): + try: + next_query = QueryParser(query) + except Exception: + raise ValueError("BadQuery", f"Query [{index}]: {query} cannot be parsed") + else: + next_query = query + expression_parsers.append(next_query) + return expression_parsers, query_names + + +def search_strings(hed_strings, queries, query_names=None): + """ Returns a DataFrame of factors based on results of queries. + + Parameters: + hed_strings (list): A list of HedString objects (empty entries or None entries are 0's) + queries (list): A list of query strings or QueryParser objects + query_names (list): A list of column names for results of queries. If missing --- query_1, query_2, etc. + + Returns: + DataFrame - containing the factor vectors with results of the queries + + Raises: + ValueError - if query names are invalid or duplicated. + + """ + + expression_parsers, query_names = get_expression_parsers(queries, query_names=query_names) + df_factors = pd.DataFrame(0, index=range(len(hed_strings)), columns=query_names) + for parse_ind, parser in enumerate(expression_parsers): + for index, next_item in enumerate(hed_strings): + match = parser.search(next_item) + if match: + df_factors.at[index, query_names[parse_ind]] = 1 + return df_factors + # def get_assembled_strings(table, hed_schema=None, expand_defs=False): # """ Return HED string objects for a tabular file. 
# @@ -61,7 +124,7 @@ def assemble_hed(data_input, sidecar, schema, columns_included=None, expand_defs # return hed_list # -# def search_tabular(data_input, hed_schema, query, columns_included=None): +# def search_tabular(data_input, sidecar, hed_schema, query, extra_def_dicts=None, columns_included=None): # """ Return a dataframe with results of query. # # Parameters: @@ -76,7 +139,8 @@ def assemble_hed(data_input, sidecar, schema, columns_included=None, expand_defs # """ # # eligible_columns, missing_columns = separate_values(list(data_input.dataframe.columns), columns_included) -# hed_list = get_assembled_strings(data_input, hed_schema=hed_schema, expand_defs=True) +# hed_list, definitions = df_util.get_assembled(data_input, sidecar, hed_schema, extra_def_dicts=None, join_columns=True, +# shrink_defs=False, expand_defs=True) # expression = QueryParser(query) # hed_tags = [] # row_numbers = [] diff --git a/hed/tools/remodeling/operations/factor_hed_tags_op.py b/hed/tools/remodeling/operations/factor_hed_tags_op.py index 930f1353f..ae1f35e63 100644 --- a/hed/tools/remodeling/operations/factor_hed_tags_op.py +++ b/hed/tools/remodeling/operations/factor_hed_tags_op.py @@ -8,6 +8,7 @@ from hed.models.sidecar import Sidecar from hed.models.expression_parser import QueryParser from hed.models.df_util import get_assembled +from hed.tools.analysis.analysis_util import get_expression_parsers, search_strings class FactorHedTagsOp(BaseOp): @@ -65,21 +66,8 @@ def __init__(self, parameters): self.queries = parameters['queries'] self.query_names = parameters['query_names'] self.remove_types = parameters['remove_types'] - if not self.query_names: - self.query_names = [f"query_{index}" for index in range(len(self.queries))] - elif len(self.queries) != len(self.query_names): - raise ValueError("QueryNamesLengthBad", - f"The query_names length {len(self.query_names)} must be empty or equal" + - f"to the queries length {len(self.queries)} .") - elif len(set(self.query_names)) != 
len(self.query_names): - raise ValueError("DuplicateQueryNames", f"The query names {str(self.query_names)} list has duplicates") - self.expression_parsers = [] - for index, query in enumerate(self.queries): - try: - next_query = QueryParser(query) - except Exception: - raise ValueError("BadQuery", f"Query [{index}]: {query} cannot be parsed") - self.expression_parsers.append(next_query) + self.expression_parsers, self.query_names = get_expression_parsers(self.queries, + query_names=parameters['query_names']) def do_op(self, dispatcher, df, name, sidecar=None): """ Factor the column using HED tag queries. @@ -111,12 +99,7 @@ def do_op(self, dispatcher, df, name, sidecar=None): df_list = [input_data.dataframe] hed_strings, _ = get_assembled(input_data, sidecar, dispatcher.hed_schema, extra_def_dicts=None, join_columns=True, shrink_defs=False, expand_defs=True) - df_factors = pd.DataFrame(0, index=range(len(hed_strings)), columns=self.query_names) - for parse_ind, parser in enumerate(self.expression_parsers): - for index, next_item in enumerate(hed_strings): - match = parser.search(next_item) - if match: - df_factors.at[index, self.query_names[parse_ind]] = 1 + df_factors = search_strings(hed_strings, self.expression_parsers, query_names=self.query_names) if len(df_factors.columns) > 0: df_list.append(df_factors) df_new = pd.concat(df_list, axis=1) diff --git a/tests/tools/analysis/test_analysis_util_assemble_hed.py b/tests/tools/analysis/test_analysis_util_assemble_hed.py index 318c3aa54..75d143659 100644 --- a/tests/tools/analysis/test_analysis_util_assemble_hed.py +++ b/tests/tools/analysis/test_analysis_util_assemble_hed.py @@ -3,9 +3,8 @@ from pandas import DataFrame from hed import schema as hedschema from hed.models import Sidecar, TabularInput, DefinitionDict -from hed.tools.analysis.analysis_util import assemble_hed - - +from hed.models import df_util +from hed.tools.analysis.analysis_util import assemble_hed, search_strings # noinspection PyBroadException @@ 
-25,7 +24,6 @@ def setUpClass(cls): schema = hedschema.load_schema(schema_path) cls.schema = schema sidecar1 = Sidecar(json_path, name='face_sub1_json') - cls.sidecar_path = sidecar1 cls.sidecar1 = sidecar1 cls.input_data = TabularInput(events_path, sidecar=sidecar1, name="face_sub1_events") cls.input_data_no_sidecar = TabularInput(events_path, name="face_sub1_events_no_sidecar") @@ -96,27 +94,29 @@ def test_assemble_hed_bad_column_no_expand(self): self.assertNotEqual(first_str2.find('Def/'), -1, "assemble_hed with def expand has no Def tag") self.assertEqual(first_str2.find('Def-expand/'), -1, "assemble_hed with def expand has Def-expand tags") - # def test_search_tabular(self): - # query1 = "sensory-event" - # df1 = search_tabular(self.input_data, self.schema, query1, columns_included=None) - # self.assertIsInstance(df1, DataFrame, "search_tabular returns a dataframe when the query is satisfied.") - # self.assertEqual(len(df1.columns), 2, "search_tabular has the right number of columns when query okay") - # self.assertEqual(len(df1.index), 155, "search_tabular has right number of rows when query okay") - # query2 = 'data-feature' - # df2 = search_tabular(self.input_data, self.hed_schema, query2, columns_included=None) - # self.assertFalse(df2, "search_tabular returns None when query is not satisfied.") - # - # query3 = "sensory-event" - # df3 = search_tabular(self.input_data, self.hed_schema, query3, columns_included=['event_type', 'rep_status']) - # self.assertIsInstance(df3, DataFrame, "search_tabular returns a DataFrame when extra columns") - # self.assertEqual(len(df3.columns), 3, "search_tabular returns right number of columns when extra columns") - # self.assertEqual(len(df3.index), 155, "search_tabular has right number of rows when query okay") - # - # df4 = search_tabular(self.input_data, self.hed_schema, query3, - # columns_included=['onset', 'event_type', 'rep_status']) - # self.assertIsInstance(df4, DataFrame, "search_tabular returns a DataFrame when 
extra columns") - # self.assertEqual(len(df4.columns), 4, "search_tabular returns right number of columns when extra columns") - # self.assertEqual(len(df4.index), 155, "search_tabular has right number of rows when query okay") + def test_search_strings(self): + hed_strings, dict1 = df_util.get_assembled(self.input_data, self.sidecar1, self.schema, extra_def_dicts=None, + join_columns=True, shrink_defs=False, expand_defs=True) + queries1 = ["sensory-event"] + query_names1 = ["sensory"] + df1 = search_strings(hed_strings, queries1, query_names1) + self.assertIsInstance(df1, DataFrame, "search_tabular returns a dataframe when the query is satisfied.") + self.assertEqual(len(df1.columns), 1, "search_tabular has the right number of columns when query okay") + self.assertEqual(len(df1.index), 200, "search_tabular has right number of rows when query okay") + queries2 = ['data-feature', "sensory-event"] + query_names2 = ['data', 'sensory'] + df2 = search_strings(hed_strings, queries2, query_names2) + self.assertEqual(len(df2.columns), 2, "search_tabular has the right number of columns when query okay") + self.assertEqual(len(df2.index), 200, "search_tabular has right number of rows when query okay") + totals = df2.sum(axis=0) + self.assertFalse(totals.loc['data']) + self.assertEqual(totals.loc['sensory'], 155) + queries3 = ['image', "sensory-event", "face"] + query_names3 = ['image', 'sensory', "faced"] + df3 = search_strings(hed_strings, queries3, query_names3) + self.assertIsInstance(df3, DataFrame, "search_tabular returns a DataFrame when extra columns") + self.assertEqual(len(df3.columns), 3, "search_tabular returns right number of columns when extra columns") + self.assertEqual(len(df3.index), 200, "search_tabular has right number of rows when query okay") if __name__ == '__main__': From 6708094ed6bfc61472f9660a714cbea26c3df672 Mon Sep 17 00:00:00 2001 From: IanCa Date: Wed, 29 Mar 2023 19:06:01 -0500 Subject: [PATCH 19/19] Fix sorting for hed string context --- 
hed/errors/error_reporter.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/hed/errors/error_reporter.py b/hed/errors/error_reporter.py index cb1a959d5..836ac2c4f 100644 --- a/hed/errors/error_reporter.py +++ b/hed/errors/error_reporter.py @@ -21,7 +21,6 @@ ErrorContext.SIDECAR_KEY_NAME, ErrorContext.ROW, ErrorContext.COLUMN, - ErrorContext.HED_STRING, ErrorContext.SCHEMA_SECTION, ErrorContext.SCHEMA_TAG, ErrorContext.SCHEMA_ATTRIBUTE, @@ -32,6 +31,10 @@ ErrorContext.ROW, ] +hed_string_sort_list = [ + ErrorContext.HED_STRING +] + def _register_error_function(error_type, wrapper_func): if error_type in error_functions: raise KeyError(f"{error_type} defined more than once.") @@ -186,10 +189,13 @@ def push_error_context(self, context_type, context): """ if context is None: + from hed import HedString if context_type in int_sort_list: context = 0 + elif context_type in hed_string_sort_list: + context = HedString("") else: - context_type = "" + context = "" self.error_context.append((context_type, context)) def pop_error_context(self): @@ -446,10 +452,13 @@ def sort_issues(issues, reverse=False): Returns: list: The sorted list of issues.""" def _get_keys(d): + from hed import HedString result = [] for key in default_sort_list: if key in int_sort_list: result.append(d.get(key, -1)) + elif key in hed_string_sort_list: + result.append(d.get(key, HedString(""))) else: result.append(d.get(key, "")) return tuple(result)