Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 8 additions & 3 deletions hed/tools/remodeling/operations/base_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,9 @@ def get_summary_details(self, include_individual=True):
Users are expected to provide merge_all_info and get_details_dict to support this.

"""
merged_summary = self.merge_all_info()
if merged_summary:
details = self.get_details_dict(merged_summary)
merged_counts = self.merge_all_info()
if merged_counts:
details = self.get_details_dict(merged_counts)
else:
details = "Overall summary unavailable"

Expand Down Expand Up @@ -220,6 +220,11 @@ def get_details_dict(self, summary_info):
Notes:
Abstract method to be implemented by each individual summary.

Notes:
The expected return value is a dictionary of the form:

{"Name": "", "Total events": 0, "Total files": 0, "Files": [], "Specifics": {}}

"""
raise NotImplementedError

Expand Down
15 changes: 10 additions & 5 deletions hed/tools/remodeling/operations/summarize_column_names_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def get_details_dict(self, column_summary):
return {"Name": summary['Summary name'], "Total events": "n/a",
"Total files": summary['Number files'],
"Files": [name for name in column_summary.file_dict.keys()],
"Columns": summary['Columns']}
"Specifics": {"Columns": summary['Columns']}}

def merge_all_info(self):
""" Create a ColumnNameSummary containing the overall dataset summary.
Expand Down Expand Up @@ -140,8 +140,11 @@ def _get_result_string(self, name, result, indent=BaseSummary.DISPLAY_INDENT):
"""
if name == "Dataset":
return self._get_dataset_string(result, indent)
columns = result["Columns"][0]
return f"{indent}{str(columns['Column names'])}"
columns = result.get("Specifics", {}).get("Columns", [])
if columns:
return f"{indent}{str(columns[0])}"
else:
return ""

@staticmethod
def _get_dataset_string(result, indent=BaseSummary.DISPLAY_INDENT):
Expand All @@ -155,8 +158,10 @@ def _get_dataset_string(result, indent=BaseSummary.DISPLAY_INDENT):
str: Formatted string suitable for saving in a file or printing.

"""
sum_list = [f"Dataset: Number of files={result.get('Number files', 0)}"]
for element in result.get("Unique headers", []):
sum_list = [f"Dataset: Number of files={result.get('Total files', 0)}"]
specifics = result.get("Specifics", {})
columns = specifics.get("Columns", {})
for element in columns:
sum_list.append(f"{indent}Columns: {str(element['Column names'])}")
for file in element.get("Files", []):
sum_list.append(f"{indent}{indent}{file}")
Expand Down
15 changes: 12 additions & 3 deletions hed/tools/remodeling/operations/summarize_column_values_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,14 @@ def get_details_dict(self, summary):
for key, dict_entry in this_summary['Categorical columns'].items():
num_disp, sorted_tuples = ColumnValueSummary.sort_dict(dict_entry, reverse=True)
this_summary['Categorical columns'][key] = dict(sorted_tuples[:min(num_disp, self.op.max_categorical)])
return this_summary
return {"Name": this_summary['Name'], "Total events": this_summary["Total events"],
"Total files": this_summary['Total files'],
"Files": [name for name in this_summary['Files'].keys()],
"Specifics": {"Value columns": this_summary['Value columns'].keys(),
"Skip columns": this_summary['Skip columns'],
"Value columns": this_summary['Value columns'],
"Categorical columns": this_summary['Categorical columns'],
"Categorical counts": this_summary['Categorical counts']}}

def merge_all_info(self):
""" Create a TabularSummary containing the overall dataset summary.
Expand Down Expand Up @@ -198,10 +205,11 @@ def _get_dataset_string(self, result, indent=BaseSummary.DISPLAY_INDENT):
"""
sum_list = [f"Dataset: Total events={result.get('Total events', 0)} "
f"Total files={result.get('Total files', 0)}"]
cat_string = self._get_categorical_string(result, offset="", indent=indent)
specifics = result["Specifics"]
cat_string = self._get_categorical_string(specifics, offset="", indent=indent)
if cat_string:
sum_list.append(cat_string)
val_cols = result.get("Value columns", {})
val_cols = specifics.get("Value columns", {})
if val_cols:
sum_list.append(ColumnValueSummary._get_value_string(val_cols, offset="", indent=indent))
return "\n".join(sum_list)
Expand All @@ -219,6 +227,7 @@ def _get_individual_string(self, result, indent=BaseSummary.DISPLAY_INDENT):

"""
sum_list = [f"Total events={result.get('Total events', 0)}"]
specifics = result.get("Specifics", {})
cat_cols = result.get("Categorical columns", {})
if cat_cols:
sum_list.append(self._get_categorical_string(cat_cols, offset=indent, indent=indent))
Expand Down
2 changes: 2 additions & 0 deletions hed/tools/remodeling/operations/summarize_definitions_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,8 @@ def get_details_dict(self, def_gatherer):

known_defs_summary.update(ambiguous_defs_summary)
known_defs_summary.update(errors_summary)
return {"Name": "", "Total events": 0, "Total files": 0, "Files": [], "Specifics": known_defs_summary}

return known_defs_summary

def merge_all_info(self):
Expand Down
12 changes: 6 additions & 6 deletions hed/tools/remodeling/operations/summarize_hed_tags_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,24 +118,24 @@ def update_summary(self, new_info):
counts.update_event_counts(hed, new_info['name'])
self.summary_dict[new_info["name"]] = counts

def get_details_dict(self, merge_counts):
def get_details_dict(self, tag_counts):
""" Return the summary-specific information in a dictionary.

Parameters:
merge_counts (HedTagCounts): Contains the counts of tags in the dataset.
tag_counts (HedTagCounts): Contains the counts of tags in the dataset.

Returns:
dict: dictionary with the summary results.

"""
template, unmatched = merge_counts.organize_tags(self.tags)
template, unmatched = tag_counts.organize_tags(self.tags)
details = {}
for key, key_list in self.tags.items():
details[key] = self._get_details(key_list, template, verbose=True)
leftovers = [value.get_info(verbose=True) for value in unmatched]
return {"Name": merge_counts.name, "Total events": merge_counts.total_events,
"Total files": len(merge_counts.files.keys()),
"Files": [name for name in merge_counts.files.keys()],
return {"Name": tag_counts.name, "Total events": tag_counts.total_events,
"Total files": len(tag_counts.files.keys()),
"Files": [name for name in tag_counts.files.keys()],
"Specifics": {"Main tags": details, "Other tags": leftovers}}

def _get_result_string(self, name, result, indent=BaseSummary.DISPLAY_INDENT):
Expand Down
29 changes: 18 additions & 11 deletions hed/tools/remodeling/operations/summarize_hed_type_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def update_summary(self, new_info):
Parameters:
new_info (dict): A dictionary with the parameters needed to update a summary.

Notes:
Notes:
- The summary needs a "name" str, a "schema", a "df", and a "Sidecar".

"""
Expand All @@ -104,7 +104,7 @@ def update_summary(self, new_info):
if sidecar and not isinstance(sidecar, Sidecar):
sidecar = Sidecar(sidecar)
input_data = TabularInput(new_info['df'], sidecar=sidecar, name=new_info['name'])
hed_strings, definitions = get_assembled(input_data, sidecar, new_info['schema'],
hed_strings, definitions = get_assembled(input_data, sidecar, new_info['schema'],
extra_def_dicts=None, join_columns=True, expand_defs=False)
context_manager = HedContextManager(hed_strings, new_info['schema'])
type_values = HedTypeValues(context_manager, definitions, new_info['name'], type_tag=self.type_tag)
Expand All @@ -124,7 +124,12 @@ def get_details_dict(self, counts):
dict: dictionary with the summary results.

"""
return counts.get_summary()
summary = counts.get_summary()
files = summary.get('files', [])
return {"Name": summary.get("name", ""), "Total events": summary.get("total_events", 0),
"Total files": len(files), "Files": files,
"Specifics": {"Type tag": summary.get('type_tag', 'condition-variable'),
"Type info": summary.get('details', {})}}

def merge_all_info(self):
""" Create a HedTypeCounts containing the overall dataset HED type summary.
Expand Down Expand Up @@ -170,11 +175,12 @@ def _get_dataset_string(result, indent=BaseSummary.DISPLAY_INDENT):
str: Formatted string suitable for saving in a file or printing.

"""
details = result.get('details', {})
sum_list = [f"Dataset: Type={result['type_tag']} Type values={len(details)} "
f"Total events={result.get('total_events', 0)} Total files={len(result.get('files', []))}"]
specifics = result.get('Specifics', {})
type_info = specifics.get('Type info', {})
sum_list = [f"Dataset: Type={specifics.get('Type tag', 'condition-variable')} Type values={len(type_info)} "
f"Total events={result.get('Total events', 0)} Total files={len(result.get('Files', []))}"]

for key, item in details.items():
for key, item in type_info.items():
str1 = f"{item['events']} event(s) out of {item['total_events']} total events in " + \
f"{len(item['files'])} file(s)"
if item['level_counts']:
Expand All @@ -200,11 +206,12 @@ def _get_individual_string(result, indent=BaseSummary.DISPLAY_INDENT):
str: Formatted string suitable for saving in a file or printing.

"""
details = result.get('details', {})
sum_list = [f"Type={result['type_tag']} Type values={len(details)} "
f"Total events={result.get('total_events', 0)}"]
specifics = result.get('Specifics', {})
type_info = specifics.get('Type info', {})
sum_list = [f"Type={specifics.get('Type tag', 'condition-variable')} Type values={len(type_info)} "
f"Total events={result.get('Total events', 0)}"]

for key, item in details.items():
for key, item in type_info.items():
sum_list.append(f"{indent*2}{key}: {item['levels']} levels in {item['events']} events")
str1 = ""
if item['direct_references']:
Expand Down
Loading