Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 20 additions & 15 deletions hed/tools/remodeling/dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ def __init__(self, operation_list, data_root=None,

:raises ValueError:
- If any of the operations cannot be parsed correctly.

"""

self.data_root = data_root
self.backup_name = backup_name
self.backup_man = None
Expand All @@ -60,7 +60,6 @@ def get_summaries(self, file_formats=['.txt', '.json']):

Returns:
list: A list of dictionaries of summaries keyed to filenames.

"""

summary_list = []
Expand Down Expand Up @@ -101,9 +100,9 @@ def get_data_file(self, file_designator):
In this case, the corresponding backup file is read and returned.
- If a string is passed and there is no backup manager,
the data file corresponding to the file_designator is read and returned.
- If a Pandas DataFrame, return a copy.

- If a Pandas DataFrame, return a copy.
"""

if isinstance(file_designator, pd.DataFrame):
return file_designator.copy()
if self.backup_man:
Expand All @@ -126,7 +125,6 @@ def get_summary_save_dir(self):

:raises HedFileError:
- If this dispatcher does not have a data_root.

"""

if self.data_root:
Expand All @@ -143,7 +141,6 @@ def run_operations(self, file_path, sidecar=None, verbose=False):

Returns:
DataFrame: The processed dataframe.

"""

# string to functions
Expand Down Expand Up @@ -173,8 +170,8 @@ def save_summaries(self, save_formats=['.json', '.txt'], individual_summaries="s
- "consolidated" means that the overall summary and summaries of individual files are in one summary file.
- "individual" means that the summaries of individual files are in separate files.
- "none" means that only the overall summary is produced.

"""

if not save_formats:
return
if not summary_dir:
Expand All @@ -185,6 +182,15 @@ def save_summaries(self, save_formats=['.json', '.txt'], individual_summaries="s

@staticmethod
def parse_operations(operation_list):
""" Return a parsed a list of remodeler operations.

Parameters:
operation_list (list): List of JSON remodeler operations.

Returns:
list: List of Python objects containing parsed remodeler operations.
"""

operations = []
for index, item in enumerate(operation_list):
new_operation = valid_operations[item["operation"]](item["parameters"])
Expand All @@ -197,24 +203,24 @@ def prep_data(df):

Parameters:
df (DataFrame): The DataFrame to be processed.

"""

result = df.replace('n/a', np.nan)
# Comment in the next line if this behavior was actually needed, but I don't think it is.
# result = result.infer_objects(copy=False)
return result

@staticmethod
def post_proc_data(df):
""" Replace all nan entries with 'n/a' for BIDS compliance
""" Replace all nan entries with 'n/a' for BIDS compliance.

Parameters:
df (DataFrame): The DataFrame to be processed.

Returns:
DataFrame: DataFrame with the 'np.NAN replaced by 'n/a'

DataFrame: DataFrame with the np.nan entries replaced by 'n/a'.
"""

dtypes = df.dtypes.to_dict()
for col_name, typ in dtypes.items():
if typ == 'category':
Expand All @@ -232,10 +238,9 @@ def errors_to_str(messages, title="", sep='\n'):

Returns:
str: Single string representing the messages.


"""
error_list = [0]*len(messages)

error_list = [0] * len(messages)
for index, message in enumerate(messages):
error_list[index] = f"Operation[{message.get('index', None)}] " + \
f"has error:{message.get('error_type', None)}" + \
Expand All @@ -255,8 +260,8 @@ def get_schema(hed_versions):

Returns:
HedSchema or HedSchemaGroup: Objects loaded from the hed_versions specification.

"""

if not hed_versions:
return None
elif isinstance(hed_versions, str) or isinstance(hed_versions, list):
Expand Down
86 changes: 73 additions & 13 deletions hed/tools/remodeling/operations/summarize_hed_tags_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,43 @@ class SummarizeHedTagsOp(BaseOp):
"type": "boolean"
},
"word_cloud": {
"type": "boolean"
"type": "object",
"properties": {
"height": {
"type": "integer"
},
"width": {
"type": "integer"
},
"prefer_horizontal": {
"type": "number"
},
"min_font_size": {
"type": "number"
},
"max_font_size": {
"type": "number"
},
"scale_adjustment": {
"type": "number"
},
"contour_width": {
"type": "number"
},
"contour_color": {
"type": "string"
},
"background_color": {
"type": "string"
},
"use_mask": {
"type": "boolean"
},
"mask_path": {
"type": "string"
}
},
"additionalProperties": False
},
},
"required": [
Expand Down Expand Up @@ -102,7 +138,26 @@ def __init__(self, parameters):
self.include_context = parameters.get('include_context', True)
self.replace_defs = parameters.get("replace_defs", True)
self.remove_types = parameters.get("remove_types", [])
self.word_cloud = parameters.get("word_cloud", False)
if "word_cloud" not in parameters:
self.word_cloud = None
else:
wc_params = parameters["word_cloud"]
self.word_cloud = {
"height": wc_params.get("height", 300),
"width": wc_params.get("width", 400),
"prefer_horizontal": wc_params.get("prefer_horizontal", 0.75),
"min_font_size": wc_params.get("min_font_size", 8),
"max_font_size": wc_params.get("max_font_size", 15),
"scale_adjustment": wc_params.get("scale_adjustment", 7),
"contour_width": wc_params.get("contour_width", 3),
"contour_color": wc_params.get("contour_color", 'black'),
"background_color": wc_params.get("background_color", None),
"use_mask": wc_params.get("use_mask", False),
"mask_path": wc_params.get("mask_path", None)
}
if self.word_cloud["use_mask"] and not self.word_cloud["mask_path"]:
self.word_cloud["mask_path"] = os.path.realpath(os.path.join(os.path.dirname(__file__),
'../../../resources/word_cloud_brain_mask.png'))

def do_op(self, dispatcher, df, name, sidecar=None):
""" Summarize the HED tags present in the dataset.
Expand Down Expand Up @@ -144,6 +199,7 @@ def __init__(self, sum_op):
sum_op (BaseOp): Operation associated with this summary.

"""

super().__init__(sum_op)
self.sum_op = sum_op

Expand Down Expand Up @@ -237,31 +293,35 @@ def save_visualizations(self, save_dir, file_formats=['.svg'], individual_summar
"""
if not self.sum_op.word_cloud:
return
else:
wc = self.sum_op.word_cloud
# summary = self.get_summary(individual_summaries='none')
summary = self.get_summary(individual_summaries='none')
overall_summary = summary.get("Dataset", {})
overall_summary = overall_summary.get("Overall summary", {})
specifics = overall_summary.get("Specifics", {})
word_dict = self.summary_to_dict(specifics)
width = 400
height = 300
mask_path = os.path.realpath(os.path.join(os.path.dirname(__file__),
'../../../resources/word_cloud_brain_mask.png'))
tag_wc = create_wordcloud(word_dict, mask_path=mask_path, width=width, height=height)
word_dict = self.summary_to_dict(specifics, scale_adjustment=wc["scale_adjustment"])

tag_wc = create_wordcloud(word_dict, mask_path=wc["mask_path"], width=wc["width"], height=wc["height"],
prefer_horizontal=wc["prefer_horizontal"], background_color=wc["background_color"],
min_font_size=wc["min_font_size"], max_font_size=wc["max_font_size"],
contour_width=wc["contour_width"], contour_color=wc["contour_color"])
svg_data = word_cloud_to_svg(tag_wc)
cloud_filename = os.path.realpath(os.path.join(save_dir, self.op.summary_name, '_word_cloud.svg'))
cloud_filename = os.path.realpath(os.path.join(save_dir, self.sum_op.summary_name,
self.sum_op.summary_name + '_word_cloud.svg'))
with open(cloud_filename, "w") as outfile:
outfile.writelines(svg_data)

@staticmethod
def summary_to_dict(specifics, transform=np.log10, adjustment=7):
def summary_to_dict(specifics, transform=np.log10, scale_adjustment=7):
"""Convert a HedTagSummary json specifics dict into the word cloud input format.

Parameters:
specifics(dict): Dictionary with keys "Main tags" and "Other tags".
transform(func): The function to transform the number of found tags.
Default log10
adjustment(int): Value added after transform.
scale_adjustment(int): Value added after transform.

Returns:
word_dict(dict): a dict of the words and their occurrence count.

Expand All @@ -278,10 +338,10 @@ def transform(x):
if tag == "Exclude tags":
continue
for tag_sub_dict in tag_sub_list:
word_dict[tag_sub_dict['tag']] = transform(tag_sub_dict['events']) + adjustment
word_dict[tag_sub_dict['tag']] = transform(tag_sub_dict['events']) + scale_adjustment
other_dict = specifics.get("Other tags", [])
for tag_sub_list in other_dict:
word_dict[tag_sub_list['tag']] = transform(tag_sub_list['events']) + adjustment
word_dict[tag_sub_list['tag']] = transform(tag_sub_list['events']) + scale_adjustment
return word_dict

@staticmethod
Expand Down
2 changes: 1 addition & 1 deletion hed/tools/visualization/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" Visualization tools for HED. """

from .tag_word_cloud import create_wordcloud, summary_to_dict, word_cloud_to_svg
from .tag_word_cloud import create_wordcloud, word_cloud_to_svg

32 changes: 2 additions & 30 deletions hed/tools/visualization/tag_word_cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ def create_wordcloud(word_dict, mask_path=None, background_color=None, width=400
kwargs.setdefault('color_func', default_color_func)
kwargs.setdefault('relative_scaling', 1)
kwargs.setdefault('max_font_size', height / 20)
kwargs.setdefault('min_font_size', 8)
kwargs.setdefault('min_font_size', 8),


wc = WordCloud(background_color=background_color, mask=mask_image,
width=width, height=height, mode="RGBA", **kwargs)
Expand All @@ -66,35 +67,6 @@ def word_cloud_to_svg(wc):
return svg_string


def summary_to_dict(summary, transform=np.log10, adjustment=5):
    """Build the word cloud input dictionary from a HedTagSummary JSON dict.

    Parameters:
        summary(dict): The summary from a SummarizeHedTagsOp.
        transform(func): Function applied to each tag's event count (Default log10).
            If None, the counts are used untransformed.
        adjustment(int): Value added after transform.

    Returns:
        word_dict(dict): Maps each tag to its transformed event count plus the adjustment.

    :raises KeyError:
        A tag entry is missing its 'tag' or 'events' key.

    """
    # Fall back to the identity function when no transform is requested.
    scale = transform if transform is not None else (lambda count: count)

    # Only the "Main tags" section of the overall summary is consulted here.
    main_tags = summary.get("Overall summary", {}).get("Specifics", {}).get("Main tags", {})

    word_dict = {}
    for entries in main_tags.values():
        for entry in entries:
            word_dict[entry['tag']] = scale(entry['events']) + adjustment
    return word_dict


def load_and_resize_mask(mask_path, width=None, height=None):
""" Load a mask image and resize it according to given dimensions.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,7 @@ def test_convert_summary_to_word_dict(self):
}
expected_output = {'tag1': 5, 'tag2': 3, 'tag3': 7}

word_dict = HedTagSummary.summary_to_dict(summary_json, transform=None, adjustment=0)
word_dict = HedTagSummary.summary_to_dict(summary_json, transform=None, scale_adjustment=0)
self.assertEqual(word_dict, expected_output)


Expand Down