diff --git a/hed/tools/remodeling/dispatcher.py b/hed/tools/remodeling/dispatcher.py index 00f57b2be..bd0a8826d 100644 --- a/hed/tools/remodeling/dispatcher.py +++ b/hed/tools/remodeling/dispatcher.py @@ -37,8 +37,8 @@ def __init__(self, operation_list, data_root=None, :raises ValueError: - If any of the operations cannot be parsed correctly. - """ + self.data_root = data_root self.backup_name = backup_name self.backup_man = None @@ -60,7 +60,6 @@ def get_summaries(self, file_formats=['.txt', '.json']): Returns: list: A list of dictionaries of summaries keyed to filenames. - """ summary_list = [] @@ -101,9 +100,9 @@ def get_data_file(self, file_designator): In this case, the corresponding backup file is read and returned. - If a string is passed and there is no backup manager, the data file corresponding to the file_designator is read and returned. - - If a Pandas DataFrame, return a copy. - + - If a Pandas DataFrame, return a copy. """ + if isinstance(file_designator, pd.DataFrame): return file_designator.copy() if self.backup_man: @@ -126,7 +125,6 @@ def get_summary_save_dir(self): :raises HedFileError: - If this dispatcher does not have a data_root. - """ if self.data_root: @@ -143,7 +141,6 @@ def run_operations(self, file_path, sidecar=None, verbose=False): Returns: DataFrame: The processed dataframe. - """ # string to functions @@ -173,8 +170,8 @@ def save_summaries(self, save_formats=['.json', '.txt'], individual_summaries="s - "consolidated" means that the overall summary and summaries of individual files are in one summary file. - "individual" means that the summaries of individual files are in separate files. - "none" means that only the overall summary is produced. - """ + if not save_formats: return if not summary_dir: @@ -185,6 +182,15 @@ def save_summaries(self, save_formats=['.json', '.txt'], individual_summaries="s @staticmethod def parse_operations(operation_list): + """ Return a parsed list of remodeler operations. 
+ + Parameters: + operation_list (list): List of JSON remodeler operations. + + Returns: + list: List of Python objects containing parsed remodeler operations. + """ + operations = [] for index, item in enumerate(operation_list): new_operation = valid_operations[item["operation"]](item["parameters"]) @@ -197,8 +203,8 @@ def prep_data(df): Parameters: df (DataFrame) - The DataFrame to be processed. - """ + result = df.replace('n/a', np.nan) # Comment in the next line if this behavior was actually needed, but I don't think it is. # result = result.infer_objects(copy=False) @@ -206,15 +212,15 @@ def prep_data(df): @staticmethod def post_proc_data(df): - """ Replace all nan entries with 'n/a' for BIDS compliance + """ Replace all nan entries with 'n/a' for BIDS compliance. Parameters: df (DataFrame): The DataFrame to be processed. Returns: - DataFrame: DataFrame with the 'np.NAN replaced by 'n/a' - + DataFrame: DataFrame with np.nan entries replaced by 'n/a'. """ + dtypes = df.dtypes.to_dict() for col_name, typ in dtypes.items(): if typ == 'category': @@ -232,10 +238,9 @@ def errors_to_str(messages, title="", sep='\n'): Returns: str: Single string representing the messages. - - """ - error_list = [0]*len(messages) + + error_list = [0] * len(messages) for index, message in enumerate(messages): error_list[index] = f"Operation[{message.get('index', None)}] " + \ f"has error:{message.get('error_type', None)}" + \ @@ -255,8 +260,8 @@ def get_schema(hed_versions): Returns: HedSchema or HedSchemaGroup: Objects loaded from the hed_versions specification. 
- """ + if not hed_versions: return None elif isinstance(hed_versions, str) or isinstance(hed_versions, list): diff --git a/hed/tools/remodeling/operations/summarize_hed_tags_op.py b/hed/tools/remodeling/operations/summarize_hed_tags_op.py index c9eb9f5ea..8db7e1aa1 100644 --- a/hed/tools/remodeling/operations/summarize_hed_tags_op.py +++ b/hed/tools/remodeling/operations/summarize_hed_tags_op.py @@ -74,7 +74,43 @@ class SummarizeHedTagsOp(BaseOp): "type": "boolean" }, "word_cloud": { - "type": "boolean" + "type": "object", + "properties": { + "height": { + "type": "integer" + }, + "width": { + "type": "integer" + }, + "prefer_horizontal": { + "type": "number" + }, + "min_font_size": { + "type": "number" + }, + "max_font_size": { + "type": "number" + }, + "scale_adjustment": { + "type": "number" + }, + "contour_width": { + "type": "number" + }, + "contour_color": { + "type": "string" + }, + "background_color": { + "type": "string" + }, + "use_mask": { + "type": "boolean" + }, + "mask_path": { + "type": "string" + } + }, + "additionalProperties": False }, }, "required": [ @@ -102,7 +138,26 @@ def __init__(self, parameters): self.include_context = parameters.get('include_context', True) self.replace_defs = parameters.get("replace_defs", True) self.remove_types = parameters.get("remove_types", []) - self.word_cloud = parameters.get("word_cloud", False) + if "word_cloud" not in parameters: + self.word_cloud = None + else: + wc_params = parameters["word_cloud"] + self.word_cloud = { + "height": wc_params.get("height", 300), + "width": wc_params.get("width", 400), + "prefer_horizontal": wc_params.get("prefer_horizontal", 0.75), + "min_font_size": wc_params.get("min_font_size", 8), + "max_font_size": wc_params.get("max_font_size", 15), + "scale_adjustment": wc_params.get("scale_adjustment", 7), + "contour_width": wc_params.get("contour_width", 3), + "contour_color": wc_params.get("contour_color", 'black'), + "background_color": wc_params.get("background_color", None), + 
"use_mask": wc_params.get("use_mask", False), + "mask_path": wc_params.get("mask_path", None) + } + if self.word_cloud["use_mask"] and not self.word_cloud["mask_path"]: + self.word_cloud["mask_path"] = os.path.realpath(os.path.join(os.path.dirname(__file__), + '../../../resources/word_cloud_brain_mask.png')) def do_op(self, dispatcher, df, name, sidecar=None): """ Summarize the HED tags present in the dataset. @@ -144,6 +199,7 @@ def __init__(self, sum_op): sum_op (BaseOp): Operation associated with this summary. """ + super().__init__(sum_op) self.sum_op = sum_op @@ -237,31 +293,35 @@ def save_visualizations(self, save_dir, file_formats=['.svg'], individual_summar """ if not self.sum_op.word_cloud: return + else: + wc = self.sum_op.word_cloud # summary = self.get_summary(individual_summaries='none') summary = self.get_summary(individual_summaries='none') overall_summary = summary.get("Dataset", {}) overall_summary = overall_summary.get("Overall summary", {}) specifics = overall_summary.get("Specifics", {}) - word_dict = self.summary_to_dict(specifics) - width = 400 - height = 300 - mask_path = os.path.realpath(os.path.join(os.path.dirname(__file__), - '../../../resources/word_cloud_brain_mask.png')) - tag_wc = create_wordcloud(word_dict, mask_path=mask_path, width=width, height=height) + word_dict = self.summary_to_dict(specifics, scale_adjustment=wc["scale_adjustment"]) + + tag_wc = create_wordcloud(word_dict, mask_path=wc["mask_path"], width=wc["width"], height=wc["height"], + prefer_horizontal=wc["prefer_horizontal"], background_color=wc["background_color"], + min_font_size=wc["min_font_size"], max_font_size=wc["max_font_size"], + contour_width=wc["contour_width"], contour_color=wc["contour_color"]) svg_data = word_cloud_to_svg(tag_wc) - cloud_filename = os.path.realpath(os.path.join(save_dir, self.op.summary_name, '_word_cloud.svg')) + cloud_filename = os.path.realpath(os.path.join(save_dir, self.sum_op.summary_name, + self.sum_op.summary_name + 
'_word_cloud.svg')) with open(cloud_filename, "w") as outfile: outfile.writelines(svg_data) @staticmethod - def summary_to_dict(specifics, transform=np.log10, adjustment=7): + def summary_to_dict(specifics, transform=np.log10, scale_adjustment=7): """Convert a HedTagSummary json specifics dict into the word cloud input format. Parameters: specifics(dict): Dictionary with keys "Main tags" and "Other tags". transform(func): The function to transform the number of found tags. Default log10 - adjustment(int): Value added after transform. + scale_adjustment(int): Value added after transform. + Returns: word_dict(dict): a dict of the words and their occurrence count. @@ -278,10 +338,10 @@ def transform(x): if tag == "Exclude tags": continue for tag_sub_dict in tag_sub_list: - word_dict[tag_sub_dict['tag']] = transform(tag_sub_dict['events']) + adjustment + word_dict[tag_sub_dict['tag']] = transform(tag_sub_dict['events']) + scale_adjustment other_dict = specifics.get("Other tags", []) for tag_sub_list in other_dict: - word_dict[tag_sub_list['tag']] = transform(tag_sub_list['events']) + adjustment + word_dict[tag_sub_list['tag']] = transform(tag_sub_list['events']) + scale_adjustment return word_dict @staticmethod diff --git a/hed/tools/visualization/__init__.py b/hed/tools/visualization/__init__.py index aacd5ec38..aa2e73ea5 100644 --- a/hed/tools/visualization/__init__.py +++ b/hed/tools/visualization/__init__.py @@ -1,4 +1,4 @@ """ Visualization tools for HED. 
""" -from .tag_word_cloud import create_wordcloud, summary_to_dict, word_cloud_to_svg +from .tag_word_cloud import create_wordcloud, word_cloud_to_svg diff --git a/hed/tools/visualization/tag_word_cloud.py b/hed/tools/visualization/tag_word_cloud.py index 5ff64b8b9..5779bb4ad 100644 --- a/hed/tools/visualization/tag_word_cloud.py +++ b/hed/tools/visualization/tag_word_cloud.py @@ -41,7 +41,8 @@ def create_wordcloud(word_dict, mask_path=None, background_color=None, width=400 kwargs.setdefault('color_func', default_color_func) kwargs.setdefault('relative_scaling', 1) kwargs.setdefault('max_font_size', height / 20) - kwargs.setdefault('min_font_size', 8) + kwargs.setdefault('min_font_size', 8), + wc = WordCloud(background_color=background_color, mask=mask_image, width=width, height=height, mode="RGBA", **kwargs) @@ -66,35 +67,6 @@ def word_cloud_to_svg(wc): return svg_string -def summary_to_dict(summary, transform=np.log10, adjustment=5): - """Convert a HedTagSummary JSON dict into the word cloud input format. - - Parameters: - summary(dict): The summary from a SummarizeHedTagsOp. - transform(func): The function to transform the number of found tags (Default log10). - adjustment(int): Value added after transform. - - Returns: - word_dict(dict): A dict of the words and their occurrence count. - - :raises KeyError: - A malformed dictionary was passed. - - """ - if transform is None: - def transform(x): - return x - overall_summary = summary.get("Overall summary", {}) - specifics = overall_summary.get("Specifics", {}) - tag_dict = specifics.get("Main tags", {}) - word_dict = {} - for tag_sub_list in tag_dict.values(): - for tag_sub_dict in tag_sub_list: - word_dict[tag_sub_dict['tag']] = transform(tag_sub_dict['events']) + adjustment - - return word_dict - - def load_and_resize_mask(mask_path, width=None, height=None): """ Load a mask image and resize it according to given dimensions. 
diff --git a/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py b/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py index c7ec6e4c8..47767294a 100644 --- a/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py +++ b/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py @@ -274,7 +274,7 @@ def test_convert_summary_to_word_dict(self): } expected_output = {'tag1': 5, 'tag2': 3, 'tag3': 7} - word_dict = HedTagSummary.summary_to_dict(summary_json, transform=None, adjustment=0) + word_dict = HedTagSummary.summary_to_dict(summary_json, transform=None, scale_adjustment=0) self.assertEqual(word_dict, expected_output)