diff --git a/src/python/evaluation/plots/README.md b/src/python/evaluation/plots/README.md index bacce3d2..be7e8797 100644 --- a/src/python/evaluation/plots/README.md +++ b/src/python/evaluation/plots/README.md @@ -96,18 +96,20 @@ The script can build the following charts: Run the [raw_issues_statistics_plotter.py](raw_issues_statistics_plotter.py) with the arguments from command line. **Required arguments**: -1. `stats` — path to a file with stats that were founded by [get_raw_issues_statistics.py](../issues_statistics/get_raw_issues_statistics.py). +1. `config_path` — path to the yaml file containing information about the charts to be plotted. A description of the config and its example is provided in [this section](#config-1). 2. `save_dir` — directory where the plotted charts will be saved. -3. `config_path` — path to the yaml file containing information about the charts to be plotted. A description of the config and its example is provided in [this section](#config-1). **Optional arguments**: Argument | Description --- | --- **‑‑file‑extension** | Allows you to select the extension of output files. Available extensions: `.png`, `.jpg`, `.jpeg`, `.webp`, `.svg`, `.pdf`, `.eps`, `.json`. Default is `.svg`. +**‑‑group‑stats** | If present, the statistics for all languages are plotted together on a single chart instead of one chart per language. Histograms cannot be grouped and are always plotted separately for each language. ### Config -The configuration file is a dictionary in yaml format, where for each column of the original dataset the types of graphs to be plotted are specified. You can also put the common parameters when plotting multiple graphs for one column in a separate `common` group. +The configuration file is a dictionary in yaml format, where: +1) the `stats` group specifies, for each language, the path to its dataset with statistics; +2) for each column of the original dataset, the types of graphs to be plotted are specified. You can also put the parameters shared by multiple graphs of one column in a separate `common` group. 
**Possible values of the charts**: * `line_chart` @@ -141,10 +143,18 @@ The result will be two graphs: line chart and histogram. The values in both char ### Examples #### Line chart - +

+ + +

#### Box plot - +

+ + +

#### Histogram - \ No newline at end of file +

+ +

diff --git a/src/python/evaluation/plots/common/plotly_consts.py b/src/python/evaluation/plots/common/plotly_consts.py index fc4f703a..5f0e6d85 100644 --- a/src/python/evaluation/plots/common/plotly_consts.py +++ b/src/python/evaluation/plots/common/plotly_consts.py @@ -1,5 +1,7 @@ from enum import Enum +import plotly.express as px + class MARGIN(Enum): ZERO = {'l': 0, 'r': 0, 'b': 0, 't': 0} @@ -13,7 +15,10 @@ class SORT_ORDER(Enum): # noqa: N801 class COLOR(Enum): - # Colors from px.colors.DEFAULT_PLOTLY_COLORS + """ + Colors from px.colors.DEFAULT_PLOTLY_COLORS + """ + BLUE = "rgb(31, 119, 180)" ORANGE = "rgb(255, 127, 14)" GREEN = "rgb(44, 160, 44)" @@ -24,3 +29,29 @@ class COLOR(Enum): GRAY = "rgb(127, 127, 127)" YELLOW = "rgb(188, 189, 34)" CYAN = "rgb(23, 190, 207)" + + +class COLORWAY(Enum): # noqa: N801 + """ + Colors from px.colors.qualitative + """ + + PLOTLY = px.colors.qualitative.Plotly + D3 = px.colors.qualitative.D3 + G10 = px.colors.qualitative.G10 + T10 = px.colors.qualitative.T10 + ALPHABET = px.colors.qualitative.Alphabet + DARK24 = px.colors.qualitative.Dark24 + LIGHT24 = px.colors.qualitative.Light24 + SET1 = px.colors.qualitative.Set1 + PASTEL1 = px.colors.qualitative.Pastel1 + DARK2 = px.colors.qualitative.Dark2 + SET2 = px.colors.qualitative.Set2 + PASTEL2 = px.colors.qualitative.Pastel2 + SET3 = px.colors.qualitative.Set3 + ANTIQUE = px.colors.qualitative.Antique + BOLD = px.colors.qualitative.Bold + PASTEL = px.colors.qualitative.Pastel + PRISM = px.colors.qualitative.Prism + SAFE = px.colors.qualitative.Safe + VIVID = px.colors.qualitative.Vivid diff --git a/src/python/evaluation/plots/common/utils.py b/src/python/evaluation/plots/common/utils.py index 5c14d07c..4ee1fb5c 100644 --- a/src/python/evaluation/plots/common/utils.py +++ b/src/python/evaluation/plots/common/utils.py @@ -8,50 +8,101 @@ from src.python.evaluation.plots.common import plotly_consts from src.python.review.common.file_system import Extension +COLOR = 
Optional[plotly_consts.COLOR] +COLORWAY = Optional[plotly_consts.COLORWAY] +MARGIN = Optional[plotly_consts.MARGIN] +SORT_ORDER = Optional[plotly_consts.SORT_ORDER] +LINES = Optional[Dict[int, Optional[str]]] + def get_supported_extensions() -> List[str]: extensions = Extension.get_image_extensions() extensions.append(Extension.JSON) + extensions.append(Extension.HTML) return [extension.value for extension in extensions] def create_bar_plot( df: pd.DataFrame, + *, x_axis: str, y_axis: str, - margin: Optional[plotly_consts.MARGIN] = None, - sort_order: Optional[plotly_consts.SORT_ORDER] = None, - color: Optional[plotly_consts.COLOR] = None, + margin: MARGIN = None, + sort_order: SORT_ORDER = None, + color: COLOR = None, ) -> go.Figure: fig = px.bar(df, x=x_axis, y=y_axis, text=y_axis) - update_figure(fig, margin, sort_order, color) + update_figure(fig, margin=margin, sort_order=sort_order, color=color) return fig +def create_box_trace( + df: pd.DataFrame, + *, + x_column: Optional[str] = None, + y_column: Optional[str] = None, + color: COLOR = None, +) -> go.Box: + return go.Box( + x=df[x_column] if x_column is not None else None, + y=df[y_column] if y_column is not None else None, + line={'color': color.value if color is not None else None}, + ) + + def create_box_plot( df: pd.DataFrame, - x_axis: str, - y_axis: str, - margin: Optional[plotly_consts.MARGIN] = None, - sort_order: Optional[plotly_consts.SORT_ORDER] = None, - color: Optional[plotly_consts.COLOR] = None, - horizontal_lines: Optional[Dict[int, Optional[str]]] = None, + *, + x_axis: Optional[str], + y_axis: Optional[str], + margin: MARGIN = None, + sort_order: SORT_ORDER = None, + color: COLOR = None, + horizontal_lines: LINES = None, ) -> go.Figure: - fig = px.box(df, x=x_axis, y=y_axis) - update_figure(fig, margin=margin, sort_order=sort_order, color=color, horizontal_lines=horizontal_lines) + fig = go.Figure(create_box_trace(df, x_column=x_axis, y_column=y_axis, color=color)) + update_figure( + fig, + 
margin=margin, + sort_order=sort_order, + horizontal_lines=horizontal_lines, + x_axis_name=x_axis, + y_axis_name=y_axis, + ) return fig +def create_scatter_trace( + df: pd.DataFrame, + *, + x_column: str, + y_column: str, + color: COLOR = None, +) -> go.Scatter: + return go.Scatter( + x=df[x_column], + y=df[y_column], + line={'color': color.value if color is not None else None}, + ) + + def create_line_chart( df: pd.DataFrame, + *, x_axis: str, y_axis: str, - margin: Optional[plotly_consts.MARGIN] = None, - color: Optional[plotly_consts.COLOR] = None, - vertical_lines: Optional[Dict[int, Optional[str]]] = None, + margin: MARGIN = None, + color: COLOR = None, + vertical_lines: LINES = None, ) -> go.Figure: - fig = px.line(df, x=x_axis, y=y_axis) - update_figure(fig, margin=margin, color=color, vertical_lines=vertical_lines) + fig = go.Figure(create_scatter_trace(df, x_column=x_axis, y_column=y_axis, color=color)) + update_figure( + fig, + margin=margin, + vertical_lines=vertical_lines, + x_axis_name=x_axis, + y_axis_name=y_axis, + ) return fig @@ -60,24 +111,31 @@ def create_histogram( x_axis: str, y_axis: str, n_bins: Optional[int] = None, - margin: Optional[plotly_consts.MARGIN] = None, - color: Optional[plotly_consts.COLOR] = None, - vertical_lines: Optional[Dict[int, Optional[str]]] = None, + margin: MARGIN = None, + color: COLOR = None, + vertical_lines: LINES = None, ) -> go.Figure: fig = px.histogram(df, x=x_axis, y=y_axis, nbins=n_bins) update_figure( - fig, margin=margin, color=color, vertical_lines=vertical_lines, x_axis_name=x_axis, y_axis_name=y_axis, + fig, + margin=margin, + color=color, + vertical_lines=vertical_lines, + x_axis_name=x_axis, + y_axis_name=y_axis, ) return fig def update_figure( fig: go.Figure, - margin: Optional[plotly_consts.MARGIN] = None, - sort_order: Optional[plotly_consts.SORT_ORDER] = None, - color: Optional[plotly_consts.COLOR] = None, - horizontal_lines: Optional[Dict[int, Optional[str]]] = None, - vertical_lines: 
Optional[Dict[int, Optional[str]]] = None, + *, + margin: MARGIN = None, + sort_order: SORT_ORDER = None, + color: COLOR = None, + colorway: COLORWAY = None, + horizontal_lines: LINES = None, + vertical_lines: LINES = None, x_axis_name: Optional[str] = None, y_axis_name: Optional[str] = None, ) -> None: @@ -95,6 +153,9 @@ def update_figure( if y_axis_name is not None: new_layout['yaxis_title'] = y_axis_name + if colorway is not None: + new_layout['colorway'] = colorway.value + fig.update_layout(**new_layout) new_trace = {} @@ -110,7 +171,7 @@ def update_figure( if vertical_lines is not None: for x, annotation in vertical_lines.items(): - fig.add_vline(x=x, annotation_text=annotation) + fig.add_vline(x=x, annotation_text=annotation, annotation_textangle=90) def save_plot( @@ -121,4 +182,7 @@ def save_plot( ) -> None: os.makedirs(dir_path, exist_ok=True) file = dir_path / f"{plot_name}{extension.value}" - fig.write_image(str(file)) + if extension == Extension.HTML: + fig.write_html(str(file)) + else: + fig.write_image(str(file)) diff --git a/src/python/evaluation/plots/examples/BEST_PRACTICES_box_plot.png b/src/python/evaluation/plots/examples/BEST_PRACTICES_box_plot.png index b0108a1a..94b59e74 100644 Binary files a/src/python/evaluation/plots/examples/BEST_PRACTICES_box_plot.png and b/src/python/evaluation/plots/examples/BEST_PRACTICES_box_plot.png differ diff --git a/src/python/evaluation/plots/examples/BEST_PRACTICES_box_plot_grouped.png b/src/python/evaluation/plots/examples/BEST_PRACTICES_box_plot_grouped.png new file mode 100644 index 00000000..7965aa25 Binary files /dev/null and b/src/python/evaluation/plots/examples/BEST_PRACTICES_box_plot_grouped.png differ diff --git a/src/python/evaluation/plots/examples/CODE_STYLE_ratio_histogram.png b/src/python/evaluation/plots/examples/CODE_STYLE_ratio_histogram.png index 6ebce84e..e3dde849 100644 Binary files a/src/python/evaluation/plots/examples/CODE_STYLE_ratio_histogram.png and 
b/src/python/evaluation/plots/examples/CODE_STYLE_ratio_histogram.png differ diff --git a/src/python/evaluation/plots/examples/CYCLOMATIC_COMPLEXITY_line_chart.png b/src/python/evaluation/plots/examples/CYCLOMATIC_COMPLEXITY_line_chart.png index 3cb363e3..17673eb6 100644 Binary files a/src/python/evaluation/plots/examples/CYCLOMATIC_COMPLEXITY_line_chart.png and b/src/python/evaluation/plots/examples/CYCLOMATIC_COMPLEXITY_line_chart.png differ diff --git a/src/python/evaluation/plots/examples/CYCLOMATIC_COMPLEXITY_line_chart_grouped.png b/src/python/evaluation/plots/examples/CYCLOMATIC_COMPLEXITY_line_chart_grouped.png new file mode 100644 index 00000000..044e92c8 Binary files /dev/null and b/src/python/evaluation/plots/examples/CYCLOMATIC_COMPLEXITY_line_chart_grouped.png differ diff --git a/src/python/evaluation/plots/plotters/diffs_plotters.py b/src/python/evaluation/plots/plotters/diffs_plotters.py index e97ceb94..0ff94b64 100644 --- a/src/python/evaluation/plots/plotters/diffs_plotters.py +++ b/src/python/evaluation/plots/plotters/diffs_plotters.py @@ -31,7 +31,9 @@ def _get_dataframe_from_dict( def _extract_stats_from_issues_statistics( - statistics: IssuesStatistics, limit: int, only_unique: bool, + statistics: IssuesStatistics, + limit: int, + only_unique: bool, ) -> Dict[IssueType, int]: categorized_statistics = statistics.get_short_categorized_statistics() @@ -61,7 +63,14 @@ def get_unique_issues_by_category( key_mapper=lambda issue_type: issue_type.name, ) - return create_bar_plot(df, x_axis_name, y_axis_name, margin, sort_order, color) + return create_bar_plot( + df, + x_axis=x_axis_name, + y_axis=y_axis_name, + margin=margin, + sort_order=sort_order, + color=color, + ) def get_issues_by_category( @@ -82,7 +91,14 @@ def get_issues_by_category( key_mapper=lambda issue_type: issue_type.name, ) - return create_bar_plot(df, x_axis_name, y_axis_name, margin, sort_order, color) + return create_bar_plot( + df, + x_axis=x_axis_name, + y_axis=y_axis_name, + 
margin=margin, + sort_order=sort_order, + color=color, + ) def get_median_penalty_influence_by_category( @@ -105,7 +121,14 @@ def get_median_penalty_influence_by_category( value_mapper=lambda influence: median(influence), ) - return create_bar_plot(df, x_axis_name, y_axis_name, margin, sort_order, color) + return create_bar_plot( + df, + x_axis=x_axis_name, + y_axis=y_axis_name, + margin=margin, + sort_order=sort_order, + color=color, + ) def get_penalty_influence_distribution( @@ -128,4 +151,11 @@ def get_penalty_influence_distribution( ) df = df.explode(y_axis_name) - return create_box_plot(df, x_axis_name, y_axis_name, margin, sort_order, color) + return create_box_plot( + df, + x_axis=x_axis_name, + y_axis=y_axis_name, + margin=margin, + sort_order=sort_order, + color=color, + ) diff --git a/src/python/evaluation/plots/plotters/raw_issues_statistics_plotters.py b/src/python/evaluation/plots/plotters/raw_issues_statistics_plotters.py index c545313d..abc74092 100644 --- a/src/python/evaluation/plots/plotters/raw_issues_statistics_plotters.py +++ b/src/python/evaluation/plots/plotters/raw_issues_statistics_plotters.py @@ -1,13 +1,26 @@ +import logging from dataclasses import dataclass from enum import Enum, unique -from typing import Callable, Dict, List, Optional, Tuple +from typing import Callable, Dict, Optional, Tuple import numpy as np import pandas as pd import plotly.graph_objects as go from src.python.evaluation.issues_statistics.get_raw_issues_statistics import VALUE -from src.python.evaluation.plots.common import plotly_consts -from src.python.evaluation.plots.common.utils import create_box_plot, create_histogram, create_line_chart +from src.python.evaluation.plots.common.utils import ( + COLOR, + COLORWAY, + create_box_plot, + create_box_trace, + create_histogram, + create_line_chart, + create_scatter_trace, + LINES, + MARGIN, + update_figure, +) + +logger = logging.getLogger(__name__) @unique @@ -16,7 +29,7 @@ class PlotTypes(Enum): HISTOGRAM = 
'histogram' BOX_PLOT = 'box_plot' - def to_plotter_function(self) -> Callable[..., go.Figure]: + def to_plotter_function(self) -> Callable[[Dict[str, pd.DataFrame], 'PlotConfig', bool], Dict[str, go.Figure]]: type_to_function = { PlotTypes.LINE_CHART: plot_line_chart, PlotTypes.HISTOGRAM: plot_histogram, @@ -32,9 +45,10 @@ class PlotConfig: type: PlotTypes x_axis_name: Optional[str] = None y_axis_name: Optional[str] = None - margin: Optional[plotly_consts.MARGIN] = None - color: Optional[plotly_consts.COLOR] = None - boundaries: Optional[Dict[int, Optional[str]]] = None + margin: MARGIN = None + color: COLOR = None + colorway: COLORWAY = None + boundaries: LINES = None range_of_values: Optional[range] = None n_bins: Optional[int] = None @@ -52,7 +66,10 @@ def _prepare_stats(stats: pd.DataFrame, config: PlotConfig, x_axis_name: str, y_ # Fill in the missing intermediate values with zeros min_index, max_index = result_df.index.min(), result_df.index.max() - result_df = result_df.reindex(range(min_index, max_index + 1), fill_value=0) + if pd.isna(min_index) or pd.isna(max_index): + logger.warning(f'{config.column}: no data') + else: + result_df = result_df.reindex(range(min_index, max_index + 1), fill_value=0) result_df.reset_index(inplace=True) @@ -71,54 +88,126 @@ def _get_axis_names(config: PlotConfig, default_x_axis_name: str, default_y_axis return x_axis_name, y_axis_name -def plot_line_chart(stats: pd.DataFrame, config: PlotConfig) -> go.Figure: - x_axis_name, y_axis_name = _get_axis_names( - config, default_x_axis_name='Value', default_y_axis_name='Number of fragments', +def plot_line_chart( + stats_by_lang: Dict[str, pd.DataFrame], + config: PlotConfig, + group_stats: bool, +) -> Dict[str, go.Figure]: + x_axis_name, y_axis_name = _get_axis_names(config, default_x_axis_name='Value', default_y_axis_name='Quantity') + + if not group_stats: + plots = {} + for lang, stats in stats_by_lang.items(): + stats = _prepare_stats(stats, config, x_axis_name, y_axis_name) + 
plots[lang] = create_line_chart( + stats, + x_axis=x_axis_name, + y_axis=y_axis_name, + color=config.color, + margin=config.margin, + vertical_lines=config.boundaries, + ) + return plots + + plot = go.Figure() + for lang, stats in stats_by_lang.items(): + stats = _prepare_stats(stats, config, x_axis_name, y_axis_name) + trace = create_scatter_trace(stats, x_column=x_axis_name, y_column=y_axis_name) + trace.name = lang + plot.add_trace(trace) + + update_figure( + plot, + margin=config.margin, + vertical_lines=config.boundaries, + x_axis_name=x_axis_name, + y_axis_name=y_axis_name, + colorway=config.colorway, ) - stats = _prepare_stats(stats, config, x_axis_name, y_axis_name) - - return create_line_chart( - stats, x_axis_name, y_axis_name, margin=config.margin, color=config.color, vertical_lines=config.boundaries, - ) + return {'grouped': plot} -def plot_histogram(stats: pd.DataFrame, config: PlotConfig) -> go.Figure: +def plot_histogram( + stats_by_lang: Dict[str, pd.DataFrame], + config: PlotConfig, + group_stats: bool, +) -> Dict[str, go.Figure]: x_axis_name, y_axis_name = _get_axis_names( - config, default_x_axis_name='Value', default_y_axis_name='Number of fragments', + config, default_x_axis_name='Value', default_y_axis_name='Quantity', ) - stats = _prepare_stats(stats, config, x_axis_name, y_axis_name) + if group_stats: + logger.info(f'{config.column}: the histogram cannot be grouped.') - return create_histogram( - stats, - x_axis_name, - y_axis_name, - margin=config.margin, - color=config.color, - n_bins=config.n_bins, - vertical_lines=config.boundaries, - ) + plots = {} + for lang, stats in stats_by_lang.items(): + stats = _prepare_stats(stats, config, x_axis_name, y_axis_name) + plots[lang] = create_histogram( + stats, + x_axis_name, + y_axis_name, + margin=config.margin, + color=config.color, + n_bins=config.n_bins, + vertical_lines=config.boundaries, + ) + + return plots -def _get_all_values_from_stats(stats: pd.DataFrame, column_name: str) -> List[int]: 
- result = [] - stats.apply(lambda row: result.extend([row[VALUE]] * row[column_name]), axis=1) - return result +def _get_values_df(stats: pd.DataFrame, config: PlotConfig, x_axis_name: str, y_axis_name: str): + values = [] + stats.apply(lambda row: values.extend([row[VALUE]] * row[config.column]), axis=1) + + if config.range_of_values is not None: + values = [elem for elem in values if elem in config.range_of_values] + + return pd.DataFrame.from_dict({x_axis_name: config.column, y_axis_name: values}) -def plot_box_plot(stats: pd.DataFrame, config: PlotConfig) -> go.Figure: +def plot_box_plot( + stats_by_lang: Dict[str, pd.DataFrame], + config: PlotConfig, + group_stats: bool, +) -> Dict[str, go.Figure]: x_axis_name, y_axis_name = _get_axis_names( config, - default_x_axis_name="Category", + default_x_axis_name='Category', default_y_axis_name='Values', ) - values = _get_all_values_from_stats(stats, config.column) + if not group_stats: + plots = {} + for lang, stats in stats_by_lang.items(): + values_df = _get_values_df(stats, config, x_axis_name, y_axis_name) - if config.range_of_values is not None: - values = list(filter(lambda elem: elem in config.range_of_values, values)) + plots[lang] = create_box_plot( + values_df, + x_axis=x_axis_name, + y_axis=y_axis_name, + color=config.color, + margin=config.margin, + horizontal_lines=config.boundaries, + ) + return plots + + plot = go.Figure() + for lang, stats in stats_by_lang.items(): + values_df = _get_values_df(stats, config, x_axis_name, y_axis_name) - values_df = pd.DataFrame.from_dict({x_axis_name: config.column, y_axis_name: values}) + trace = create_box_trace(values_df, y_column=y_axis_name) + trace.name = lang + + plot.add_trace(trace) + + update_figure( + plot, + margin=config.margin, + horizontal_lines=config.boundaries, + x_axis_name=x_axis_name, + y_axis_name=y_axis_name, + colorway=config.colorway, + ) - return create_box_plot(values_df, x_axis_name, y_axis_name, horizontal_lines=config.boundaries) + return 
{'grouped': plot} diff --git a/src/python/evaluation/plots/raw_issues_statistics_plotter.py b/src/python/evaluation/plots/raw_issues_statistics_plotter.py index 02888080..6d8295ff 100644 --- a/src/python/evaluation/plots/raw_issues_statistics_plotter.py +++ b/src/python/evaluation/plots/raw_issues_statistics_plotter.py @@ -1,4 +1,5 @@ import argparse +import logging import sys from enum import Enum, unique from pathlib import Path @@ -6,7 +7,7 @@ sys.path.append('../../../..') -import pandas as pd +import plotly.graph_objects as go from src.python.evaluation.common.pandas_util import get_solutions_df_by_file_path from src.python.evaluation.plots.common import plotly_consts from src.python.evaluation.plots.common.utils import ( @@ -23,8 +24,10 @@ class ConfigFields(Enum): Y_AXIS_NAME = 'y_axis_name' MARGIN = 'margin' COLOR = 'color' + COLORWAY = 'colorway' BOUNDARIES = 'boundaries' COMMON = 'common' + STATS = 'stats' RANGE_OF_VALUES = 'range_of_values' N_BINS = 'n_bins' @@ -33,17 +36,19 @@ class ConfigFields(Enum): Y_AXIS_NAME = ConfigFields.Y_AXIS_NAME.value MARGIN = ConfigFields.MARGIN.value COLOR = ConfigFields.COLOR.value +COLORWAY = ConfigFields.COLORWAY.value BOUNDARIES = ConfigFields.BOUNDARIES.value COMMON = ConfigFields.COMMON.value +STATS = ConfigFields.STATS.value RANGE_OF_VALUES = ConfigFields.RANGE_OF_VALUES.value N_BINS = ConfigFields.N_BINS.value def configure_arguments(parser: argparse.ArgumentParser) -> None: parser.add_argument( - 'stats', + 'config_path', type=lambda value: Path(value).absolute(), - help='Path to dataset with statistics.', + help='Path to the yaml file containing information about the graphs to be plotted.', ) parser.add_argument( @@ -52,12 +57,6 @@ def configure_arguments(parser: argparse.ArgumentParser) -> None: help='The directory where the plotted charts will be saved.', ) - parser.add_argument( - 'config_path', - type=lambda value: Path(value).absolute(), - help='Path to the yaml file containing information about the graphs 
to be plotted.', - ) - parser.add_argument( '--file-extension', type=str, @@ -66,6 +65,12 @@ def configure_arguments(parser: argparse.ArgumentParser) -> None: help='Allows you to select the extension of output files.', ) + parser.add_argument( + '--group-stats', + action='store_true', + help='If present, there will be several languages on the charts at once.', + ) + def _get_plot_config( column_name: str, @@ -92,6 +97,10 @@ def _get_plot_config( if RANGE_OF_VALUES in params: params[RANGE_OF_VALUES] = range(*params[RANGE_OF_VALUES]) + if COLORWAY in params: + colorway_value = params.get(COLORWAY).upper() + params[COLORWAY] = plotly_consts.COLORWAY[colorway_value] + return PlotConfig(**params) @@ -105,14 +114,23 @@ def get_plot_configs(column_name: str, column_config: Dict) -> List[PlotConfig]: return plot_configs -def plot_and_save(config: Dict, stats: pd.DataFrame, save_dir: Path, extension: Extension) -> None: +def _save_plots(plots: Dict[str, go.Figure], save_dir: Path, extension: Extension, column: str, plot_type: str) -> None: + for output_name, plot in plots.items(): + subdir = save_dir / column + save_plot(plot, subdir, plot_name=f'{column}_{plot_type}_{output_name}', extension=extension) + + +def plot_and_save(config: Dict, save_dir: Path, extension: Extension, group_stats: bool) -> None: + stats_by_lang = { + lang: get_solutions_df_by_file_path(Path(lang_stats)) for lang, lang_stats in config.pop(STATS).items() + } + for column_name, column_config in config.items(): plot_configs = get_plot_configs(column_name, column_config) for plot_config in plot_configs: plotter_function = plot_config.type.to_plotter_function() - plot = plotter_function(stats, plot_config) - subdir = save_dir / plot_config.column - save_plot(plot, subdir, plot_name=f'{plot_config.column}_{plot_config.type.value}', extension=extension) + plots = plotter_function(stats_by_lang, plot_config, group_stats) + _save_plots(plots, save_dir, extension, plot_config.column, plot_config.type.value) 
def main(): @@ -120,12 +138,12 @@ def main(): configure_arguments(parser) args = parser.parse_args() - stats = get_solutions_df_by_file_path(args.stats) + logging.basicConfig(level=logging.INFO) extension = Extension(args.file_extension) config = parse_yaml(args.config_path) - plot_and_save(config, stats, args.save_dir, extension) + plot_and_save(config, args.save_dir, extension, args.group_stats) if __name__ == "__main__": diff --git a/src/python/review/common/file_system.py b/src/python/review/common/file_system.py index 952eed1a..9800d2ce 100644 --- a/src/python/review/common/file_system.py +++ b/src/python/review/common/file_system.py @@ -38,6 +38,7 @@ class Extension(Enum): CSV = '.csv' PICKLE = '.pickle' JSON = '.json' + HTML = '.html' # Image extensions PNG = '.png' diff --git a/whitelist.txt b/whitelist.txt index 53cc4701..0660d745 100644 --- a/whitelist.txt +++ b/whitelist.txt @@ -1,4 +1,14 @@ +D3 +DARK2 +DARK24 +G10 +LIGHT24 +PASTEL2 Pastel1 +SET1 +SET2 +SET3 +T10 abstractmethod arange astype @@ -175,6 +185,7 @@ subdirs sublist svg sym +textangle textposition textwrap tmp