hyperskill · nbirillo · Aug 17, 2021 · Aug 16, 2021 · Aug 16, 2021 · Aug 16, 2021
diff --git a/src/python/evaluation/plots/README.md b/src/python/evaluation/plots/README.md
@@ -96,18 +96,20 @@ The script can build the following charts:
 Run the [raw_issues_statistics_plotter.py](raw_issues_statistics_plotter.py) with the arguments from command line.
 
 **Required arguments**:
-1. `stats` — path to a file with stats that were founded by [get_raw_issues_statistics.py](../issues_statistics/get_raw_issues_statistics.py).
+1. `config_path` — path to the yaml file containing information about the charts to be plotted. A description of the config and an example are provided in [this section](#config-1).
 2. `save_dir` — directory where the plotted charts will be saved.
-3. `config_path` — path to the yaml file containing information about the charts to be plotted. A description of the config and its example is provided in [this section](#config-1).
 
 **Optional arguments**:
 
 Argument | Description
 --- | ---
 **&#8209;&#8209;file&#8209;extension** | Allows you to select the extension of output files. Available extensions: `.png`, `.jpg`, `.jpeg`, `.webp`, `.svg`, `.pdf`, `.eps`, `.json`. Default is `.svg`.
+**&#8209;&#8209;group&#8209;stats** | If present, the statistics for several languages are plotted together on the same charts.
 
 ### Config
-The configuration file is a dictionary in yaml format, where for each column of the original dataset the types of graphs to be plotted are specified. You can also put the common parameters when plotting multiple graphs for one column in a separate `common` group.
+The configuration file is a dictionary in yaml format that specifies:
+1) the paths to the datasets with statistics;
+2) for each column of the original dataset, the types of graphs to be plotted. Parameters shared by several graphs of the same column can be placed in a separate `common` group.
 
 **Possible values of the charts**: 
 * `line_chart`
@@ -141,10 +143,18 @@ The result will be two graphs: line chart and histogram. The values in both char
 ### Examples
 
 #### Line chart
-<img src="./examples/CYCLOMATIC_COMPLEXITY_line_chart.png" width="500">
+<p align="middle">
+  <img src="./examples/CYCLOMATIC_COMPLEXITY_line_chart.png" width="49%" />
+  <img src="./examples/CYCLOMATIC_COMPLEXITY_line_chart_grouped.png" width="49%" />
+</p>
 
 #### Box plot
-<img src="./examples/BEST_PRACTICES_box_plot.png" width="500">
+<p align="middle">
+  <img src="./examples/BEST_PRACTICES_box_plot.png" width="49%" />
+  <img src="./examples/BEST_PRACTICES_box_plot_grouped.png" width="49%" />
+</p>
 
 #### Histogram
-<img src="./examples/CODE_STYLE_ratio_histogram.png" width="500">
+<p align="middle">
+  <img src="./examples/CODE_STYLE_ratio_histogram.png" width="49%" />
+</p>
diff --git a/src/python/evaluation/plots/common/plotly_consts.py b/src/python/evaluation/plots/common/plotly_consts.py
@@ -1,5 +1,7 @@
 from enum import Enum
 
+import plotly.express as px
+
 
 class MARGIN(Enum):
     ZERO = {'l': 0, 'r': 0, 'b': 0, 't': 0}
@@ -13,7 +15,10 @@ class SORT_ORDER(Enum):  # noqa: N801
 
 
 class COLOR(Enum):
-    # Colors from px.colors.DEFAULT_PLOTLY_COLORS
+    """
+    Colors from px.colors.DEFAULT_PLOTLY_COLORS
+    """
+
     BLUE = "rgb(31, 119, 180)"
     ORANGE = "rgb(255, 127, 14)"
     GREEN = "rgb(44, 160, 44)"
@@ -24,3 +29,29 @@ class COLOR(Enum):
     GRAY = "rgb(127, 127, 127)"
     YELLOW = "rgb(188, 189, 34)"
     CYAN = "rgb(23, 190, 207)"
+
+
+class COLORWAY(Enum):  # noqa: N801
+    """
+    Colors from px.colors.qualitative
+    """
+
+    PLOTLY = px.colors.qualitative.Plotly
+    D3 = px.colors.qualitative.D3
+    G10 = px.colors.qualitative.G10
+    T10 = px.colors.qualitative.T10
+    ALPHABET = px.colors.qualitative.Alphabet
+    DARK24 = px.colors.qualitative.Dark24
+    LIGHT24 = px.colors.qualitative.Light24
+    SET1 = px.colors.qualitative.Set1
+    PASTEL1 = px.colors.qualitative.Pastel1
+    DARK2 = px.colors.qualitative.Dark2
+    SET2 = px.colors.qualitative.Set2
+    PASTEL2 = px.colors.qualitative.Pastel2
+    SET3 = px.colors.qualitative.Set3
+    ANTIQUE = px.colors.qualitative.Antique
+    BOLD = px.colors.qualitative.Bold
+    PASTEL = px.colors.qualitative.Pastel
+    PRISM = px.colors.qualitative.Prism
+    SAFE = px.colors.qualitative.Safe
+    VIVID = px.colors.qualitative.Vivid
diff --git a/src/python/evaluation/plots/common/utils.py b/src/python/evaluation/plots/common/utils.py
@@ -8,50 +8,101 @@
 from src.python.evaluation.plots.common import plotly_consts
 from src.python.review.common.file_system import Extension
 
+COLOR = Optional[plotly_consts.COLOR]
+COLORWAY = Optional[plotly_consts.COLORWAY]
+MARGIN = Optional[plotly_consts.MARGIN]
+SORT_ORDER = Optional[plotly_consts.SORT_ORDER]
+LINES = Optional[Dict[int, Optional[str]]]
+
 
 def get_supported_extensions() -> List[str]:
     extensions = Extension.get_image_extensions()
     extensions.append(Extension.JSON)
+    extensions.append(Extension.HTML)
     return [extension.value for extension in extensions]
 
 
 def create_bar_plot(
     df: pd.DataFrame,
+    *,
     x_axis: str,
     y_axis: str,
-    margin: Optional[plotly_consts.MARGIN] = None,
-    sort_order: Optional[plotly_consts.SORT_ORDER] = None,
-    color: Optional[plotly_consts.COLOR] = None,
+    margin: MARGIN = None,
+    sort_order: SORT_ORDER = None,
+    color: COLOR = None,
 ) -> go.Figure:
     fig = px.bar(df, x=x_axis, y=y_axis, text=y_axis)
-    update_figure(fig, margin, sort_order, color)
+    update_figure(fig, margin=margin, sort_order=sort_order, color=color)
     return fig
 
 
+def create_box_trace(
+    df: pd.DataFrame,
+    *,
+    x_column: Optional[str] = None,
+    y_column: Optional[str] = None,
+    color: COLOR = None,
+) -> go.Box:
+    return go.Box(
+        x=df[x_column] if x_column is not None else None,
+        y=df[y_column] if y_column is not None else None,
+        line={'color': color.value if color is not None else None},
+    )
+
+
 def create_box_plot(
     df: pd.DataFrame,
-    x_axis: str,
-    y_axis: str,
-    margin: Optional[plotly_consts.MARGIN] = None,
-    sort_order: Optional[plotly_consts.SORT_ORDER] = None,
-    color: Optional[plotly_consts.COLOR] = None,
-    horizontal_lines: Optional[Dict[int, Optional[str]]] = None,
+    *,
+    x_axis: Optional[str],
+    y_axis: Optional[str],
+    margin: MARGIN = None,
+    sort_order: SORT_ORDER = None,
+    color: COLOR = None,
+    horizontal_lines: LINES = None,
 ) -> go.Figure:
-    fig = px.box(df, x=x_axis, y=y_axis)
-    update_figure(fig, margin=margin, sort_order=sort_order, color=color, horizontal_lines=horizontal_lines)
+    fig = go.Figure(create_box_trace(df, x_column=x_axis, y_column=y_axis, color=color))
+    update_figure(
+        fig,
+        margin=margin,
+        sort_order=sort_order,
+        horizontal_lines=horizontal_lines,
+        x_axis_name=x_axis,
+        y_axis_name=y_axis,
+    )
     return fig
 
 
+def create_scatter_trace(
+    df: pd.DataFrame,
+    *,
+    x_column: str,
+    y_column: str,
+    color: COLOR = None,
+) -> go.Scatter:
+    return go.Scatter(
+        x=df[x_column],
+        y=df[y_column],
+        line={'color': color.value if color is not None else None},
+    )
+
+
 def create_line_chart(
     df: pd.DataFrame,
+    *,
     x_axis: str,
     y_axis: str,
-    margin: Optional[plotly_consts.MARGIN] = None,
-    color: Optional[plotly_consts.COLOR] = None,
-    vertical_lines: Optional[Dict[int, Optional[str]]] = None,
+    margin: MARGIN = None,
+    color: COLOR = None,
+    vertical_lines: LINES = None,
 ) -> go.Figure:
-    fig = px.line(df, x=x_axis, y=y_axis)
-    update_figure(fig, margin=margin, color=color, vertical_lines=vertical_lines)
+    fig = go.Figure(create_scatter_trace(df, x_column=x_axis, y_column=y_axis, color=color))
+    update_figure(
+        fig,
+        margin=margin,
+        vertical_lines=vertical_lines,
+        x_axis_name=x_axis,
+        y_axis_name=y_axis,
+    )
     return fig
 
 
@@ -60,24 +111,31 @@ def create_histogram(
     x_axis: str,
     y_axis: str,
     n_bins: Optional[int] = None,
-    margin: Optional[plotly_consts.MARGIN] = None,
-    color: Optional[plotly_consts.COLOR] = None,
-    vertical_lines: Optional[Dict[int, Optional[str]]] = None,
+    margin: MARGIN = None,
+    color: COLOR = None,
+    vertical_lines: LINES = None,
 ) -> go.Figure:
     fig = px.histogram(df, x=x_axis, y=y_axis, nbins=n_bins)
     update_figure(
-        fig, margin=margin, color=color, vertical_lines=vertical_lines, x_axis_name=x_axis, y_axis_name=y_axis,
+        fig,
+        margin=margin,
+        color=color,
+        vertical_lines=vertical_lines,
+        x_axis_name=x_axis,
+        y_axis_name=y_axis,
     )
     return fig
 
 
 def update_figure(
     fig: go.Figure,
-    margin: Optional[plotly_consts.MARGIN] = None,
-    sort_order: Optional[plotly_consts.SORT_ORDER] = None,
-    color: Optional[plotly_consts.COLOR] = None,
-    horizontal_lines: Optional[Dict[int, Optional[str]]] = None,
-    vertical_lines: Optional[Dict[int, Optional[str]]] = None,
+    *,
+    margin: MARGIN = None,
+    sort_order: SORT_ORDER = None,
+    color: COLOR = None,
+    colorway: COLORWAY = None,
+    horizontal_lines: LINES = None,
+    vertical_lines: LINES = None,
     x_axis_name: Optional[str] = None,
     y_axis_name: Optional[str] = None,
 ) -> None:
@@ -95,6 +153,9 @@ def update_figure(
     if y_axis_name is not None:
         new_layout['yaxis_title'] = y_axis_name
 
+    if colorway is not None:
+        new_layout['colorway'] = colorway.value
+
     fig.update_layout(**new_layout)
 
     new_trace = {}
@@ -110,7 +171,7 @@ def update_figure(
 
     if vertical_lines is not None:
         for x, annotation in vertical_lines.items():
-            fig.add_vline(x=x, annotation_text=annotation)
+            fig.add_vline(x=x, annotation_text=annotation, annotation_textangle=90)
 
 
 def save_plot(
@@ -121,4 +182,7 @@ def save_plot(
 ) -> None:
     os.makedirs(dir_path, exist_ok=True)
     file = dir_path / f"{plot_name}{extension.value}"
-    fig.write_image(str(file))
+    if extension == Extension.HTML:
+        fig.write_html(str(file))
+    else:
+        fig.write_image(str(file))
diff --git a/src/python/evaluation/plots/examples/BEST_PRACTICES_box_plot.png b/src/python/evaluation/plots/examples/BEST_PRACTICES_box_plot.png
diff --git a/src/python/evaluation/plots/examples/BEST_PRACTICES_box_plot_grouped.png b/src/python/evaluation/plots/examples/BEST_PRACTICES_box_plot_grouped.png
diff --git a/src/python/evaluation/plots/examples/CODE_STYLE_ratio_histogram.png b/src/python/evaluation/plots/examples/CODE_STYLE_ratio_histogram.png
diff --git a/src/python/evaluation/plots/examples/CYCLOMATIC_COMPLEXITY_line_chart.png b/src/python/evaluation/plots/examples/CYCLOMATIC_COMPLEXITY_line_chart.png
diff --git a/src/python/evaluation/plots/examples/CYCLOMATIC_COMPLEXITY_line_chart_grouped.png b/src/python/evaluation/plots/examples/CYCLOMATIC_COMPLEXITY_line_chart_grouped.png
diff --git a/src/python/evaluation/plots/plotters/diffs_plotters.py b/src/python/evaluation/plots/plotters/diffs_plotters.py
@@ -31,7 +31,9 @@ def _get_dataframe_from_dict(
 
 
 def _extract_stats_from_issues_statistics(
-    statistics: IssuesStatistics, limit: int, only_unique: bool,
+    statistics: IssuesStatistics,
+    limit: int,
+    only_unique: bool,
 ) -> Dict[IssueType, int]:
     categorized_statistics = statistics.get_short_categorized_statistics()
 
@@ -61,7 +63,14 @@ def get_unique_issues_by_category(
         key_mapper=lambda issue_type: issue_type.name,
     )
 
-    return create_bar_plot(df, x_axis_name, y_axis_name, margin, sort_order, color)
+    return create_bar_plot(
+        df,
+        x_axis=x_axis_name,
+        y_axis=y_axis_name,
+        margin=margin,
+        sort_order=sort_order,
+        color=color,
+    )
 
 
 def get_issues_by_category(
@@ -82,7 +91,14 @@ def get_issues_by_category(
         key_mapper=lambda issue_type: issue_type.name,
     )
 
-    return create_bar_plot(df, x_axis_name, y_axis_name, margin, sort_order, color)
+    return create_bar_plot(
+        df,
+        x_axis=x_axis_name,
+        y_axis=y_axis_name,
+        margin=margin,
+        sort_order=sort_order,
+        color=color,
+    )
 
 
 def get_median_penalty_influence_by_category(
@@ -105,7 +121,14 @@ def get_median_penalty_influence_by_category(
         value_mapper=lambda influence: median(influence),
     )
 
-    return create_bar_plot(df, x_axis_name, y_axis_name, margin, sort_order, color)
+    return create_bar_plot(
+        df,
+        x_axis=x_axis_name,
+        y_axis=y_axis_name,
+        margin=margin,
+        sort_order=sort_order,
+        color=color,
+    )
 
 
 def get_penalty_influence_distribution(
@@ -128,4 +151,11 @@ def get_penalty_influence_distribution(
     )
     df = df.explode(y_axis_name)
 
-    return create_box_plot(df, x_axis_name, y_axis_name, margin, sort_order, color)
+    return create_box_plot(
+        df,
+        x_axis=x_axis_name,
+        y_axis=y_axis_name,
+        margin=margin,
+        sort_order=sort_order,
+        color=color,
+    )