hyperskill · nbirillo · Aug 13, 2021 · Aug 9, 2021 · Aug 9, 2021 · Aug 9, 2021
diff --git a/src/python/evaluation/plots/README.md b/src/python/evaluation/plots/README.md
@@ -1,7 +1,7 @@
 # Hyperstyle evaluation: plots
-This module allows you to visualize the data obtained with the [inspectors](../inspectors) module
+This module allows you to visualize the data.
 
-## [diffs_plotter.py](diffs_plotter.py)
+## Diffs plotter
 This script allows you to visualize a dataset obtained with [diffs_between_df.py](../inspectors/diffs_between_df.py). 
 
 The script can build the following charts: 
@@ -83,3 +83,68 @@ The result will be four graphs (`unique_issues_by_category`, `unique_penalty_iss
 
 #### Distribution of influence on penalty by category
 <img src="./examples/penalty_influence_distribution.png" width="500">
+
+## Raw issues statistics plotter
+This script allows you to visualize a dataset obtained with [get_raw_issues_statistics.py](../issues_statistics/get_raw_issues_statistics.py). 
+
+The script can build the following charts: 
+* Line chart ([Example](#line-chart))
+* Box plot ([Example](#box-plot))
+* Histogram ([Example](#histogram))
+
+### Usage
+Run the [raw_issues_statistics_plotter.py](raw_issues_statistics_plotter.py) with the arguments from the command line.
+
+**Required arguments**:
+1. `stats` — path to a file with stats that were found by [get_raw_issues_statistics.py](../issues_statistics/get_raw_issues_statistics.py).
+2. `save_dir` — directory where the plotted charts will be saved.
+3. `config_path` — path to the yaml file containing information about the charts to be plotted. A description of the config and its example is provided in [this section](#config-1).
+
+**Optional arguments**:
+
+Argument | Description
+--- | ---
+**&#8209;&#8209;file&#8209;extension** | Allows you to select the extension of output files. Available extensions: `.png`, `.jpg`, `.jpeg`, `.webp`, `.svg`, `.pdf`, `.eps`, `.json`. Default is `.svg`.
+
+### Config
+The configuration file is a dictionary in yaml format, where for each column of the original dataset the types of graphs to be plotted are specified. You can also put the common parameters when plotting multiple graphs for one column in a separate `common` group.
+
+**Possible values of the charts**: 
+* `line_chart`
+* `histogram`
+* `box_plot`
+
+**Possible parameters**:
+Parameter | Description
+---|---
+**x_axis_name** | Name of the x-axis. The default value depends on the type of chart.
+**y_axis_name** | Name of the y-axis. The default value depends on the type of chart.
+**boundaries** | Dictionary consisting of pairs `boundary value`: `boundary name` (the boundary name may be omitted). Allows you to draw vertical or horizontal lines on graphs (depending on the type of plot). By default, the boundaries are not drawn.
+**range_of_values** | Allows you to filter the values. It is an array of two values: a and b. Only values that belong to the range &#91;a, b&#41; are taken into account when plotting. By default, all values are taken into account when plotting.
+**margin** | Defines the outer margin on all four sides of the chart. The available values are specified in the Enum class `MARGIN` from [plots const file](./common/plotly_consts.py). If not specified, the default value provided by Plotly is used.
+**sort_order** | Defines the sorting order of the chart. The available values are specified in the Enum class `SORT_ORDER` from [plots const file](./common/plotly_consts.py). If not specified, the default value provided by Plotly is used.
+**color** | Defines the color of the chart. The available values are specified in the Enum class `COLOR` from [plots const file](./common/plotly_consts.py). If not specified, the default value provided by Plotly is used.
+**n_bins** | Allows you to adjust the number of bins when plotting a histogram. By default, this value is set by Plotly.
+
+#### Example of config
+```yaml
+CYCLOMATIC_COMPLEXITY:
+  line_chart:
+    x_axis_name: Cyclomatic complexity value
+  histogram:
+  common:
+    range_of_values: [0, 20]
+```
+
+The result will be two graphs: line chart and histogram. The values in both charts will be between 0 and 19 inclusive. In the line chart the x-axis will be named "Cyclomatic complexity value". 
+
+### Examples
+
+#### Line chart
+<img src="./examples/CYCLOMATIC_COMPLEXITY_line_chart.png" width="500">
+
+#### Box plot
+<img src="./examples/BEST_PRACTICES_box_plot.png" width="500">
+
+#### Histogram
+<img src="./examples/CODE_STYLE_ratio_histogram.png" width="500">
diff --git a/src/python/evaluation/plots/common/utils.py b/src/python/evaluation/plots/common/utils.py
@@ -1,6 +1,6 @@
 import os
 from pathlib import Path
-from typing import List, Optional
+from typing import Dict, List, Optional
 
 import pandas as pd
 import plotly.express as px
@@ -35,9 +35,39 @@ def create_box_plot(
     margin: Optional[plotly_consts.MARGIN] = None,
     sort_order: Optional[plotly_consts.SORT_ORDER] = None,
     color: Optional[plotly_consts.COLOR] = None,
+    horizontal_lines: Optional[Dict[int, Optional[str]]] = None,
 ) -> go.Figure:
     fig = px.box(df, x=x_axis, y=y_axis)
-    update_figure(fig, margin, sort_order, color)
+    update_figure(fig, margin=margin, sort_order=sort_order, color=color, horizontal_lines=horizontal_lines)
+    return fig
+
+
+def create_line_chart(
+    df: pd.DataFrame,
+    x_axis: str,
+    y_axis: str,
+    margin: Optional[plotly_consts.MARGIN] = None,
+    color: Optional[plotly_consts.COLOR] = None,
+    vertical_lines: Optional[Dict[int, Optional[str]]] = None,
+) -> go.Figure:
+    fig = px.line(df, x=x_axis, y=y_axis)
+    update_figure(fig, margin=margin, color=color, vertical_lines=vertical_lines)
+    return fig
+
+
+def create_histogram(
+    df: pd.DataFrame,
+    x_axis: str,
+    y_axis: str,
+    n_bins: Optional[int] = None,
+    margin: Optional[plotly_consts.MARGIN] = None,
+    color: Optional[plotly_consts.COLOR] = None,
+    vertical_lines: Optional[Dict[int, Optional[str]]] = None,
+) -> go.Figure:
+    """
+    Build a histogram from ``df`` and apply the shared figure settings.
+
+    :param df: data to plot.
+    :param x_axis: name of the column of ``df`` passed to Plotly as ``x``.
+    :param y_axis: name of the column of ``df`` passed to Plotly as ``y``.
+    :param n_bins: passed to Plotly as ``nbins``; ``None`` leaves the choice to Plotly.
+    :param margin: forwarded to ``update_figure``.
+    :param color: forwarded to ``update_figure``.
+    :param vertical_lines: forwarded to ``update_figure``; maps an x position to the
+        (optional) annotation text of the vertical line drawn there.
+    :return: the configured figure.
+    """
+    fig = px.histogram(df, x=x_axis, y=y_axis, nbins=n_bins)
+    # The axis titles are set explicitly to the column names.
+    update_figure(
+        fig, margin=margin, color=color, vertical_lines=vertical_lines, x_axis_name=x_axis, y_axis_name=y_axis,
+    )
     return fig
 
 
@@ -46,6 +76,10 @@ def update_figure(
     margin: Optional[plotly_consts.MARGIN] = None,
     sort_order: Optional[plotly_consts.SORT_ORDER] = None,
     color: Optional[plotly_consts.COLOR] = None,
+    horizontal_lines: Optional[Dict[int, Optional[str]]] = None,
+    vertical_lines: Optional[Dict[int, Optional[str]]] = None,
+    x_axis_name: Optional[str] = None,
+    y_axis_name: Optional[str] = None,
 ) -> None:
     new_layout = {}
 
@@ -55,6 +89,12 @@ def update_figure(
     if sort_order is not None:
         new_layout["xaxis"] = {"categoryorder": sort_order.value}
 
+    if x_axis_name is not None:
+        new_layout['xaxis_title'] = x_axis_name
+
+    if y_axis_name is not None:
+        new_layout['yaxis_title'] = y_axis_name
+
     fig.update_layout(**new_layout)
 
     new_trace = {}
@@ -64,6 +104,14 @@ def update_figure(
 
     fig.update_traces(**new_trace)
 
+    if horizontal_lines is not None:
+        for y, annotation in horizontal_lines.items():
+            fig.add_hline(y=y, annotation_text=annotation)
+
+    if vertical_lines is not None:
+        for x, annotation in vertical_lines.items():
+            fig.add_vline(x=x, annotation_text=annotation)
+
 
 def save_plot(
     fig: go.Figure,

diff --git a/src/python/evaluation/plots/diffs_plotter.py b/src/python/evaluation/plots/diffs_plotter.py
@@ -15,13 +15,13 @@
 )
 from src.python.evaluation.inspectors.print_inspectors_statistics import gather_statistics
 from src.python.evaluation.plots.common import plotly_consts
-from src.python.evaluation.plots.common.plotters import (
+from src.python.evaluation.plots.common.utils import get_supported_extensions, save_plot
+from src.python.evaluation.plots.plotters.diffs_plotters import (
     get_issues_by_category,
     get_median_penalty_influence_by_category,
     get_penalty_influence_distribution,
     get_unique_issues_by_category,
 )
-from src.python.evaluation.plots.common.utils import get_supported_extensions, save_plot
 from src.python.review.common.file_system import deserialize_data_from_file, Extension, parse_yaml
 
 

diff --git a/src/python/evaluation/plots/examples/BEST_PRACTICES_box_plot.png b/src/python/evaluation/plots/examples/BEST_PRACTICES_box_plot.png
diff --git a/src/python/evaluation/plots/examples/CODE_STYLE_ratio_histogram.png b/src/python/evaluation/plots/examples/CODE_STYLE_ratio_histogram.png
diff --git a/src/python/evaluation/plots/examples/CYCLOMATIC_COMPLEXITY_line_chart.png b/src/python/evaluation/plots/examples/CYCLOMATIC_COMPLEXITY_line_chart.png
diff --git a/src/python/evaluation/plots/plotters/__init__.py b/src/python/evaluation/plots/plotters/__init__.py
diff --git a/...ython/evaluation/plots/common/plotters.py → ...aluation/plots/plotters/diffs_plotters.py b/...ython/evaluation/plots/common/plotters.py → ...aluation/plots/plotters/diffs_plotters.py
diff --git a/src/python/evaluation/plots/plotters/raw_issues_statistics_plotters.py b/src/python/evaluation/plots/plotters/raw_issues_statistics_plotters.py
@@ -0,0 +1,124 @@
+from dataclasses import dataclass
+from enum import Enum, unique
+from typing import Callable, Dict, List, Optional, Tuple
+
+import numpy as np
+import pandas as pd
+import plotly.graph_objects as go
+from src.python.evaluation.issues_statistics.get_raw_issues_statistics import VALUE
+from src.python.evaluation.plots.common import plotly_consts
+from src.python.evaluation.plots.common.utils import create_box_plot, create_histogram, create_line_chart
+
+
+@unique
+class PlotTypes(Enum):
+    LINE_CHART = 'line_chart'
+    HISTOGRAM = 'histogram'
+    BOX_PLOT = 'box_plot'
+
+    def to_plotter_function(self) -> Callable[..., go.Figure]:
+        type_to_function = {
+            PlotTypes.LINE_CHART: plot_line_chart,
+            PlotTypes.HISTOGRAM: plot_histogram,
+            PlotTypes.BOX_PLOT: plot_box_plot,
+        }
+
+        return type_to_function[self]
+
+
+@dataclass
+class PlotConfig:
+    """Settings of a single chart built for one column of the statistics."""
+
+    # Name of the statistics column to plot.
+    column: str
+    # Kind of chart to build.
+    type: PlotTypes
+    # Axis titles; when None, the plotter falls back to its own default names.
+    x_axis_name: Optional[str] = None
+    y_axis_name: Optional[str] = None
+    # Appearance settings, forwarded to the chart-building helper when the plotter supports them.
+    margin: Optional[plotly_consts.MARGIN] = None
+    color: Optional[plotly_consts.COLOR] = None
+    # Boundary value -> optional boundary name. Drawn as vertical lines on line charts
+    # and histograms and as horizontal lines on box plots.
+    boundaries: Optional[Dict[int, Optional[str]]] = None
+    # When set, only values contained in this range are plotted.
+    range_of_values: Optional[range] = None
+    # Number of bins, passed to the histogram builder only.
+    n_bins: Optional[int] = None
+
+
+def _prepare_stats(stats: pd.DataFrame, config: PlotConfig, x_axis_name: str, y_axis_name: str) -> pd.DataFrame:
+    result_df = stats[[VALUE, config.column]]
+
+    if config.range_of_values is not None:
+        result_df = result_df[result_df[VALUE].isin(config.range_of_values)]
+
+    result_df.set_index(VALUE, inplace=True)
+
+    # Trim trailing zeros
+    result_df = result_df.apply(lambda column: np.trim_zeros(column, trim='b')).dropna()
+
+    # Fill in the missing intermediate values with zeros
+    min_index, max_index = result_df.index.min(), result_df.index.max()
+    result_df = result_df.reindex(range(min_index, max_index + 1), fill_value=0)
+
+    result_df.reset_index(inplace=True)
+
+    return result_df.rename(columns={VALUE: x_axis_name, config.column: y_axis_name})
+
+
+def _get_axis_names(config: PlotConfig, default_x_axis_name: str, default_y_axis_name: str) -> Tuple[str, str]:
+    x_axis_name = default_x_axis_name
+    if config.x_axis_name is not None:
+        x_axis_name = config.x_axis_name
+
+    y_axis_name = default_y_axis_name
+    if config.y_axis_name is not None:
+        y_axis_name = config.y_axis_name
+
+    return x_axis_name, y_axis_name
+
+
+def plot_line_chart(stats: pd.DataFrame, config: PlotConfig) -> go.Figure:
+    x_axis_name, y_axis_name = _get_axis_names(
+        config, default_x_axis_name='Value', default_y_axis_name='Number of fragments',
+    )
+
+    stats = _prepare_stats(stats, config, x_axis_name, y_axis_name)
+
+    return create_line_chart(
+        stats, x_axis_name, y_axis_name, margin=config.margin, color=config.color, vertical_lines=config.boundaries,
+    )
+
+
+def plot_histogram(stats: pd.DataFrame, config: PlotConfig) -> go.Figure:
+    x_axis_name, y_axis_name = _get_axis_names(
+        config, default_x_axis_name='Value', default_y_axis_name='Number of fragments',
+    )
+
+    stats = _prepare_stats(stats, config, x_axis_name, y_axis_name)
+
+    return create_histogram(
+        stats,
+        x_axis_name,
+        y_axis_name,
+        margin=config.margin,
+        color=config.color,
+        n_bins=config.n_bins,
+        vertical_lines=config.boundaries,
+    )
+
+
+def _get_all_values_from_stats(stats: pd.DataFrame, column_name: str) -> List[int]:
+    result = []
+    stats.apply(lambda row: result.extend([row[VALUE]] * row[column_name]), axis=1)
+    return result
+
+
+def plot_box_plot(stats: pd.DataFrame, config: PlotConfig) -> go.Figure:
+    x_axis_name, y_axis_name = _get_axis_names(
+        config,
+        default_x_axis_name="Category",
+        default_y_axis_name='Values',
+    )
+
+    values = _get_all_values_from_stats(stats, config.column)
+
+    if config.range_of_values is not None:
+        values = list(filter(lambda elem: elem in config.range_of_values, values))
+
+    values_df = pd.DataFrame.from_dict({x_axis_name: config.column, y_axis_name: values})
+
+    return create_box_plot(values_df, x_axis_name, y_axis_name, horizontal_lines=config.boundaries)