diff --git a/src/python/evaluation/plots/README.md b/src/python/evaluation/plots/README.md
index 6c8c3f11..bacce3d2 100644
--- a/src/python/evaluation/plots/README.md
+++ b/src/python/evaluation/plots/README.md
@@ -1,7 +1,7 @@
# Hyperstyle evaluation: plots
-This module allows you to visualize the data obtained with the [inspectors](../inspectors) module
+This module allows you to visualize the data.
-## [diffs_plotter.py](diffs_plotter.py)
+## Diffs plotter
This script allows you to visualize a dataset obtained with [diffs_between_df.py](../inspectors/diffs_between_df.py).
The script can build the following charts:
@@ -83,3 +83,68 @@ The result will be four graphs (`unique_issues_by_category`, `unique_penalty_iss
#### Distribution of influence on penalty by category
+
+## Raw issues statistics plotter
+This script allows you to visualize a dataset obtained with [get_raw_issues_statistics.py](../issues_statistics/get_raw_issues_statistics.py).
+
+The script can build the following charts:
+* Line chart ([Example](#line-chart))
+* Box plot ([Example](#box-plot))
+* Histogram ([Example](#histogram))
+
+### Usage
+Run the [raw_issues_statistics_plotter.py](raw_issues_statistics_plotter.py) with the arguments from command line.
+
+**Required arguments**:
+1. `stats` — path to a file with stats that were found by [get_raw_issues_statistics.py](../issues_statistics/get_raw_issues_statistics.py).
+2. `save_dir` — directory where the plotted charts will be saved.
+3. `config_path` — path to the yaml file containing information about the charts to be plotted. A description of the config and its example is provided in [this section](#config-1).
+
+**Optional arguments**:
+
+Argument | Description
+--- | ---
+**‑‑file‑extension** | Allows you to select the extension of output files. Available extensions: `.png`, `.jpg`, `.jpeg`, `.webp`, `.svg`, `.pdf`, `.eps`, `.json`. Default is `.svg`.
+
+### Config
+The configuration file is a dictionary in yaml format, where for each column of the original dataset the types of graphs to be plotted are specified. You can also put the common parameters when plotting multiple graphs for one column in a separate `common` group.
+
+**Possible values of the charts**:
+* `line_chart`
+* `histogram`
+* `box_plot`
+
+**Possible parameters**:
+Parameter | Description
+---|---
+**x_axis_name** | Name of the x-axis. The default value depends on the type of chart.
+**y_axis_name** | Name of the y-axis. The default value depends on the type of chart.
+**boundaries** | Dictionary consisting of pairs `boundary value`: `boundary name` (the boundary name may be omitted). Allows you to draw vertical or horizontal lines on graphs (depending on the type of plot). By default, the boundaries are not drawn.
+**range_of_values** | Allows you to filter the values. It is an array of two values: a and b. Only values that belong to the range [a, b) are taken into account when plotting. By default, all values are taken into account when plotting.
+**margin** | Defines the outer margin on all four sides of the chart. The available values are specified in the Enum class `MARGIN` from [plots const file](./common/plotly_consts.py). If not specified, the default value provided by Plotly is used.
+**sort_order** | Defines the sorting order of the chart. The available values are specified in the Enum class `SORT_ORDER` from [plots const file](./common/plotly_consts.py). If not specified, the default value provided by Plotly is used.
+**color** | Defines the color of the chart. The available values are specified in the Enum class `COLOR` from [plots const file](./common/plotly_consts.py). If not specified, the default value provided by Plotly is used.
+**n_bins** | Allows you to adjust the number of bins when plotting a histogram. By default, this value is set by Plotly.
+
+#### Example of config
+```yaml
+CYCLOMATIC_COMPLEXITY:
+  line_chart:
+    x_axis_name: Cyclomatic complexity value
+  histogram:
+  common:
+    range_of_values: [0, 20]
+```
+
+The result will be two graphs: line chart and histogram. The values in both charts will be between 0 and 19 inclusive. In the line chart the x-axis will be named "Cyclomatic complexity value".
+
+### Examples
+
+#### Line chart
+
+
+#### Box plot
+
+
+#### Histogram
+
\ No newline at end of file
diff --git a/src/python/evaluation/plots/common/utils.py b/src/python/evaluation/plots/common/utils.py
index 8fb1673e..5c14d07c 100644
--- a/src/python/evaluation/plots/common/utils.py
+++ b/src/python/evaluation/plots/common/utils.py
@@ -1,6 +1,6 @@
import os
from pathlib import Path
-from typing import List, Optional
+from typing import Dict, List, Optional
import pandas as pd
import plotly.express as px
@@ -35,9 +35,39 @@ def create_box_plot(
margin: Optional[plotly_consts.MARGIN] = None,
sort_order: Optional[plotly_consts.SORT_ORDER] = None,
color: Optional[plotly_consts.COLOR] = None,
+ horizontal_lines: Optional[Dict[int, Optional[str]]] = None,
) -> go.Figure:
fig = px.box(df, x=x_axis, y=y_axis)
- update_figure(fig, margin, sort_order, color)
+ update_figure(fig, margin=margin, sort_order=sort_order, color=color, horizontal_lines=horizontal_lines)
+ return fig
+
+
+def create_line_chart(
+    df: pd.DataFrame,
+    x_axis: str,
+    y_axis: str,
+    margin: Optional[plotly_consts.MARGIN] = None,
+    color: Optional[plotly_consts.COLOR] = None,
+    vertical_lines: Optional[Dict[int, Optional[str]]] = None,
+) -> go.Figure:
+    """
+    Build a line chart from two columns of ``df``.
+
+    :param df: data to plot.
+    :param x_axis: name of the column used for the x-axis.
+    :param y_axis: name of the column used for the y-axis.
+    :param margin: forwarded to ``update_figure``.
+    :param color: forwarded to ``update_figure``.
+    :param vertical_lines: forwarded to ``update_figure``; maps an x position to an optional annotation text.
+    :return: the line chart figure.
+    """
+    line_chart = px.line(df, x=x_axis, y=y_axis)
+    update_figure(line_chart, margin=margin, color=color, vertical_lines=vertical_lines)
+    return line_chart
+
+
+def create_histogram(
+    df: pd.DataFrame,
+    x_axis: str,
+    y_axis: str,
+    n_bins: Optional[int] = None,
+    margin: Optional[plotly_consts.MARGIN] = None,
+    color: Optional[plotly_consts.COLOR] = None,
+    vertical_lines: Optional[Dict[int, Optional[str]]] = None,
+) -> go.Figure:
+    """
+    Build a histogram from two columns of ``df``.
+
+    :param df: data to plot.
+    :param x_axis: name of the column used for the x-axis.
+    :param y_axis: name of the column used for the y-axis.
+    :param n_bins: passed to Plotly as ``nbins``.
+    :param margin: forwarded to ``update_figure``.
+    :param color: forwarded to ``update_figure``.
+    :param vertical_lines: forwarded to ``update_figure``; maps an x position to an optional annotation text.
+    :return: the histogram figure.
+    """
+    fig = px.histogram(df, x=x_axis, y=y_axis, nbins=n_bins)
+
+    # The axis titles are set explicitly from the column names.
+    figure_options = {
+        'margin': margin,
+        'color': color,
+        'vertical_lines': vertical_lines,
+        'x_axis_name': x_axis,
+        'y_axis_name': y_axis,
+    }
+    update_figure(fig, **figure_options)
     return fig
@@ -46,6 +76,10 @@ def update_figure(
margin: Optional[plotly_consts.MARGIN] = None,
sort_order: Optional[plotly_consts.SORT_ORDER] = None,
color: Optional[plotly_consts.COLOR] = None,
+ horizontal_lines: Optional[Dict[int, Optional[str]]] = None,
+ vertical_lines: Optional[Dict[int, Optional[str]]] = None,
+ x_axis_name: Optional[str] = None,
+ y_axis_name: Optional[str] = None,
) -> None:
new_layout = {}
@@ -55,6 +89,12 @@ def update_figure(
if sort_order is not None:
new_layout["xaxis"] = {"categoryorder": sort_order.value}
+ if x_axis_name is not None:
+ new_layout['xaxis_title'] = x_axis_name
+
+ if y_axis_name is not None:
+ new_layout['yaxis_title'] = y_axis_name
+
fig.update_layout(**new_layout)
new_trace = {}
@@ -64,6 +104,14 @@ def update_figure(
fig.update_traces(**new_trace)
+ if horizontal_lines is not None:
+ for y, annotation in horizontal_lines.items():
+ fig.add_hline(y=y, annotation_text=annotation)
+
+ if vertical_lines is not None:
+ for x, annotation in vertical_lines.items():
+ fig.add_vline(x=x, annotation_text=annotation)
+
def save_plot(
fig: go.Figure,
diff --git a/src/python/evaluation/plots/diffs_plotter.py b/src/python/evaluation/plots/diffs_plotter.py
index 500b018e..5ecce6f7 100644
--- a/src/python/evaluation/plots/diffs_plotter.py
+++ b/src/python/evaluation/plots/diffs_plotter.py
@@ -15,13 +15,13 @@
)
from src.python.evaluation.inspectors.print_inspectors_statistics import gather_statistics
from src.python.evaluation.plots.common import plotly_consts
-from src.python.evaluation.plots.common.plotters import (
+from src.python.evaluation.plots.common.utils import get_supported_extensions, save_plot
+from src.python.evaluation.plots.plotters.diffs_plotters import (
get_issues_by_category,
get_median_penalty_influence_by_category,
get_penalty_influence_distribution,
get_unique_issues_by_category,
)
-from src.python.evaluation.plots.common.utils import get_supported_extensions, save_plot
from src.python.review.common.file_system import deserialize_data_from_file, Extension, parse_yaml
diff --git a/src/python/evaluation/plots/examples/BEST_PRACTICES_box_plot.png b/src/python/evaluation/plots/examples/BEST_PRACTICES_box_plot.png
new file mode 100644
index 00000000..b0108a1a
Binary files /dev/null and b/src/python/evaluation/plots/examples/BEST_PRACTICES_box_plot.png differ
diff --git a/src/python/evaluation/plots/examples/CODE_STYLE_ratio_histogram.png b/src/python/evaluation/plots/examples/CODE_STYLE_ratio_histogram.png
new file mode 100644
index 00000000..6ebce84e
Binary files /dev/null and b/src/python/evaluation/plots/examples/CODE_STYLE_ratio_histogram.png differ
diff --git a/src/python/evaluation/plots/examples/CYCLOMATIC_COMPLEXITY_line_chart.png b/src/python/evaluation/plots/examples/CYCLOMATIC_COMPLEXITY_line_chart.png
new file mode 100644
index 00000000..3cb363e3
Binary files /dev/null and b/src/python/evaluation/plots/examples/CYCLOMATIC_COMPLEXITY_line_chart.png differ
diff --git a/src/python/evaluation/plots/plotters/__init__.py b/src/python/evaluation/plots/plotters/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src/python/evaluation/plots/common/plotters.py b/src/python/evaluation/plots/plotters/diffs_plotters.py
similarity index 100%
rename from src/python/evaluation/plots/common/plotters.py
rename to src/python/evaluation/plots/plotters/diffs_plotters.py
diff --git a/src/python/evaluation/plots/plotters/raw_issues_statistics_plotters.py b/src/python/evaluation/plots/plotters/raw_issues_statistics_plotters.py
new file mode 100644
index 00000000..c545313d
--- /dev/null
+++ b/src/python/evaluation/plots/plotters/raw_issues_statistics_plotters.py
@@ -0,0 +1,124 @@
+from dataclasses import dataclass
+from enum import Enum, unique
+from typing import Callable, Dict, List, Optional, Tuple
+
+import numpy as np
+import pandas as pd
+import plotly.graph_objects as go
+from src.python.evaluation.issues_statistics.get_raw_issues_statistics import VALUE
+from src.python.evaluation.plots.common import plotly_consts
+from src.python.evaluation.plots.common.utils import create_box_plot, create_histogram, create_line_chart
+
+
+@unique
+class PlotTypes(Enum):
+    """Chart types that can be requested for a column; the values are the names used as keys in the config."""
+
+    LINE_CHART = 'line_chart'
+    HISTOGRAM = 'histogram'
+    BOX_PLOT = 'box_plot'
+
+    def to_plotter_function(self) -> Callable[..., go.Figure]:
+        """Return the module-level function that plots this type of chart."""
+        # The plotters are defined further down in the module, so they are looked up at call time.
+        if self is PlotTypes.LINE_CHART:
+            return plot_line_chart
+        if self is PlotTypes.HISTOGRAM:
+            return plot_histogram
+        if self is PlotTypes.BOX_PLOT:
+            return plot_box_plot
+        raise KeyError(self)
+
+
+@dataclass
+class PlotConfig:
+    """Settings of a single chart built for one column of the statistics dataset."""
+
+    # NOTE(review): the README lists ``sort_order`` among the possible parameters, but there is no such field
+    # here, so passing it to the constructor would fail — confirm which of the two is intended.
+    column: str  # name of the statistics column to plot
+    type: PlotTypes  # kind of chart to build
+    x_axis_name: Optional[str] = None  # x-axis title; the plotters fall back to their own default when None
+    y_axis_name: Optional[str] = None  # y-axis title; the plotters fall back to their own default when None
+    margin: Optional[plotly_consts.MARGIN] = None
+    color: Optional[plotly_consts.COLOR] = None
+    # boundary value -> optional annotation; drawn as vertical lines on line charts and histograms
+    # and as horizontal lines on box plots
+    boundaries: Optional[Dict[int, Optional[str]]] = None
+    range_of_values: Optional[range] = None  # only values contained in this range are plotted
+    n_bins: Optional[int] = None  # number of bins, passed on by the histogram plotter
+
+
+def _prepare_stats(stats: pd.DataFrame, config: PlotConfig, x_axis_name: str, y_axis_name: str) -> pd.DataFrame:
+    """
+    Convert the statistics of ``config.column`` into a two-column frame ready for plotting.
+
+    Rows outside ``config.range_of_values`` are dropped (when a range is set), trailing zero counts are trimmed
+    and the gaps between the smallest and the largest remaining value are filled with zero counts.
+
+    :param stats: dataset with the ``VALUE`` column and a column named ``config.column``.
+    :param config: chart settings; only ``column`` and ``range_of_values`` are used here.
+    :param x_axis_name: name given to the ``VALUE`` column in the result.
+    :param y_axis_name: name given to the ``config.column`` column in the result.
+    :return: a new frame with the columns ``x_axis_name`` and ``y_axis_name``;
+        it has no rows when nothing is left to plot.
+    """
+    new_column_names = {VALUE: x_axis_name, config.column: y_axis_name}
+
+    result_df = stats[[VALUE, config.column]]
+
+    if config.range_of_values is not None:
+        result_df = result_df[result_df[VALUE].isin(config.range_of_values)]
+
+    # Not in place: result_df is a selection taken from the caller's frame.
+    result_df = result_df.set_index(VALUE)
+
+    # Trim trailing zeros
+    result_df = result_df.apply(lambda column: np.trim_zeros(column, trim='b')).dropna()
+
+    if result_df.empty:
+        # min() and max() of an empty index are NaN, which range() below would reject.
+        return result_df.reset_index().rename(columns=new_column_names)
+
+    # Fill in the missing intermediate values with zeros
+    min_index, max_index = result_df.index.min(), result_df.index.max()
+    result_df = result_df.reindex(range(min_index, max_index + 1), fill_value=0)
+
+    return result_df.reset_index().rename(columns=new_column_names)
+
+
+def _get_axis_names(config: PlotConfig, default_x_axis_name: str, default_y_axis_name: str) -> Tuple[str, str]:
+    """Return the (x, y) axis names: those set in ``config``, or the given defaults where ``config`` has None."""
+    x_axis_name = default_x_axis_name if config.x_axis_name is None else config.x_axis_name
+    y_axis_name = default_y_axis_name if config.y_axis_name is None else config.y_axis_name
+    return x_axis_name, y_axis_name
+
+
+def plot_line_chart(stats: pd.DataFrame, config: PlotConfig) -> go.Figure:
+    """
+    Draw the statistics of ``config.column`` as a line chart.
+
+    :param stats: dataset with the ``VALUE`` column and a column named ``config.column``.
+    :param config: chart settings; the boundaries are drawn as vertical lines.
+    :return: the line chart figure.
+    """
+    x_title, y_title = _get_axis_names(
+        config,
+        default_x_axis_name='Value',
+        default_y_axis_name='Number of fragments',
+    )
+    chart_data = _prepare_stats(stats, config, x_title, y_title)
+
+    return create_line_chart(
+        chart_data,
+        x_title,
+        y_title,
+        margin=config.margin,
+        color=config.color,
+        vertical_lines=config.boundaries,
+    )
+
+
+def plot_histogram(stats: pd.DataFrame, config: PlotConfig) -> go.Figure:
+    """
+    Draw the statistics of ``config.column`` as a histogram.
+
+    :param stats: dataset with the ``VALUE`` column and a column named ``config.column``.
+    :param config: chart settings; the boundaries are drawn as vertical lines and ``n_bins`` sets the bin count.
+    :return: the histogram figure.
+    """
+    x_title, y_title = _get_axis_names(
+        config,
+        default_x_axis_name='Value',
+        default_y_axis_name='Number of fragments',
+    )
+    chart_data = _prepare_stats(stats, config, x_title, y_title)
+
+    histogram_options = {
+        'margin': config.margin,
+        'color': config.color,
+        'n_bins': config.n_bins,
+        'vertical_lines': config.boundaries,
+    }
+    return create_histogram(chart_data, x_title, y_title, **histogram_options)
+
+
+def _get_all_values_from_stats(stats: pd.DataFrame, column_name: str) -> List[int]:
+    """
+    Expand the statistics into a flat list in which every value occurs as many times as it was counted.
+
+    :param stats: dataset with the ``VALUE`` column and a column of counts named ``column_name``.
+    :param column_name: name of the column holding the number of occurrences of each value.
+    :return: the values in row order, each repeated as many times as its count.
+    """
+    # Only the two relevant columns are read. A row-wise ``apply`` would coerce every row to the common dtype
+    # of all columns, which breaks the repetition as soon as the frame holds a non-integer column.
+    return np.repeat(stats[VALUE].to_numpy(), stats[column_name].to_numpy()).tolist()
+
+
+def plot_box_plot(stats: pd.DataFrame, config: PlotConfig) -> go.Figure:
+    """
+    Draw the distribution of the values of ``config.column`` as a box plot.
+
+    :param stats: dataset with the ``VALUE`` column and a column of counts named ``config.column``.
+    :param config: chart settings; the boundaries are drawn as horizontal lines.
+    :return: the box plot figure.
+    """
+    x_axis_name, y_axis_name = _get_axis_names(
+        config,
+        default_x_axis_name="Category",
+        default_y_axis_name='Values',
+    )
+
+    values = _get_all_values_from_stats(stats, config.column)
+
+    if config.range_of_values is not None:
+        values = [value for value in values if value in config.range_of_values]
+
+    values_df = pd.DataFrame.from_dict({x_axis_name: config.column, y_axis_name: values})
+
+    # Margin and color are forwarded just like in the line chart and histogram plotters;
+    # without this the two settings were silently ignored for box plots.
+    return create_box_plot(
+        values_df,
+        x_axis_name,
+        y_axis_name,
+        margin=config.margin,
+        color=config.color,
+        horizontal_lines=config.boundaries,
+    )
diff --git a/src/python/evaluation/plots/raw_issues_statistics_plotter.py b/src/python/evaluation/plots/raw_issues_statistics_plotter.py
new file mode 100644
index 00000000..02888080
--- /dev/null
+++ b/src/python/evaluation/plots/raw_issues_statistics_plotter.py
@@ -0,0 +1,132 @@
+import argparse
+import sys
+from enum import Enum, unique
+from pathlib import Path
+from typing import Dict, List, Optional
+
+sys.path.append('../../../..')
+
+import pandas as pd
+from src.python.evaluation.common.pandas_util import get_solutions_df_by_file_path
+from src.python.evaluation.plots.common import plotly_consts
+from src.python.evaluation.plots.common.utils import (
+ get_supported_extensions,
+ save_plot,
+)
+from src.python.evaluation.plots.plotters.raw_issues_statistics_plotters import PlotConfig, PlotTypes
+from src.python.review.common.file_system import Extension, parse_yaml
+
+
+@unique
+class ConfigFields(Enum):
+    """Keys that are recognised inside the yaml config of a column."""
+
+    X_AXIS_NAME = 'x_axis_name'
+    Y_AXIS_NAME = 'y_axis_name'
+    MARGIN = 'margin'
+    COLOR = 'color'
+    BOUNDARIES = 'boundaries'
+    # Not a chart parameter: names the group of parameters shared by all charts of a column.
+    COMMON = 'common'
+    RANGE_OF_VALUES = 'range_of_values'
+    N_BINS = 'n_bins'
+
+
+# Plain-string aliases of the config keys, so the code below can use them without ``.value``.
+X_AXIS_NAME = ConfigFields.X_AXIS_NAME.value
+Y_AXIS_NAME = ConfigFields.Y_AXIS_NAME.value
+MARGIN = ConfigFields.MARGIN.value
+COLOR = ConfigFields.COLOR.value
+BOUNDARIES = ConfigFields.BOUNDARIES.value
+COMMON = ConfigFields.COMMON.value
+RANGE_OF_VALUES = ConfigFields.RANGE_OF_VALUES.value
+N_BINS = ConfigFields.N_BINS.value
+
+
+def configure_arguments(parser: argparse.ArgumentParser) -> None:
+    """Register the three positional path arguments and the ``--file-extension`` option on ``parser``."""
+
+    def to_absolute_path(value: str) -> Path:
+        return Path(value).absolute()
+
+    # (argument name, help text) of the positional arguments, in command line order.
+    positional_arguments = (
+        ('stats', 'Path to dataset with statistics.'),
+        ('save_dir', 'The directory where the plotted charts will be saved.'),
+        ('config_path', 'Path to the yaml file containing information about the graphs to be plotted.'),
+    )
+    for argument_name, help_text in positional_arguments:
+        parser.add_argument(argument_name, type=to_absolute_path, help=help_text)
+
+    parser.add_argument(
+        '--file-extension',
+        type=str,
+        default=Extension.SVG.value,
+        choices=get_supported_extensions(),
+        help='Allows you to select the extension of output files.',
+    )
+
+
+def _get_plot_config(
+    column_name: str,
+    plot_type: str,
+    plot_config: Optional[Dict],
+    common: Optional[Dict],
+) -> PlotConfig:
+    """
+    Build the ``PlotConfig`` of one chart.
+
+    The parameters from ``common`` are applied first and are overridden by those from ``plot_config``.
+    Margin and color names are upper-cased and looked up by name in ``plotly_consts.MARGIN`` and
+    ``plotly_consts.COLOR``; the range of values is turned into a ``range``.
+
+    :param column_name: name of the statistics column the chart is built for.
+    :param plot_type: name of the chart type; it is lower-cased and must be a value of ``PlotTypes``.
+    :param plot_config: parameters of this particular chart, or None.
+    :param common: parameters shared by all charts of the column, or None.
+    :return: the assembled chart settings.
+    """
+    params = {'column': column_name, 'type': PlotTypes(plot_type.lower())}
+
+    # Later sources win, so the chart's own parameters override the common ones.
+    for overrides in (common, plot_config):
+        if overrides is not None:
+            params.update(overrides)
+
+    for field_name, enum_class in ((MARGIN, plotly_consts.MARGIN), (COLOR, plotly_consts.COLOR)):
+        if field_name in params:
+            member_name = params.get(field_name).upper()
+            params[field_name] = enum_class[member_name]
+
+    if RANGE_OF_VALUES in params:
+        range_arguments = params[RANGE_OF_VALUES]
+        params[RANGE_OF_VALUES] = range(*range_arguments)
+
+    return PlotConfig(**params)
+
+
+def get_plot_configs(column_name: str, column_config: Dict) -> List[PlotConfig]:
+    """
+    Build the settings of every chart requested for one column.
+
+    :param column_name: name of the statistics column.
+    :param column_config: maps a chart type name to its parameters (possibly None); the optional ``common``
+        entry holds parameters shared by all charts of the column. The dictionary is not modified.
+    :return: one ``PlotConfig`` per requested chart, in config order; empty when no chart is requested.
+    """
+    if not column_config:
+        # A column listed without any charts is parsed as None or as an empty dictionary.
+        return []
+
+    # Read the shared group without popping it, so the caller's config can be reused afterwards.
+    common = column_config.get(COMMON)
+
+    return [
+        _get_plot_config(column_name, plot_type, plot_config, common)
+        for plot_type, plot_config in column_config.items()
+        if plot_type != COMMON
+    ]
+
+
+def plot_and_save(config: Dict, stats: pd.DataFrame, save_dir: Path, extension: Extension) -> None:
+    """
+    Plot every chart described in ``config`` and save it into a per-column subdirectory of ``save_dir``.
+
+    :param config: maps a column name to the config of its charts.
+    :param stats: dataset with the statistics to plot.
+    :param save_dir: root directory for the charts; each column gets a subdirectory named after it.
+    :param extension: extension of the saved files.
+    """
+    for column_name, column_config in config.items():
+        for chart_config in get_plot_configs(column_name, column_config):
+            draw_chart = chart_config.type.to_plotter_function()
+            figure = draw_chart(stats, chart_config)
+            column_dir = save_dir / chart_config.column
+            chart_name = f'{chart_config.column}_{chart_config.type.value}'
+            save_plot(figure, column_dir, plot_name=chart_name, extension=extension)
+
+
+def main():
+    """Parse the command line arguments, read the statistics and the config, then plot and save the charts."""
+    argument_parser = argparse.ArgumentParser()
+    configure_arguments(argument_parser)
+    arguments = argument_parser.parse_args()
+
+    statistics = get_solutions_df_by_file_path(arguments.stats)
+    output_extension = Extension(arguments.file_extension)
+    charts_config = parse_yaml(arguments.config_path)
+
+    plot_and_save(charts_config, statistics, arguments.save_dir, output_extension)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/whitelist.txt b/whitelist.txt
index 67d97c06..cae7f1ef 100644
--- a/whitelist.txt
+++ b/whitelist.txt
@@ -12,6 +12,7 @@ checkstyle
cloneable
concat
config
+configs
conftest
const
consts
@@ -65,6 +66,7 @@ getuid
gradle
groupby
hashtable
+hline
hyperstyle
idx
ignorecase
@@ -108,6 +110,7 @@ multilabel
multiline
multithreading
namespace
+nbins
ncss
ndarray
nl
@@ -179,6 +182,7 @@ usecols
util
utils
varargs
+vline
wandb
warmup
webp