diff --git a/src/python/evaluation/paper_evaluation/issues_statistics/README.md b/src/python/evaluation/paper_evaluation/issues_statistics/README.md
new file mode 100644
index 00000000..0c185a79
--- /dev/null
+++ b/src/python/evaluation/paper_evaluation/issues_statistics/README.md
@@ -0,0 +1,107 @@
+# Raw issue statistics visualization
+
+This script allows you to visualize raw issue statistics for a paper.
+
+## Usage
+Run the [raw_issues_statistics_visualization.py](./raw_issues_statistics_visualization.py) script from the command line with the arguments described below.
+
+**Required arguments**:
+
+- `stats_path` — path to a file with the statistics produced by [get_raw_issues_statistics.py](../../issues_statistics/get_raw_issues_statistics.py). Must be an xlsx or csv file.
+- `config_path` — path to the yaml file containing information about the charts to be plotted. A description of the config and an example are provided in [this section](#config).
+- `save_dir` — directory where the plotted charts will be saved.
+
+**Optional arguments**:
+Argument | Description
+--- | ---
+**‑‑file‑extension** | Allows you to select the extension of output files. Available extensions: `.png`, `.jpg`, `.jpeg`, `.webp`, `.svg`, `.pdf`, `.eps`, `.json`. Default is `.svg`.
+
+## Config
+The configuration file is a yaml file that maps each group name to its group config; one chart is plotted and saved for every group. The group config contains `plot_config` and a config for each statistics column that should be drawn on the chart.
+
+The `plot_config` consists of the following parameters:
+- `rows` — number of rows. Default: `1`.
+- `cols` — number of cols. Default: `1`.
+- `height` — graph height. Default: `800`.
+- `width` — graph width. Default: `1600`.
+- `x_axis_name` — name of the x-axis. Default: `Value`.
+- `y_axis_name` — name of the y-axis. Default: `Quantity`.
+- `specs` — configuration of traces on the graph. See [documentation](https://plotly.com/python-api-reference/generated/plotly.subplots.make_subplots.html) for details. Default: `None`.
+
+The column config consists of the following arguments:
+- `range_of_values` — allows you to filter the values. It is an array of two values: a and b. Only values that belong to the range [a, b) are taken into account when plotting. By default, all values are taken into account when plotting.
+- `trace_name` — trace name. The default is the name of the column.
+
+## Examples
+### config.yaml
+```yaml
+measurable:
+  plot_config:
+    rows: 2
+    cols: 2
+    specs: [[{}, {}], [{colspan: 2}, null]]
+    x_axis_name: Measure
+    y_axis_name: Number of issues
+  BOOL_EXPR_LEN:
+    range_of_values: [1, 11]
+    trace_name: Boolean Expression Length
+  CYCLOMATIC_COMPLEXITY:
+    range_of_values: [1, 11]
+    trace_name: Cyclomatic Complexity
+  FUNC_LEN:
+    range_of_values: [0, 60]
+    trace_name: Function Length
+
+maintainability_and_cohesion:
+  plot_config:
+    rows: 2
+    width: 1000
+    x_axis_name: Lack of measure (%)
+    y_axis_name: Number of issues
+  MAINTAINABILITY:
+    trace_name: Maintainability
+  COHESION:
+    trace_name: Cohesion
+
+ratio:
+  plot_config:
+    rows: 2
+    width: 1000
+    x_axis_name: Ratio (%)
+    y_axis_name: Number of fragments
+  CODE_STYLE_ratio:
+    range_of_values: [ 1, 101 ]
+    trace_name: Code Style
+  LINE_LEN_ratio:
+    range_of_values: [ 1, 101 ]
+    trace_name: Line Length
+
+countable:
+  plot_config:
+    rows: 2
+    cols: 2
+    specs: [[{"rowspan": 2}, {}], [null, {}]]
+    x_axis_name: Number of issues in one fragment
+    y_axis_name: Number of fragments
+  ERROR_PRONE:
+    range_of_values: [ 0, 10 ]
+    trace_name: Error Prone
+  BEST_PRACTICES:
+    range_of_values: [ 0, 10 ]
+    trace_name: Best Practices
+  COMPLEXITY:
+    range_of_values: [ 0, 10 ]
+    trace_name: Complexity
+```
+
+### measurable.png
+![measurable.png](./examples/measurable.png)
+
+### maintainability_and_cohesion.png
+![maintainability_and_cohesion.png](./examples/maintainability_and_cohesion.png)
+
+### ratio.png
+![ratio.png](./examples/ratio.png)
+
+### countable.png
+![countable.png](./examples/countable.png)
diff --git a/src/python/evaluation/paper_evaluation/issues_statistics/__init__.py b/src/python/evaluation/paper_evaluation/issues_statistics/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src/python/evaluation/paper_evaluation/issues_statistics/examples/countable.png b/src/python/evaluation/paper_evaluation/issues_statistics/examples/countable.png
new file mode 100644
index 00000000..28f86363
Binary files /dev/null and b/src/python/evaluation/paper_evaluation/issues_statistics/examples/countable.png differ
diff --git a/src/python/evaluation/paper_evaluation/issues_statistics/examples/maintainability_and_cohesion.png b/src/python/evaluation/paper_evaluation/issues_statistics/examples/maintainability_and_cohesion.png
new file mode 100644
index 00000000..a037a49a
Binary files /dev/null and b/src/python/evaluation/paper_evaluation/issues_statistics/examples/maintainability_and_cohesion.png differ
diff --git a/src/python/evaluation/paper_evaluation/issues_statistics/examples/measurable.png b/src/python/evaluation/paper_evaluation/issues_statistics/examples/measurable.png
new file mode 100644
index 00000000..ae892783
Binary files /dev/null and b/src/python/evaluation/paper_evaluation/issues_statistics/examples/measurable.png differ
diff --git a/src/python/evaluation/paper_evaluation/issues_statistics/examples/ratio.png b/src/python/evaluation/paper_evaluation/issues_statistics/examples/ratio.png
new file mode 100644
index 00000000..52d9953c
Binary files /dev/null and b/src/python/evaluation/paper_evaluation/issues_statistics/examples/ratio.png differ
diff --git a/src/python/evaluation/paper_evaluation/issues_statistics/raw_issues_statistics_visualization.py b/src/python/evaluation/paper_evaluation/issues_statistics/raw_issues_statistics_visualization.py
new file mode 100644
index 00000000..10450119
--- /dev/null
+++ b/src/python/evaluation/paper_evaluation/issues_statistics/raw_issues_statistics_visualization.py
@@ -0,0 +1,207 @@
+import argparse
+import logging
+import sys
+from dataclasses import dataclass
+from enum import Enum
+from pathlib import Path
+from typing import Dict, List, Optional
+
+import pandas as pd
+import plotly.graph_objects as go
+from plotly.subplots import make_subplots
+from src.python.evaluation.common.pandas_util import get_solutions_df_by_file_path
+from src.python.evaluation.plots.common.utils import get_supported_extensions, save_plot
+from src.python.evaluation.plots.plotters.raw_issues_statistics_plotters import prepare_stats
+from src.python.review.common.file_system import Extension, parse_yaml
+
+# Module-level logger.
+logger = logging.getLogger(__name__)
+# Color sequence handed to the figure layout as its `colorway` (see _update_fig).
+COLORWAY = ['rgb(47,22,84)', 'rgb(99,47,177)', 'rgb(153,110,216)']
+
+
+class _ConfigFields(Enum):
+    """Keys that are recognised in the yaml config of the script."""
+
+    # Name of the section of a group config that holds the plot-level settings.
+    PLOT_CONFIG = 'plot_config'
+    # Plot-level settings; they match the fields of the PlotConfig dataclass.
+    ROWS = 'rows'
+    COLS = 'cols'
+    SPECS = 'specs'
+    HEIGHT = 'height'
+    WIDTH = 'width'
+    X_AXIS_NAME = 'x_axis_name'
+    Y_AXIS_NAME = 'y_axis_name'
+
+    # Per-column settings; they match the fields of the TraceConfig dataclass.
+    RANGE_OF_VALUES = 'range_of_values'
+    TRACE_NAME = 'trace_name'
+
+
+# Plain-string aliases of the _ConfigFields values.
+_PLOT_CONFIG = _ConfigFields.PLOT_CONFIG.value
+_ROWS = _ConfigFields.ROWS.value
+_COLS = _ConfigFields.COLS.value
+_SPECS = _ConfigFields.SPECS.value
+_HEIGHT = _ConfigFields.HEIGHT.value
+_WIDTH = _ConfigFields.WIDTH.value
+_X_AXIS_NAME = _ConfigFields.X_AXIS_NAME.value
+_Y_AXIS_NAME = _ConfigFields.Y_AXIS_NAME.value
+_RANGE_OF_VALUES = _ConfigFields.RANGE_OF_VALUES.value
+_TRACE_NAME = _ConfigFields.TRACE_NAME.value
+
+
+@dataclass
+class PlotConfig:
+    """Plot-level settings of one chart.
+
+    Attributes:
+        name: name of the chart.
+        rows: number of rows in the subplot grid.
+        cols: number of columns in the subplot grid.
+        height: height of the figure.
+        width: width of the figure.
+        x_axis_name: title of the x-axes.
+        y_axis_name: title of the y-axes.
+        specs: per-cell subplot specification that is passed to plotly's make_subplots;
+            None means that the default grid is used.
+    """
+
+    name: str
+    rows: int = 1
+    cols: int = 1
+    height: int = 800
+    width: int = 1600
+    x_axis_name: str = 'Value'
+    y_axis_name: str = 'Quantity'
+    specs: Optional[List] = None
+
+    @staticmethod
+    def get_from_dict(plot_name: str, config: Optional[Dict]) -> 'PlotConfig':
+        """Build a PlotConfig from a `plot_config` mapping.
+
+        :param plot_name: name of the chart.
+        :param config: mapping from setting names to values. None or an empty mapping
+            means that every setting keeps its default value.
+        :return: the PlotConfig; settings that are missing in `config` keep their defaults.
+        :raises TypeError: if `config` contains a key that is not a field of PlotConfig.
+        """
+        params = {'name': plot_name}
+        # A section that is present but empty in the yaml file (`plot_config:`) is usually
+        # loaded as None, so None is treated the same way as an empty mapping.
+        params.update(config or {})
+        return PlotConfig(**params)
+
+
+@dataclass
+class TraceConfig:
+    """Settings of one trace, i.e. of one statistics column drawn on a chart.
+
+    Attributes:
+        column: name of the statistics column the trace is built from.
+        range_of_values: values that are taken into account when plotting;
+            None means that all values are used.
+        trace_name: name of the trace; None means that the column name is used instead.
+    """
+
+    column: str
+    range_of_values: Optional[range] = None
+    trace_name: Optional[str] = None
+
+    @staticmethod
+    def get_from_dict(column_name: str, config: Optional[Dict]) -> 'TraceConfig':
+        """Build a TraceConfig from a column config mapping.
+
+        :param column_name: name of the statistics column.
+        :param config: mapping from setting names to values. None or an empty mapping
+            means that every setting keeps its default value.
+        :return: the TraceConfig; `range_of_values` is converted from a list of range()
+            arguments (e.g. `[a, b]`) to a range object.
+        :raises TypeError: if `config` contains a key that is not a field of TraceConfig.
+        """
+        params = {'column': column_name}
+        # A column without settings (`COLUMN:` with nothing below it) is usually loaded
+        # from yaml as None, so None is treated the same way as an empty mapping.
+        params.update(config or {})
+
+        # An empty `range_of_values:` entry means "no filtering", the same as a missing one.
+        if params.get(_RANGE_OF_VALUES) is not None:
+            params[_RANGE_OF_VALUES] = range(*params[_RANGE_OF_VALUES])
+
+        return TraceConfig(**params)
+
+
+def configure_arguments(parser: argparse.ArgumentParser) -> None:
+    """Register the command line arguments of the script on `parser`.
+
+    The positional arguments `stats_path`, `config_path` and `save_dir` are converted to
+    absolute paths. The optional `--file-extension` argument must be one of the supported
+    extensions and defaults to svg.
+    """
+    positional_arguments = (
+        ('stats_path', 'Path to the statistics file. Must be an xlsx or csv file.'),
+        ('config_path', 'Path to the yaml file containing information about the graphs to be plotted.'),
+        ('save_dir', 'The directory where the plotted charts will be saved.'),
+    )
+
+    for argument_name, help_message in positional_arguments:
+        parser.add_argument(
+            argument_name,
+            type=lambda value: Path(value).absolute(),
+            help=help_message,
+        )
+
+    extension_options = {
+        'type': str,
+        'default': Extension.SVG.value,
+        'choices': get_supported_extensions(),
+        'help': 'Allows you to select the extension of output files.',
+    }
+    parser.add_argument('--file-extension', **extension_options)
+
+
+def _update_fig(fig: go.Figure, plot_config: PlotConfig) -> None:
+    """Apply the size, fonts, colors and axis styling to `fig` in place.
+
+    The figure size and the axis titles are taken from `plot_config`.
+    """
+    layout_settings = {
+        'width': plot_config.width,
+        'height': plot_config.height,
+        'font_size': 22,
+        # An alpha of 0 makes both backgrounds fully transparent.
+        'paper_bgcolor': 'rgba(0,0,0,0)',
+        'plot_bgcolor': 'rgba(0,0,0,0)',
+        'colorway': COLORWAY,
+    }
+    fig.update_layout(**layout_settings)
+
+    # Line styling that is shared by the x- and y-axes.
+    axis_line_style = dict(showline=True, linewidth=1, linecolor='black', mirror=True)
+
+    fig.update_xaxes(title=plot_config.x_axis_name, **axis_line_style)
+    fig.update_yaxes(title=plot_config.y_axis_name, **axis_line_style)
+
+
+def build_subplots(df: pd.DataFrame, plot_config: PlotConfig, trace_configs: List[TraceConfig]) -> go.Figure:
+    """Build a figure with one scatter trace per non-empty cell of the subplot grid.
+
+    The grid is walked row by row and the traces are assigned to the cells in the order
+    in which they appear in `trace_configs`. Neither `plot_config` nor `trace_configs`
+    is modified.
+
+    :param df: statistics to plot; it is passed to prepare_stats for every trace.
+    :param plot_config: plot-level settings (grid size, specs, axis names, figure size).
+    :param trace_configs: settings of the traces, in plotting order.
+    :return: the styled figure.
+    :raises IndexError: if there are fewer traces than non-empty grid cells.
+    """
+    fig = make_subplots(
+        rows=plot_config.rows,
+        cols=plot_config.cols,
+        specs=plot_config.specs,
+    )
+
+    # The default grid is kept in a local variable, so the caller's plot_config stays untouched.
+    specs = plot_config.specs
+    if specs is None:
+        specs = [[{} for _ in range(plot_config.cols)] for _ in range(plot_config.rows)]
+
+    # An iterator hands out the traces one by one without consuming the caller's list.
+    remaining_traces = iter(trace_configs)
+
+    for row_index, row in enumerate(specs, start=1):
+        for column_index, cell in enumerate(row, start=1):
+            # A None cell does not get a trace of its own.
+            if cell is None:
+                continue
+
+            trace_config = next(remaining_traces, None)
+            if trace_config is None:
+                # IndexError is kept as the error type because callers handle it.
+                raise IndexError(f'{plot_config.name}: there are fewer traces than subplot cells.')
+
+            stats = prepare_stats(
+                df,
+                trace_config.column,
+                trace_config.range_of_values,
+                plot_config.x_axis_name,
+                plot_config.y_axis_name,
+            )
+
+            fig.add_scatter(
+                x=stats[plot_config.x_axis_name],
+                y=stats[plot_config.y_axis_name],
+                col=column_index,
+                row=row_index,
+                line={'width': 5},
+                marker={'size': 10},
+                name=trace_config.trace_name if trace_config.trace_name is not None else trace_config.column,
+            )
+
+    # Traces that did not get a cell are not plotted; report them instead of dropping them silently.
+    unused_columns = [trace_config.column for trace_config in remaining_traces]
+    if unused_columns:
+        logger.warning(f'{plot_config.name}: no free subplot cell for {unused_columns}, these traces are skipped.')
+
+    _update_fig(fig, plot_config)
+
+    return fig
+
+
+def plot_and_save(stats: pd.DataFrame, config: Dict, output_dir: Path, extension: Extension) -> None:
+    """Plot one chart per group of `config` and save it to `output_dir`.
+
+    `config` is only read, it is never modified.
+
+    :param stats: statistics to plot.
+    :param config: mapping from a group name to its group config. A group config may
+        contain a `plot_config` section; every other key is treated as the name of a
+        statistics column and its value as the settings of the corresponding trace.
+    :param output_dir: directory the charts are saved to.
+    :param extension: extension of the saved files.
+    """
+    for group_name, group_config in config.items():
+        # A group without any content falls back to an empty group config.
+        group_config = group_config or {}
+
+        # `plot_config` is optional because all of its settings have default values.
+        plot_config = PlotConfig.get_from_dict(group_name, group_config.get(_PLOT_CONFIG) or {})
+
+        trace_configs = [
+            # A column without settings falls back to an empty column config.
+            TraceConfig.get_from_dict(column_name, column_config or {})
+            for column_name, column_config in group_config.items()
+            if column_name != _PLOT_CONFIG
+        ]
+
+        subplots = build_subplots(stats, plot_config, trace_configs)
+        save_plot(subplots, output_dir, group_name, extension)
+
+
+def main():
+    """Parse the command line arguments, then plot and save the charts.
+
+    Returns 0 on success and 2 if any error occurs; errors are logged, not raised.
+    """
+    arg_parser = argparse.ArgumentParser()
+    configure_arguments(arg_parser)
+
+    exit_code = 2
+
+    try:
+        cli_args = arg_parser.parse_args()
+
+        charts_config = parse_yaml(cli_args.config_path)
+        stats_df = get_solutions_df_by_file_path(cli_args.stats_path)
+        output_extension = Extension(cli_args.file_extension)
+
+        plot_and_save(stats_df, charts_config, cli_args.save_dir, output_extension)
+    except IndexError:
+        # Reported as a mismatch between the traces and the subplot grid.
+        logger.error(
+            'The number of traces must be consistent with the number of rows and columns, as well as the specs.',
+        )
+    except Exception:
+        logger.exception('An unexpected error.')
+    else:
+        exit_code = 0
+
+    return exit_code
+
+
+# Script entry point: the return value of main() becomes the exit code of the process.
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/src/python/evaluation/plots/plotters/raw_issues_statistics_plotters.py b/src/python/evaluation/plots/plotters/raw_issues_statistics_plotters.py
index abc74092..4e316e06 100644
--- a/src/python/evaluation/plots/plotters/raw_issues_statistics_plotters.py
+++ b/src/python/evaluation/plots/plotters/raw_issues_statistics_plotters.py
@@ -53,11 +53,17 @@ class PlotConfig:
n_bins: Optional[int] = None
-def _prepare_stats(stats: pd.DataFrame, config: PlotConfig, x_axis_name: str, y_axis_name: str) -> pd.DataFrame:
- result_df = stats[[VALUE, config.column]]
-
- if config.range_of_values is not None:
- result_df = result_df[result_df[VALUE].isin(config.range_of_values)]
+def prepare_stats(
+ stats: pd.DataFrame,
+ column: str,
+ range_of_values: Optional[range],
+ x_axis_name: str,
+ y_axis_name: str,
+) -> pd.DataFrame:
+ result_df = stats[[VALUE, column]]
+
+ if range_of_values is not None:
+ result_df = result_df[result_df[VALUE].isin(range_of_values)]
result_df.set_index(VALUE, inplace=True)
@@ -67,25 +73,31 @@ def _prepare_stats(stats: pd.DataFrame, config: PlotConfig, x_axis_name: str, y_
# Fill in the missing intermediate values with zeros
min_index, max_index = result_df.index.min(), result_df.index.max()
if pd.isna(min_index) or pd.isna(max_index):
- logger.warning(f'{config.column}: no data')
+ logger.warning(f'{column}: no data')
else:
result_df = result_df.reindex(range(min_index, max_index + 1), fill_value=0)
result_df.reset_index(inplace=True)
- return result_df.rename(columns={VALUE: x_axis_name, config.column: y_axis_name})
+ return result_df.rename(columns={VALUE: x_axis_name, column: y_axis_name})
-def _get_axis_names(config: PlotConfig, default_x_axis_name: str, default_y_axis_name: str) -> Tuple[str, str]:
- x_axis_name = default_x_axis_name
- if config.x_axis_name is not None:
- x_axis_name = config.x_axis_name
+def _get_axis_names(
+ *,
+ x_axis_name: Optional[str],
+ y_axis_name: Optional[str],
+ default_x_axis_name: str,
+ default_y_axis_name: str,
+) -> Tuple[str, str]:
+ new_x_axis_name = default_x_axis_name
+ if x_axis_name is not None:
+ new_x_axis_name = x_axis_name
- y_axis_name = default_y_axis_name
- if config.y_axis_name is not None:
- y_axis_name = config.y_axis_name
+ new_y_axis_name = default_y_axis_name
+ if y_axis_name is not None:
+ new_y_axis_name = y_axis_name
- return x_axis_name, y_axis_name
+ return new_x_axis_name, new_y_axis_name
def plot_line_chart(
@@ -93,12 +105,17 @@ def plot_line_chart(
config: PlotConfig,
group_stats: bool,
) -> Dict[str, go.Figure]:
- x_axis_name, y_axis_name = _get_axis_names(config, default_x_axis_name='Value', default_y_axis_name='Quantity')
+ x_axis_name, y_axis_name = _get_axis_names(
+ x_axis_name=config.x_axis_name,
+ y_axis_name=config.y_axis_name,
+ default_x_axis_name='Value',
+ default_y_axis_name='Quantity',
+ )
if not group_stats:
plots = {}
for lang, stats in stats_by_lang.items():
- stats = _prepare_stats(stats, config, x_axis_name, y_axis_name)
+ stats = prepare_stats(stats, config.column, config.range_of_values, x_axis_name, y_axis_name)
plots[lang] = create_line_chart(
stats,
x_axis=x_axis_name,
@@ -111,7 +128,7 @@ def plot_line_chart(
plot = go.Figure()
for lang, stats in stats_by_lang.items():
- stats = _prepare_stats(stats, config, x_axis_name, y_axis_name)
+ stats = prepare_stats(stats, config.column, config.range_of_values, x_axis_name, y_axis_name)
trace = create_scatter_trace(stats, x_column=x_axis_name, y_column=y_axis_name)
trace.name = lang
plot.add_trace(trace)
@@ -134,7 +151,10 @@ def plot_histogram(
group_stats: bool,
) -> Dict[str, go.Figure]:
x_axis_name, y_axis_name = _get_axis_names(
- config, default_x_axis_name='Value', default_y_axis_name='Quantity',
+ x_axis_name=config.x_axis_name,
+ y_axis_name=config.y_axis_name,
+ default_x_axis_name='Value',
+ default_y_axis_name='Quantity',
)
if group_stats:
@@ -142,7 +162,7 @@ def plot_histogram(
plots = {}
for lang, stats in stats_by_lang.items():
- stats = _prepare_stats(stats, config, x_axis_name, y_axis_name)
+ stats = prepare_stats(stats, config.column, config.range_of_values, x_axis_name, y_axis_name)
plots[lang] = create_histogram(
stats,
x_axis_name,
@@ -172,7 +192,8 @@ def plot_box_plot(
group_stats: bool,
) -> Dict[str, go.Figure]:
x_axis_name, y_axis_name = _get_axis_names(
- config,
+ x_axis_name=config.x_axis_name,
+ y_axis_name=config.y_axis_name,
default_x_axis_name='Category',
default_y_axis_name='Values',
)