Skip to content
22 changes: 16 additions & 6 deletions src/python/evaluation/plots/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -96,18 +96,20 @@ The script can build the following charts:
Run [raw_issues_statistics_plotter.py](raw_issues_statistics_plotter.py) with the arguments from the command line.

**Required arguments**:
1. `stats` — path to a file with stats that were found by [get_raw_issues_statistics.py](../issues_statistics/get_raw_issues_statistics.py).
1. `config_path` — path to the yaml file containing information about the charts to be plotted. A description of the config and its example is provided in [this section](#config-1).
2. `save_dir` — directory where the plotted charts will be saved.
3. `config_path` — path to the yaml file containing information about the charts to be plotted. A description of the config and its example is provided in [this section](#config-1).

**Optional arguments**:

Argument | Description
--- | ---
**‑‑file‑extension** | Allows you to select the extension of output files. Available extensions: `.png`, `.jpg`, `.jpeg`, `.webp`, `.svg`, `.pdf`, `.eps`, `.json`. Default is `.svg`.
**‑‑group‑stats** | If present, there will be several languages on the charts at once.

### Config
The configuration file is a dictionary in yaml format, where for each column of the original dataset the types of graphs to be plotted are specified. You can also put the common parameters when plotting multiple graphs for one column in a separate `common` group.
The configuration file is a dictionary in yaml format, where:
1) paths to datasets with statistics are specified;
2) for each column of the original dataset, the types of graphs to be plotted are specified. When several graphs are plotted for one column, their shared parameters can be placed in a separate `common` group.

**Possible values of the charts**:
* `line_chart`
Expand Down Expand Up @@ -141,10 +143,18 @@ The result will be two graphs: line chart and histogram. The values in both char
### Examples

#### Line chart
<img src="./examples/CYCLOMATIC_COMPLEXITY_line_chart.png" width="500">
<p align="middle">
<img src="./examples/CYCLOMATIC_COMPLEXITY_line_chart.png" width="49%" />
<img src="./examples/CYCLOMATIC_COMPLEXITY_line_chart_grouped.png" width="49%" />
</p>

#### Box plot
<img src="./examples/BEST_PRACTICES_box_plot.png" width="500">
<p align="middle">
<img src="./examples/BEST_PRACTICES_box_plot.png" width="49%" />
<img src="./examples/BEST_PRACTICES_box_plot_grouped.png" width="49%" />
</p>

#### Histogram
<img src="./examples/CODE_STYLE_ratio_histogram.png" width="500">
<p align="middle">
<img src="./examples/CODE_STYLE_ratio_histogram.png" width="49%" />
</p>
33 changes: 32 additions & 1 deletion src/python/evaluation/plots/common/plotly_consts.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from enum import Enum

import plotly.express as px


class MARGIN(Enum):
ZERO = {'l': 0, 'r': 0, 'b': 0, 't': 0}
Expand All @@ -13,7 +15,10 @@ class SORT_ORDER(Enum): # noqa: N801


class COLOR(Enum):
# Colors from px.colors.DEFAULT_PLOTLY_COLORS
"""
Colors from px.colors.DEFAULT_PLOTLY_COLORS
"""

BLUE = "rgb(31, 119, 180)"
ORANGE = "rgb(255, 127, 14)"
GREEN = "rgb(44, 160, 44)"
Expand All @@ -24,3 +29,29 @@ class COLOR(Enum):
GRAY = "rgb(127, 127, 127)"
YELLOW = "rgb(188, 189, 34)"
CYAN = "rgb(23, 190, 207)"


class COLORWAY(Enum):  # noqa: N801
    """
    Named color sequences taken from px.colors.qualitative.

    The value of each member is the plotly sequence of the same name.
    """

    # General-purpose palettes.
    PLOTLY = px.colors.qualitative.Plotly
    D3 = px.colors.qualitative.D3
    G10 = px.colors.qualitative.G10
    T10 = px.colors.qualitative.T10

    # Palettes with 24+ entries.
    ALPHABET = px.colors.qualitative.Alphabet
    DARK24 = px.colors.qualitative.Dark24
    LIGHT24 = px.colors.qualitative.Light24

    # "Set" / "Pastel" / "Dark" families.
    SET1 = px.colors.qualitative.Set1
    PASTEL1 = px.colors.qualitative.Pastel1
    DARK2 = px.colors.qualitative.Dark2
    SET2 = px.colors.qualitative.Set2
    PASTEL2 = px.colors.qualitative.Pastel2
    SET3 = px.colors.qualitative.Set3

    # Remaining named palettes.
    ANTIQUE = px.colors.qualitative.Antique
    BOLD = px.colors.qualitative.Bold
    PASTEL = px.colors.qualitative.Pastel
    PRISM = px.colors.qualitative.Prism
    SAFE = px.colors.qualitative.Safe
    VIVID = px.colors.qualitative.Vivid
120 changes: 92 additions & 28 deletions src/python/evaluation/plots/common/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,50 +8,101 @@
from src.python.evaluation.plots.common import plotly_consts
from src.python.review.common.file_system import Extension

COLOR = Optional[plotly_consts.COLOR]
COLORWAY = Optional[plotly_consts.COLORWAY]
MARGIN = Optional[plotly_consts.MARGIN]
SORT_ORDER = Optional[plotly_consts.SORT_ORDER]
LINES = Optional[Dict[int, Optional[str]]]


def get_supported_extensions() -> List[str]:
    """
    Return the string values of all supported extensions.

    The result consists of the image extensions reported by
    ``Extension.get_image_extensions()`` followed by JSON and HTML.
    """
    supported = Extension.get_image_extensions()
    # Same in-place growth as two consecutive appends.
    supported.extend((Extension.JSON, Extension.HTML))
    return [ext.value for ext in supported]


def create_bar_plot(
df: pd.DataFrame,
*,
x_axis: str,
y_axis: str,
margin: Optional[plotly_consts.MARGIN] = None,
sort_order: Optional[plotly_consts.SORT_ORDER] = None,
color: Optional[plotly_consts.COLOR] = None,
margin: MARGIN = None,
sort_order: SORT_ORDER = None,
color: COLOR = None,
) -> go.Figure:
fig = px.bar(df, x=x_axis, y=y_axis, text=y_axis)
update_figure(fig, margin, sort_order, color)
update_figure(fig, margin=margin, sort_order=sort_order, color=color)
return fig


def create_box_trace(
    df: pd.DataFrame,
    *,
    x_column: Optional[str] = None,
    y_column: Optional[str] = None,
    color: COLOR = None,
) -> go.Box:
    """
    Build a box trace from the given dataframe columns.

    :param df: dataframe the trace data is read from.
    :param x_column: name of the column used as ``x``; ``None`` leaves ``x`` unset.
    :param y_column: name of the column used as ``y``; ``None`` leaves ``y`` unset.
    :param color: line color of the trace; ``None`` passes no explicit color.
    :return: the configured ``go.Box`` trace.
    """
    x_values = None if x_column is None else df[x_column]
    y_values = None if y_column is None else df[y_column]
    line_color = None if color is None else color.value

    return go.Box(x=x_values, y=y_values, line={'color': line_color})


def create_box_plot(
df: pd.DataFrame,
x_axis: str,
y_axis: str,
margin: Optional[plotly_consts.MARGIN] = None,
sort_order: Optional[plotly_consts.SORT_ORDER] = None,
color: Optional[plotly_consts.COLOR] = None,
horizontal_lines: Optional[Dict[int, Optional[str]]] = None,
*,
x_axis: Optional[str],
y_axis: Optional[str],
margin: MARGIN = None,
sort_order: SORT_ORDER = None,
color: COLOR = None,
horizontal_lines: LINES = None,
) -> go.Figure:
fig = px.box(df, x=x_axis, y=y_axis)
update_figure(fig, margin=margin, sort_order=sort_order, color=color, horizontal_lines=horizontal_lines)
fig = go.Figure(create_box_trace(df, x_column=x_axis, y_column=y_axis, color=color))
update_figure(
fig,
margin=margin,
sort_order=sort_order,
horizontal_lines=horizontal_lines,
x_axis_name=x_axis,
y_axis_name=y_axis,
)
return fig


def create_scatter_trace(
    df: pd.DataFrame,
    *,
    x_column: str,
    y_column: str,
    color: COLOR = None,
) -> go.Scatter:
    """
    Build a scatter trace from two dataframe columns.

    :param df: dataframe the trace data is read from.
    :param x_column: name of the column used as ``x``.
    :param y_column: name of the column used as ``y``.
    :param color: line color of the trace; ``None`` passes no explicit color.
    :return: the configured ``go.Scatter`` trace.
    """
    x_values = df[x_column]
    y_values = df[y_column]
    line_color = None if color is None else color.value

    return go.Scatter(x=x_values, y=y_values, line={'color': line_color})


def create_line_chart(
df: pd.DataFrame,
*,
x_axis: str,
y_axis: str,
margin: Optional[plotly_consts.MARGIN] = None,
color: Optional[plotly_consts.COLOR] = None,
vertical_lines: Optional[Dict[int, Optional[str]]] = None,
margin: MARGIN = None,
color: COLOR = None,
vertical_lines: LINES = None,
) -> go.Figure:
fig = px.line(df, x=x_axis, y=y_axis)
update_figure(fig, margin=margin, color=color, vertical_lines=vertical_lines)
fig = go.Figure(create_scatter_trace(df, x_column=x_axis, y_column=y_axis, color=color))
update_figure(
fig,
margin=margin,
vertical_lines=vertical_lines,
x_axis_name=x_axis,
y_axis_name=y_axis,
)
return fig


Expand All @@ -60,24 +111,31 @@ def create_histogram(
x_axis: str,
y_axis: str,
n_bins: Optional[int] = None,
margin: Optional[plotly_consts.MARGIN] = None,
color: Optional[plotly_consts.COLOR] = None,
vertical_lines: Optional[Dict[int, Optional[str]]] = None,
margin: MARGIN = None,
color: COLOR = None,
vertical_lines: LINES = None,
) -> go.Figure:
fig = px.histogram(df, x=x_axis, y=y_axis, nbins=n_bins)
update_figure(
fig, margin=margin, color=color, vertical_lines=vertical_lines, x_axis_name=x_axis, y_axis_name=y_axis,
fig,
margin=margin,
color=color,
vertical_lines=vertical_lines,
x_axis_name=x_axis,
y_axis_name=y_axis,
)
return fig


def update_figure(
fig: go.Figure,
margin: Optional[plotly_consts.MARGIN] = None,
sort_order: Optional[plotly_consts.SORT_ORDER] = None,
color: Optional[plotly_consts.COLOR] = None,
horizontal_lines: Optional[Dict[int, Optional[str]]] = None,
vertical_lines: Optional[Dict[int, Optional[str]]] = None,
*,
margin: MARGIN = None,
sort_order: SORT_ORDER = None,
color: COLOR = None,
colorway: COLORWAY = None,
horizontal_lines: LINES = None,
vertical_lines: LINES = None,
x_axis_name: Optional[str] = None,
y_axis_name: Optional[str] = None,
) -> None:
Expand All @@ -95,6 +153,9 @@ def update_figure(
if y_axis_name is not None:
new_layout['yaxis_title'] = y_axis_name

if colorway is not None:
new_layout['colorway'] = colorway.value

fig.update_layout(**new_layout)

new_trace = {}
Expand All @@ -110,7 +171,7 @@ def update_figure(

if vertical_lines is not None:
for x, annotation in vertical_lines.items():
fig.add_vline(x=x, annotation_text=annotation)
fig.add_vline(x=x, annotation_text=annotation, annotation_textangle=90)


def save_plot(
Expand All @@ -121,4 +182,7 @@ def save_plot(
) -> None:
os.makedirs(dir_path, exist_ok=True)
file = dir_path / f"{plot_name}{extension.value}"
fig.write_image(str(file))
if extension == Extension.HTML:
fig.write_html(str(file))
else:
fig.write_image(str(file))
Binary file modified src/python/evaluation/plots/examples/BEST_PRACTICES_box_plot.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
40 changes: 35 additions & 5 deletions src/python/evaluation/plots/plotters/diffs_plotters.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@ def _get_dataframe_from_dict(


def _extract_stats_from_issues_statistics(
statistics: IssuesStatistics, limit: int, only_unique: bool,
statistics: IssuesStatistics,
limit: int,
only_unique: bool,
) -> Dict[IssueType, int]:
categorized_statistics = statistics.get_short_categorized_statistics()

Expand Down Expand Up @@ -61,7 +63,14 @@ def get_unique_issues_by_category(
key_mapper=lambda issue_type: issue_type.name,
)

return create_bar_plot(df, x_axis_name, y_axis_name, margin, sort_order, color)
return create_bar_plot(
df,
x_axis=x_axis_name,
y_axis=y_axis_name,
margin=margin,
sort_order=sort_order,
color=color,
)


def get_issues_by_category(
Expand All @@ -82,7 +91,14 @@ def get_issues_by_category(
key_mapper=lambda issue_type: issue_type.name,
)

return create_bar_plot(df, x_axis_name, y_axis_name, margin, sort_order, color)
return create_bar_plot(
df,
x_axis=x_axis_name,
y_axis=y_axis_name,
margin=margin,
sort_order=sort_order,
color=color,
)


def get_median_penalty_influence_by_category(
Expand All @@ -105,7 +121,14 @@ def get_median_penalty_influence_by_category(
value_mapper=lambda influence: median(influence),
)

return create_bar_plot(df, x_axis_name, y_axis_name, margin, sort_order, color)
return create_bar_plot(
df,
x_axis=x_axis_name,
y_axis=y_axis_name,
margin=margin,
sort_order=sort_order,
color=color,
)


def get_penalty_influence_distribution(
Expand All @@ -128,4 +151,11 @@ def get_penalty_influence_distribution(
)
df = df.explode(y_axis_name)

return create_box_plot(df, x_axis_name, y_axis_name, margin, sort_order, color)
return create_box_plot(
df,
x_axis=x_axis_name,
y_axis=y_axis_name,
margin=margin,
sort_order=sort_order,
color=color,
)
Loading