diff --git a/src/python/common/tool_arguments.py b/src/python/common/tool_arguments.py
index b285ac23..d3048051 100644
--- a/src/python/common/tool_arguments.py
+++ b/src/python/common/tool_arguments.py
@@ -86,3 +86,6 @@ class RunToolArgument(Enum):
                                     f'"{ColumnName.LANG.value}" column are: '
                                     f'{LanguageVersion.PYTHON_3.value}, {LanguageVersion.JAVA_8.value}, '
                                     f'{LanguageVersion.JAVA_11.value}, {LanguageVersion.KOTLIN.value}.')
+
+    DIFFS_FILE_PATH = ArgumentsInfo(None, 'diffs_file_path',
+                                    'Path to a file with serialized diffs that were found by diffs_between_df.py')
diff --git a/src/python/evaluation/common/pandas_util.py b/src/python/evaluation/common/pandas_util.py
index bb956759..987ef030 100644
--- a/src/python/evaluation/common/pandas_util.py
+++ b/src/python/evaluation/common/pandas_util.py
@@ -1,14 +1,17 @@
+import json
 import logging
 from pathlib import Path
-from typing import Set, Union
+from typing import Any, List, Set, Union
 
 import numpy as np
 import pandas as pd
 from src.python.evaluation.common.csv_util import write_dataframe_to_csv
-from src.python.evaluation.common.util import ColumnName
+from src.python.evaluation.common.util import ColumnName, EvaluationArgument
 from src.python.evaluation.common.xlsx_util import create_workbook, remove_sheet, write_dataframe_to_xlsx_sheet
 from src.python.review.application_config import LanguageVersion
 from src.python.review.common.file_system import Extension, get_restricted_extension
+from src.python.review.inspectors.issue import BaseIssue
+from src.python.review.reviewers.utils.print_review import convert_json_to_issues
 
 logger = logging.getLogger(__name__)
 
@@ -18,6 +21,10 @@ def filter_df_by_language(df: pd.DataFrame, languages: Set[LanguageVersion],
     return df.loc[df[column].isin(set(map(lambda l: l.value, languages)))]
 
 
+def filter_df_by_condition(df: pd.DataFrame, column: str, value: Any) -> pd.DataFrame:
+    return df.loc[df[column] == value]
+
+
 def drop_duplicates(df: pd.DataFrame, column: str = ColumnName.CODE.value) -> pd.DataFrame:
     return df.drop_duplicates(column, keep='last')
 
@@ -85,3 +92,12 @@ def write_df_to_file(df: pd.DataFrame, output_file_path: Path, extension: Extens
         write_dataframe_to_xlsx_sheet(output_file_path, df, 'inspection_results')
         # remove empty sheet that was initially created with the workbook
         remove_sheet(output_file_path, 'Sheet')
+
+
+def get_issues_from_json(str_json: str) -> List[BaseIssue]:
+    parsed_json = json.loads(str_json)['issues']
+    return convert_json_to_issues(parsed_json)
+
+
+def get_issues_by_row(df: pd.DataFrame, row: int) -> List[BaseIssue]:
+    return get_issues_from_json(df.iloc[row][EvaluationArgument.TRACEBACK.value])
diff --git a/src/python/evaluation/common/util.py b/src/python/evaluation/common/util.py
index b1c501b8..271956f1 100644
--- a/src/python/evaluation/common/util.py
+++ b/src/python/evaluation/common/util.py
@@ -15,6 +15,7 @@ class ColumnName(Enum):
     ROW = 'row'
     OLD = 'old'
     NEW = 'new'
+    IS_PUBLIC = 'is_public'
 
 
 @unique
diff --git a/src/python/evaluation/inspectors/README.md b/src/python/evaluation/inspectors/README.md
index e34a3b01..a0de1314 100644
--- a/src/python/evaluation/inspectors/README.md
+++ b/src/python/evaluation/inspectors/README.md
@@ -11,6 +11,10 @@ This module contains _preprocessing_ stage and _analysing_ stage.
 
 `Analysing` stage includes:
 - [diffs_between_df.py](diffs_between_df.py) allows finding a difference between old and new grades and collect issues that were found in new data
+- [print_inspectors_statistics.py](print_inspectors_statistics.py) allows printing statistics
+  that were calculated by [diffs_between_df.py](diffs_between_df.py)
+- [get_worse_public_examples.py](get_worse_public_examples.py) allows getting
+  the top N worst public examples from a dataset. The measure is the number of unique new inspections.
 
 ___
 
@@ -136,4 +140,96 @@ An example of the pickle` file is:
 }
 ```
 In the `grade` field are stored fragments ids for which grade was increased in the new data.
-In the `traceback` field for fragments ids are stored set of issues. These issues were found in the new data and were not found in the old data.
\ No newline at end of file
+In the `traceback` field, a set of issues is stored for each fragment id. These issues were found in the new data and were not found in the old data.
+
+___
+
+### Print statistics
+
+[print_inspectors_statistics.py](print_inspectors_statistics.py) allows printing statistics
+  that were calculated by [diffs_between_df.py](diffs_between_df.py).
+
+#### Usage
+
+Run [print_inspectors_statistics.py](print_inspectors_statistics.py) with the arguments from the command line.
+
+Required arguments:
+
+- `diffs_file_path` — path to a `pickle` file that was produced by [diffs_between_df.py](diffs_between_df.py).
+
+Optional arguments:
+Argument | Description
+--- | ---
+|**‑‑categorize**| If specified, the statistics will be grouped by issue category. Disabled by default.|
+|**‑n**, **‑‑top_n**| The top N issues will be printed. The default value is 10.|
+|**‑‑full_stat**| If specified, the full statistics (with all issues) will be printed. Disabled by default.|
+
+The statistics are printed to the console.
+
+The output contains:
+- whether incorrect grades were found;
+- how many fragments have additional issues;
+- how many unique issues were found;
+- the top N issues in the format: (issue_key, frequency);
+- short categorized statistics: for each category, how many unique issues were found and how many
+  fragments have these issues;
+- \[Optional\] full categorized statistics: for each category and each issue, how many
+  fragments have this issue.
+
+An example of the printed statistics (without full categorized statistics):
+
+```text
+SUCCESS! No incorrect grades were found.
+______
+39830 fragments have additional issues
+139 unique issues were found
+______
+Top 10 issues:
+SC200: 64435 times
+WPS432: 17477 times
+WPS221: 10618 times
+WPS336: 4965 times
+H601: 3826 times
+SC100: 2719 times
+WPS319: 2655 times
+WPS317: 2575 times
+WPS515: 1783 times
+WPS503: 1611 times
+______
+CODE_STYLE: 28 issues, 26171 fragments
+BEST_PRACTICES: 76 issues, 88040 fragments
+ERROR_PRONE: 17 issues, 2363 fragments
+COMPLEXITY: 17 issues, 13928 fragments
+COHESION: 1 issues, 3826 fragments
+______
+```
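+
+The same numbers can also be gathered programmatically. A minimal sketch (the path to the `pickle` file is illustrative):
+
+```python
+from pathlib import Path
+
+from src.python.evaluation.inspectors.common.statistics import IssuesStatistics
+from src.python.evaluation.inspectors.print_inspectors_statistics import gather_statistics
+from src.python.review.common.file_system import deserialize_data_from_file
+
+# Illustrative path: point it to the diffs file produced by diffs_between_df.py
+diffs = deserialize_data_from_file(Path('diffs.pickle'))
+statistics: IssuesStatistics = gather_statistics(diffs)
+
+print(f'{statistics.count_unique_issues()} unique issues were found')
+for issue, freq in statistics.get_top_n_issues(5):
+    IssuesStatistics.print_issue_with_freq(issue, freq)
+```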
+
+---
+
+### Get worse public examples
+
+[get_worse_public_examples.py](get_worse_public_examples.py) allows getting
+  the top N worst public examples from a dataset. The measure is the number of unique new inspections.
+
+#### Usage
+
+Run [get_worse_public_examples.py](get_worse_public_examples.py) with the arguments from the command line.
+
+Required arguments:
+
+- `solutions_file_path` — path to an xlsx or csv file with graded code samples;
+- `diffs_file_path` — path to a `pickle` file that was produced by [diffs_between_df.py](diffs_between_df.py).
+
+Please note that your `solutions_file_path` file with code fragments should contain the following obligatory columns:
+
+- `code`,
+- `traceback`,
+- `is_public`,
+- `id`.
+
+Only fragments with the value `YES` in the `is_public` column are considered public.
+
+Optional arguments:
+Argument | Description
+--- | ---
+|**‑n**, **‑‑n**| The N worst fragments will be saved. The default value is 10.|
+
+The resulting file (`worse_fragments.csv`) will be stored in the same folder as the `solutions_file_path` input file.
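+
+For reference, a minimal sketch of reading the resulting file with `pandas` (the path is illustrative):
+
+```python
+import pandas as pd
+
+# Illustrative path: the script writes worse_fragments.csv next to the solutions file
+worse_fragments = pd.read_csv('worse_fragments.csv')
+
+# Each row keeps the code, the full traceback, and the comma-separated new inspections
+for _, row in worse_fragments.iterrows():
+    print(row['new_inspections'])
+```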
diff --git a/src/python/evaluation/inspectors/common/__init__.py b/src/python/evaluation/inspectors/common/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src/python/evaluation/inspectors/common/statistics.py b/src/python/evaluation/inspectors/common/statistics.py
new file mode 100644
index 00000000..a36cefb7
--- /dev/null
+++ b/src/python/evaluation/inspectors/common/statistics.py
@@ -0,0 +1,56 @@
+from collections import defaultdict
+from dataclasses import dataclass
+from typing import Dict, List, Tuple
+
+from src.python.review.inspectors.issue import IssueType, ShortIssue
+
+
+@dataclass(frozen=True)
+class IssuesStatistics:
+    stat: Dict[ShortIssue, int]
+    changed_grades_count: int
+
+    def print_full_statistics(self, to_categorize: bool = True) -> None:
+        if to_categorize:
+            categorized_statistics: Dict[IssueType, Dict[ShortIssue, int]] = self.get_categorized_statistics()
+            for category, issues in categorized_statistics.items():
+                print(f'{category.value} issues:')
+                self.__print_stat(issues)
+        else:
+            self.__print_stat(self.stat)
+
+    @classmethod
+    def __print_stat(cls, stat: Dict[ShortIssue, int]) -> None:
+        for issue, freq in stat.items():
+            cls.print_issue_with_freq(issue, freq, prefix='- ')
+
+    @classmethod
+    def print_issue_with_freq(cls, issue: ShortIssue, freq: int, prefix: str = '', suffix: str = '') -> None:
+        print(f'{prefix}{issue.origin_class}: {freq} times{suffix}')
+
+    def get_categorized_statistics(self) -> Dict[IssueType, Dict[ShortIssue, int]]:
+        categorized_stat: Dict[IssueType, Dict[ShortIssue, int]] = defaultdict(dict)
+        for issue, freq in self.stat.items():
+            categorized_stat[issue.type][issue] = freq
+        return categorized_stat
+
+    # Get statistics for each IssueType: count unique issues, count fragments with these issues
+    def get_short_categorized_statistics(self) -> Dict[IssueType, Tuple[int, int]]:
+        categorized_statistics: Dict[IssueType, Dict[ShortIssue, int]] = self.get_categorized_statistics()
+        short_categorized_statistics = defaultdict(tuple)
+        for issue_type, stat in categorized_statistics.items():
+            unique_issues = len(stat)
+            fragments = sum(stat.values())
+            short_categorized_statistics[issue_type] = (unique_issues, fragments)
+        return short_categorized_statistics
+
+    def print_short_categorized_statistics(self) -> None:
+        short_categorized_statistics = self.get_short_categorized_statistics()
+        for category, stat in short_categorized_statistics.items():
+            print(f'{category.value}: {stat[0]} issues, {stat[1]} fragments')
+
+    def get_top_n_issues(self, n: int) -> List[Tuple[ShortIssue, int]]:
+        return sorted(self.stat.items(), key=lambda t: t[1], reverse=True)[:n]
+
+    def count_unique_issues(self) -> int:
+        return len(self.stat)
diff --git a/src/python/evaluation/inspectors/diffs_between_df.py b/src/python/evaluation/inspectors/diffs_between_df.py
index 70e93331..c747175f 100644
--- a/src/python/evaluation/inspectors/diffs_between_df.py
+++ b/src/python/evaluation/inspectors/diffs_between_df.py
@@ -1,20 +1,16 @@
 import argparse
-import json
 from pathlib import Path
-from typing import List
 
 import pandas as pd
 from src.python.common.tool_arguments import RunToolArgument
 from src.python.evaluation.common.pandas_util import (
-    get_inconsistent_positions, get_solutions_df, get_solutions_df_by_file_path,
+    get_inconsistent_positions, get_issues_by_row, get_solutions_df, get_solutions_df_by_file_path,
 )
 from src.python.evaluation.common.util import ColumnName, EvaluationArgument
 from src.python.review.common.file_system import (
     Extension, get_parent_folder, get_restricted_extension, serialize_data_and_write_to_file,
 )
-from src.python.review.inspectors.issue import BaseIssue
 from src.python.review.quality.model import QualityType
-from src.python.review.reviewers.utils.print_review import convert_json_to_issues
 
 
 def configure_arguments(parser: argparse.ArgumentParser) -> None:
@@ -31,11 +27,6 @@ def configure_arguments(parser: argparse.ArgumentParser) -> None:
                             f'(file contains grade and traceback (optional) columns)')
 
 
-def __get_issues(df: pd.DataFrame, row: int) -> List[BaseIssue]:
-    parsed_json = json.loads(df.iloc[row][EvaluationArgument.TRACEBACK.value])['issues']
-    return convert_json_to_issues(parsed_json)
-
-
 # Find difference between two dataframes. Return dict:
 # {
 #     grade: [list_of_fragment_ids],
@@ -63,8 +54,8 @@ def find_diffs(old_df: pd.DataFrame, new_df: pd.DataFrame) -> dict:
             diffs[ColumnName.GRADE.value].append(fragment_id)
         else:
            # Find difference between issues
-            old_issues = __get_issues(old_df, row)
-            new_issues = __get_issues(new_df, row)
+            old_issues = get_issues_by_row(old_df, row)
+            new_issues = get_issues_by_row(new_df, row)
             if len(old_issues) > len(new_issues):
                 raise ValueError(f'New dataframe contains less issues than old for fragment {id}')
             difference = set(set(new_issues) - set(old_issues))
diff --git a/src/python/evaluation/inspectors/filter_issues.py b/src/python/evaluation/inspectors/filter_issues.py
new file mode 100644
index 00000000..ca4b38b6
--- /dev/null
+++ b/src/python/evaluation/inspectors/filter_issues.py
@@ -0,0 +1,70 @@
+import argparse
+from pathlib import Path
+from typing import List, Set
+
+import pandas as pd
+from src.python.common.tool_arguments import RunToolArgument
+from src.python.evaluation.common.pandas_util import get_issues_from_json, get_solutions_df_by_file_path
+from src.python.evaluation.common.util import ColumnName, EvaluationArgument
+from src.python.review.common.file_system import Extension, get_parent_folder, serialize_data_and_write_to_file
+from src.python.review.inspectors.issue import BaseIssue
+
+
+TRACEBACK = EvaluationArgument.TRACEBACK.value
+ID = ColumnName.ID.value
+GRADE = ColumnName.GRADE.value
+
+
+def configure_arguments(parser: argparse.ArgumentParser) -> None:
+    parser.add_argument(RunToolArgument.SOLUTIONS_FILE_PATH.value.long_name,
+                        type=lambda value: Path(value).absolute(),
+                        help=f'{RunToolArgument.SOLUTIONS_FILE_PATH.value.description}'
+                             f'\nAll code fragments from this file must be graded ')
+
+    parser.add_argument('-i', '--issues',
+                        help='Comma-separated list of issue origin classes to keep, e.g. SC200,WPS432',
+                        default='')
+
+
+def __parse_issues_arg(str_issues: str) -> Set[str]:
+    return set(str_issues.split(','))
+
+
+def __get_new_issues(traceback: str, new_issues_classes: Set[str]) -> List[BaseIssue]:
+    all_issues = get_issues_from_json(traceback)
+    return list(filter(lambda i: i.origin_class in new_issues_classes, all_issues))
+
+
+def __add_issues_for_fragment(fragment_id: int, new_issues: List[BaseIssue], diffs: dict) -> None:
+    if len(new_issues) > 0:
+        diffs[TRACEBACK][fragment_id] = new_issues
+
+
+# Make a dict with the same structure as in the find_diffs function from diffs_between_df.py
+def get_statistics_dict(solutions_df: pd.DataFrame, new_issues_classes: Set[str]) -> dict:
+    diffs = {
+        GRADE: [],
+        TRACEBACK: {},
+    }
+    solutions_df.apply(lambda row: __add_issues_for_fragment(row[ID],
+                                                             __get_new_issues(row[TRACEBACK], new_issues_classes),
+                                                             diffs), axis=1)
+    return diffs
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser()
+    configure_arguments(parser)
+    args = parser.parse_args()
+
+    solutions_file_path = args.solutions_file_path
+    solutions_df = get_solutions_df_by_file_path(solutions_file_path)
+    issues = __parse_issues_arg(args.issues)
+
+    diffs = get_statistics_dict(solutions_df, issues)
+    output_path = get_parent_folder(Path(solutions_file_path)) / f'diffs{Extension.PICKLE.value}'
+    serialize_data_and_write_to_file(output_path, diffs)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/src/python/evaluation/inspectors/get_worse_public_examples.py b/src/python/evaluation/inspectors/get_worse_public_examples.py
new file mode 100644
index 00000000..1bb036c5
--- /dev/null
+++ b/src/python/evaluation/inspectors/get_worse_public_examples.py
@@ -0,0 +1,67 @@
+import argparse
+from pathlib import Path
+from typing import Dict, List
+
+import pandas as pd
+from src.python.common.tool_arguments import RunToolArgument
+from src.python.evaluation.common.csv_util import write_dataframe_to_csv
+from src.python.evaluation.common.pandas_util import filter_df_by_condition, get_solutions_df_by_file_path
+from src.python.evaluation.common.util import ColumnName, EvaluationArgument
+from src.python.review.common.file_system import deserialize_data_from_file, Extension, get_parent_folder
+from src.python.review.inspectors.issue import BaseIssue
+
+
+def configure_arguments(parser: argparse.ArgumentParser) -> None:
+    parser.add_argument(RunToolArgument.SOLUTIONS_FILE_PATH.value.long_name,
+                        type=lambda value: Path(value).absolute(),
+                        help=RunToolArgument.SOLUTIONS_FILE_PATH.value.description)
+
+    parser.add_argument(RunToolArgument.DIFFS_FILE_PATH.value.long_name,
+                        type=lambda value: Path(value).absolute(),
+                        help=RunToolArgument.DIFFS_FILE_PATH.value.description)
+
+    parser.add_argument('-n', '--n',
+                        help='The N worst fragments will be saved',
+                        type=int,
+                        default=10)
+
+
+def __get_new_inspections(fragment_id_to_issues: Dict[int, List[BaseIssue]], fragment_id: int) -> str:
+    return ','.join(set(map(lambda i: i.origin_class, fragment_id_to_issues.get(fragment_id, []))))
+
+
+def __get_public_fragments(solutions_df: pd.DataFrame, diffs_dict: dict) -> pd.DataFrame:
+    # Keep only public solutions
+    public_fragments = filter_df_by_condition(solutions_df, ColumnName.IS_PUBLIC.value, 'YES')
+    count_inspections_column = 'count_inspections'
+    new_inspections_column = 'new_inspections'
+
+    # Get only new inspections and count them
+    fragment_id_to_issues = diffs_dict[EvaluationArgument.TRACEBACK.value]
+    public_fragments[new_inspections_column] = public_fragments.apply(
+        lambda row: __get_new_inspections(fragment_id_to_issues, row[ColumnName.ID.value]), axis=1)
+    public_fragments[count_inspections_column] = public_fragments.apply(
+        lambda row: len(row[new_inspections_column].split(',')), axis=1)
+
+    public_fragments = public_fragments.sort_values(count_inspections_column, ascending=False)
+    # Keep only public columns
+    return public_fragments[[ColumnName.CODE.value, EvaluationArgument.TRACEBACK.value, new_inspections_column]]
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser()
+    configure_arguments(parser)
+    args = parser.parse_args()
+
+    solutions_file_path = args.solutions_file_path
+    solutions_df = get_solutions_df_by_file_path(solutions_file_path)
+    diffs = deserialize_data_from_file(args.diffs_file_path)
+
+    public_fragments = __get_public_fragments(solutions_df, diffs)
+
+    output_path = get_parent_folder(Path(solutions_file_path)) / f'worse_fragments{Extension.CSV.value}'
+    write_dataframe_to_csv(output_path, public_fragments.head(args.n))
+
+
+if __name__ == '__main__':
+    main()
diff --git a/src/python/evaluation/inspectors/print_inspectors_statistics.py b/src/python/evaluation/inspectors/print_inspectors_statistics.py
new file mode 100644
index 00000000..8b132a31
--- /dev/null
+++ b/src/python/evaluation/inspectors/print_inspectors_statistics.py
@@ -0,0 +1,84 @@
+import argparse
+from collections import defaultdict
+from pathlib import Path
+from typing import Dict
+
+from src.python.common.tool_arguments import RunToolArgument
+from src.python.evaluation.common.util import ColumnName, EvaluationArgument
+from src.python.evaluation.inspectors.common.statistics import IssuesStatistics
+from src.python.review.common.file_system import deserialize_data_from_file
+from src.python.review.inspectors.issue import ShortIssue
+
+
+def configure_arguments(parser: argparse.ArgumentParser) -> None:
+    parser.add_argument(RunToolArgument.DIFFS_FILE_PATH.value.long_name,
+                        type=lambda value: Path(value).absolute(),
+                        help=RunToolArgument.DIFFS_FILE_PATH.value.description)
+
+    parser.add_argument('--categorize',
+                        help='If specified, the statistics will be grouped by issue category.',
+                        action='store_true')
+
+    parser.add_argument('-n', '--top_n',
+                        help='The top N issues will be printed',
+                        type=int,
+                        default=10)
+
+    parser.add_argument('--full_stat',
+                        help='If specified, the full statistics (with all issues) will be printed.',
+                        action='store_true')
+
+
+def has_incorrect_grades(diffs_dict: dict) -> bool:
+    return len(diffs_dict[ColumnName.GRADE.value]) > 0
+
+
+def gather_statistics(diffs_dict: dict) -> IssuesStatistics:
+    changed_grades_count = len(diffs_dict[EvaluationArgument.TRACEBACK.value])
+    issues_dict: Dict[ShortIssue, int] = defaultdict(int)
+    for _, issues in diffs_dict[EvaluationArgument.TRACEBACK.value].items():
+        for issue in issues:
+            short_issue = ShortIssue(origin_class=issue.origin_class, type=issue.type)
+            issues_dict[short_issue] += 1
+    return IssuesStatistics(issues_dict, changed_grades_count)
+
+
+def __print_top_n(statistics: IssuesStatistics, n: int, separator: str) -> None:
+    top_n = statistics.get_top_n_issues(n)
+    print(separator)
+    print(f'Top {n} issues:')
+    for issue, freq in top_n:
+        IssuesStatistics.print_issue_with_freq(issue, freq)
+    print(separator)
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser()
+    configure_arguments(parser)
+    args = parser.parse_args()
+
+    separator = '______'
+
+    diffs = deserialize_data_from_file(args.diffs_file_path)
+    if has_incorrect_grades(diffs):
+        print(f'WARNING! Incorrect grades were found in the following fragments: {diffs[ColumnName.GRADE.value]}.')
+    else:
+        print('SUCCESS! No incorrect grades were found.')
+    print(separator)
+
+    statistics = gather_statistics(diffs)
+    print(f'{statistics.changed_grades_count} fragments have additional issues')
+    print(f'{statistics.count_unique_issues()} unique issues were found')
+
+    n = args.top_n
+    __print_top_n(statistics, n, separator)
+
+    statistics.print_short_categorized_statistics()
+    print(separator)
+
+    if args.full_stat:
+        statistics.print_full_statistics()
+
+
+if __name__ == '__main__':
+    main()
diff --git a/src/python/review/inspectors/issue.py b/src/python/review/inspectors/issue.py
index c910bf80..965f2262 100644
--- a/src/python/review/inspectors/issue.py
+++ b/src/python/review/inspectors/issue.py
@@ -66,16 +66,21 @@ def get_base_issue_data_dict(cls,
 
 
 @dataclass(frozen=True, eq=True)
-class BaseIssue:
+class ShortIssue:
+    origin_class: str
+
+    type: IssueType
+
+
+@dataclass(frozen=True, eq=True)
+class BaseIssue(ShortIssue):
+    description: str
+
     file_path: Path
     line_no: int
     column_no: int
-    description: str
-    origin_class: str
-
     inspector_type: InspectorType
-    type: IssueType
 
 
 class Measurable(abc.ABC):