diff --git a/src/python/evaluation/statistics/README.md b/src/python/evaluation/issues_statistics/README.md
similarity index 100%
rename from src/python/evaluation/statistics/README.md
rename to src/python/evaluation/issues_statistics/README.md
diff --git a/src/python/evaluation/statistics/__init__.py b/src/python/evaluation/issues_statistics/__init__.py
similarity index 100%
rename from src/python/evaluation/statistics/__init__.py
rename to src/python/evaluation/issues_statistics/__init__.py
diff --git a/src/python/evaluation/statistics/common/__init__.py b/src/python/evaluation/issues_statistics/common/__init__.py
similarity index 100%
rename from src/python/evaluation/statistics/common/__init__.py
rename to src/python/evaluation/issues_statistics/common/__init__.py
diff --git a/src/python/evaluation/statistics/common/raw_issue_encoder_decoder.py b/src/python/evaluation/issues_statistics/common/raw_issue_encoder_decoder.py
similarity index 100%
rename from src/python/evaluation/statistics/common/raw_issue_encoder_decoder.py
rename to src/python/evaluation/issues_statistics/common/raw_issue_encoder_decoder.py
diff --git a/src/python/evaluation/statistics/get_raw_issues.py b/src/python/evaluation/issues_statistics/get_raw_issues.py
similarity index 98%
rename from src/python/evaluation/statistics/get_raw_issues.py
rename to src/python/evaluation/issues_statistics/get_raw_issues.py
index 19791c79..b077f34f 100644
--- a/src/python/evaluation/statistics/get_raw_issues.py
+++ b/src/python/evaluation/issues_statistics/get_raw_issues.py
@@ -15,7 +15,7 @@
 from src.python.evaluation.common.pandas_util import get_solutions_df_by_file_path, write_df_to_file
 from src.python.evaluation.common.util import ColumnName
 from src.python.evaluation.evaluation_run_tool import get_language_version
-from src.python.evaluation.statistics.common.raw_issue_encoder_decoder import RawIssueEncoder
+from src.python.evaluation.issues_statistics.common.raw_issue_encoder_decoder import RawIssueEncoder
 from src.python.review.common.file_system import (
     create_file,
     Extension,
diff --git a/src/python/evaluation/paper_evaluation/README.md b/src/python/evaluation/paper_evaluation/README.md
new file mode 100644
index 00000000..5dac490b
--- /dev/null
+++ b/src/python/evaluation/paper_evaluation/README.md
@@ -0,0 +1,7 @@
+# Paper evaluation
+
+This module contains scripts for the SIGCSE-2022 paper evaluation:
+
+- [Comparison with other tools](./comparison_with_other_tools/README.md)
+- Formatting issues importance
+- [Dynamics of student usage](./user_dynamics/README.md)
\ No newline at end of file
diff --git a/src/python/evaluation/paper_evaluation/__init__.py b/src/python/evaluation/paper_evaluation/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src/python/evaluation/paper_evaluation/comparison_with_other_tools/README.md b/src/python/evaluation/paper_evaluation/comparison_with_other_tools/README.md
new file mode 100644
index 00000000..618165f7
--- /dev/null
+++ b/src/python/evaluation/paper_evaluation/comparison_with_other_tools/README.md
@@ -0,0 +1,58 @@
+# Comparison with other tools evaluation
+
+This module allows collecting statistics on the use of several code quality tools.
+In our work, we compare the Hyperstyle tool with the [Tutor](https://www.hkeuning.nl/rpt/) tool.
+The other tools (FrenchPress, WebTA, and AutoStyle) do not have open source code.
+
+To get the statistics we use students' solutions to six programming tasks,
+but the main script can gather these statistics for any tasks.
+
+The tasks from our dataset:
+- **countEven**. The `countEven` method returns the number of even integers in the values-array.
+- **sumValues**. The `sumValues` method adds up all numbers from the values-array,
+  or only the positive numbers if the `positivesOnly` boolean parameter is set
+  to `true`.
+- **oddSum**. The method `oddSum` returns the sum of all numbers at an odd index
+  in the array parameter, until the number -1 is seen at an odd index.
+- **calculateScore**. The `calculateScore` method calculates the score for a train trip.
+  The highest score is 10. The score is based on the number of changes and the day of
+  the week (Monday is 1, Sunday is 7).
+- **hasDoubled**. Write a program that calculates in how many years your savings
+  have doubled with the given interest.
+- **haveThree**. Given an array of ints, return true if the value 3 appears in the
+  array exactly 3 times, and no 3's are next to each other.
+
+The dataset has several columns:
+- Student id (student_id);
+- Task key (task_key);
+- Code fragment (solution);
+- Tutor error, if it exists (tutor_error);
+- Tutor issues keys (tutor_issues);
+- Hyperstyle issues keys (hyperstyle_issues);
+- Hyperstyle INFO issues keys (hyperstyle_info_issues);
+- Code style issues count (code_style_issues_count).
+
+The dataset is stored in the `csv` format.
+
+## Usage
+
+Run [statistics_gathering.py](statistics_gathering.py) with the arguments from the command line.
+
+Required arguments:
+
+`solutions_file_path` — path to the csv-file with code samples.
+
+The statistics will be printed to the terminal and include:
+- Unique users count;
+- Code snippets count;
+- Task statistics: for each task, the number of code snippets and the number of snippets with Tutor errors;
+- The number of code fragments with Tutor errors;
+- The number of unique errors found by Tutor;
+- Error statistics: for each error, its text and frequency;
+- Issues statistics:
+  - The total number of unique issues;
+  - Common issues statistics: the frequency of each issue found by both Hyperstyle and Tutor;
+  - Tutor unique issues statistics: the frequency of each issue found by Tutor but not by Hyperstyle;
+  - Hyperstyle unique issues statistics: the frequency of each issue found by Hyperstyle but not by Tutor;
+  - The number of code style issues and the number of fragments that contain them.
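+
+The core of the issue comparison can be sketched as follows. This is an illustrative
+snippet rather than the module code: the two toy rows are invented, and only the
+`tutor_issues` and `hyperstyle_issues` columns from the dataset description above are used.
+
+```python
+from collections import Counter
+
+import pandas as pd
+
+# Toy dataset with the issue columns described above; issue keys are ';'-separated.
+df = pd.DataFrame({
+    'tutor_issues': ['W0612;E501', 'E501'],
+    'hyperstyle_issues': ['E501', 'E501;C0303'],
+})
+
+common, tutor_only, hyperstyle_only = Counter(), Counter(), Counter()
+for _, row in df.iterrows():
+    tutor = Counter(row['tutor_issues'].split(';'))
+    hyperstyle = Counter(row['hyperstyle_issues'].split(';'))
+    common += tutor & hyperstyle           # issues reported by both tools
+    tutor_only += tutor - hyperstyle       # issues found only by Tutor
+    hyperstyle_only += hyperstyle - tutor  # issues found only by Hyperstyle
+
+print(common.most_common(), tutor_only.most_common(), hyperstyle_only.most_common())
+```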
+
diff --git a/src/python/evaluation/paper_evaluation/comparison_with_other_tools/__init__.py b/src/python/evaluation/paper_evaluation/comparison_with_other_tools/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src/python/evaluation/paper_evaluation/comparison_with_other_tools/statistics_gathering.py b/src/python/evaluation/paper_evaluation/comparison_with_other_tools/statistics_gathering.py
new file mode 100644
index 00000000..130092eb
--- /dev/null
+++ b/src/python/evaluation/paper_evaluation/comparison_with_other_tools/statistics_gathering.py
@@ -0,0 +1,58 @@
+import argparse
+import logging
+import sys
+from pathlib import Path
+
+from src.python.common.tool_arguments import RunToolArgument
+from src.python.evaluation.common.pandas_util import get_solutions_df
+from src.python.evaluation.paper_evaluation.comparison_with_other_tools.tutor_statistics import (
+    IssuesStatistics, TutorStatistics,
+)
+from src.python.evaluation.paper_evaluation.comparison_with_other_tools.util import ComparisonColumnName
+from src.python.review.common.file_system import Extension, get_restricted_extension
+
+sys.path.append('')
+sys.path.append('../../..')
+
+logger = logging.getLogger(__name__)
+
+
+def configure_arguments(parser: argparse.ArgumentParser) -> None:
+    parser.add_argument(RunToolArgument.SOLUTIONS_FILE_PATH.value.long_name,
+                        type=lambda value: Path(value).absolute(),
+                        help='Local CSV-file path with feedback from different tools. '
+                             'Your file must include the columns: '
+                             f'"{ComparisonColumnName.STUDENT_ID.name}", '
+                             f'"{ComparisonColumnName.TASK_KEY.name}", '
+                             f'"{ComparisonColumnName.SOLUTION.name}", and '
+                             f'"{ComparisonColumnName.TUTOR_ERROR.name}".')
+
+
+def main() -> int:
+    parser = argparse.ArgumentParser()
+    configure_arguments(parser)
+
+    try:
+        args = parser.parse_args()
+        solutions_file_path = args.solutions_file_path
+        extension = get_restricted_extension(solutions_file_path, [Extension.CSV])
+        solutions_df = get_solutions_df(extension, solutions_file_path)
+        tutor_stat = TutorStatistics(solutions_df, to_drop_duplicates=True)
+        tutor_stat.print_tasks_stat()
+        tutor_stat.print_error_stat()
+        print('ISSUES STAT:')
+        issue_stat = IssuesStatistics(solutions_df)
+        issue_stat.print_issues_stat()
+        return 0
+
+    except FileNotFoundError:
+        logger.error('CSV-file with the specified name does not exist.')
+        return 2
+
+    except Exception:
+        logger.exception('An unexpected error occurred.')
+        return 2
+
+
+if __name__ == '__main__':
+    sys.exit(main())
diff --git a/src/python/evaluation/paper_evaluation/comparison_with_other_tools/tutor_statistics.py b/src/python/evaluation/paper_evaluation/comparison_with_other_tools/tutor_statistics.py
new file mode 100644
index 00000000..4e7bf2e6
--- /dev/null
+++ b/src/python/evaluation/paper_evaluation/comparison_with_other_tools/tutor_statistics.py
@@ -0,0 +1,127 @@
+from collections import Counter
+from collections import defaultdict
+from dataclasses import dataclass
+from typing import Any, Dict, List
+
+import pandas as pd
+from src.python.evaluation.common.pandas_util import filter_df_by_single_value
+from src.python.evaluation.paper_evaluation.comparison_with_other_tools.util import (
+    ComparisonColumnName, ERROR_CONST, TutorTask,
+)
+
+
+def sort_freq_dict(freq_dict: Dict[Any, int]) -> Dict[Any, int]:
+    return dict(sorted(freq_dict.items(), key=lambda item: item[1], reverse=True))
+
+
+@dataclass
+class TutorStatistics:
+    unique_users: int
+    task_to_freq: Dict[TutorTask, int]
+    task_to_error_freq: Dict[TutorTask, int]
+    error_to_freq: Dict[str, int]
+    fragments_with_error: int = 0
+
+    __separator: str = '----------'
+
+    def __init__(self, solutions_df: pd.DataFrame, to_drop_duplicates: bool = False):
+        if to_drop_duplicates:
+            solutions_df = solutions_df.drop_duplicates(ComparisonColumnName.SOLUTION.value)
+        self.unique_users = len(solutions_df[ComparisonColumnName.STUDENT_ID.value].unique())
+        self.task_to_freq = defaultdict(int)
+        self.task_to_error_freq = defaultdict(int)
+        self.error_to_freq = defaultdict(int)
+        for task in TutorTask:
+            task_df = filter_df_by_single_value(solutions_df, ComparisonColumnName.TASK_KEY.value, task.value)
+            self.task_to_freq[task] = task_df.shape[0]
+            errors_list = list(map(lambda e_l: e_l.split(';'),
+                                   task_df[ComparisonColumnName.TUTOR_ERROR.value].dropna().values))
+            for cell_errors in errors_list:
+                for error in cell_errors:
+                    self.error_to_freq[error] += 1
+                    self.task_to_error_freq[task] += 1
+                    self.fragments_with_error += 1
+        self.task_to_freq = sort_freq_dict(self.task_to_freq)
+        self.error_to_freq = sort_freq_dict(self.error_to_freq)
+
+    def print_tasks_stat(self) -> None:
+        print(f'Unique users count: {self.unique_users}')
+        print(f'Code snippets count: {sum(self.task_to_freq.values())}')
+        print('Tasks statistics:')
+        for task, freq in self.task_to_freq.items():
+            print(f'Task {task.value}: {freq} items; {self.task_to_error_freq[task]} with tutor errors')
+        print(self.__separator)
+
+    def print_error_stat(self) -> None:
+        print(f'{self.fragments_with_error} code fragments had errors when run by Tutor')
+        print(f'{len(self.error_to_freq.keys())} unique errors were found by Tutor')
+        print('Error statistics:')
+        for error, freq in self.error_to_freq.items():
+            print(f'{error}: {freq} items')
+        print(self.__separator)
+
+
+@dataclass
+class IssuesStatistics:
+    common_issue_to_freq: Dict[str, int]
+    tutor_uniq_issue_to_freq: Dict[str, int]
+    hyperstyle_uniq_issue_to_freq: Dict[str, int]
+
+    code_style_issues_count: int
+    fragments_count_with_code_style_issues: int
+
+    __separator: str = '----------'
+
+    # TODO: info and code style issues
+    def __init__(self, solutions_df: pd.DataFrame, to_drop_duplicates: bool = False):
+        if to_drop_duplicates:
+            solutions_df = solutions_df.drop_duplicates(ComparisonColumnName.SOLUTION.value)
+        self.common_issue_to_freq = defaultdict(int)
+        self.tutor_uniq_issue_to_freq = defaultdict(int)
+        self.hyperstyle_uniq_issue_to_freq = defaultdict(int)
+        solutions_df.apply(lambda row: self.__init_solution_df_row(row), axis=1)
+        self.common_issue_to_freq = sort_freq_dict(self.common_issue_to_freq)
+        self.tutor_uniq_issue_to_freq = sort_freq_dict(self.tutor_uniq_issue_to_freq)
+        self.hyperstyle_uniq_issue_to_freq = sort_freq_dict(self.hyperstyle_uniq_issue_to_freq)
+        self.code_style_issues_count = sum(solutions_df[ComparisonColumnName.CODE_STYLE_ISSUES_COUNT.value])
+        self.fragments_count_with_code_style_issues = len(list(
+            filter(lambda x: x != 0, solutions_df[ComparisonColumnName.CODE_STYLE_ISSUES_COUNT.value])))
+
+    @staticmethod
+    def __parse_issues(issues_str: str) -> List[str]:
+        if pd.isna(issues_str) or issues_str == ERROR_CONST:
+            return []
+        return issues_str.split(';')
+
+    @staticmethod
+    def __add_issues(issues_dict: Dict[str, int], issues: List[str]) -> None:
+        for issue in issues:
+            issues_dict[issue] += 1
+
+    def __init_solution_df_row(self, row: pd.DataFrame) -> None:
+        tutor_issues = self.__parse_issues(row[ComparisonColumnName.TUTOR_ISSUES.value])
+        hyperstyle_issues = self.__parse_issues(row[ComparisonColumnName.HYPERSTYLE_ISSUES.value])
+        common_issues = list((Counter(tutor_issues) & Counter(hyperstyle_issues)).elements())
+        self.__add_issues(self.common_issue_to_freq, common_issues)
+        self.__add_issues(self.tutor_uniq_issue_to_freq, list(set(tutor_issues) - set(common_issues)))
+        self.__add_issues(self.hyperstyle_uniq_issue_to_freq, list(set(hyperstyle_issues) - set(common_issues)))
+
+    def __print_freq_issues_stat(self, freq_stat: Dict[str, int], prefix: str) -> None:
+        print(f'{prefix} issues statistics:')
+        for issue, freq in freq_stat.items():
+            print(f'{issue} was found {freq} times')
+        print(self.__separator)
+
+    def print_issues_stat(self) -> None:
+        uniq_issues = (len(self.common_issue_to_freq)
+                       + len(self.tutor_uniq_issue_to_freq)
+                       + len(self.hyperstyle_uniq_issue_to_freq)
+                       )
+        print(f'{uniq_issues} unique issues were found in total')
+        print(self.__separator)
+        self.__print_freq_issues_stat(self.common_issue_to_freq, 'Common')
+        self.__print_freq_issues_stat(self.tutor_uniq_issue_to_freq, 'Tutor unique')
+        self.__print_freq_issues_stat(self.hyperstyle_uniq_issue_to_freq, 'Hyperstyle unique')
+        print(f'{self.code_style_issues_count} code style issues (spaces, different brackets, indentations)'
+              f' were found in total by hyperstyle in {self.fragments_count_with_code_style_issues} fragments')
+        print(self.__separator)
diff --git a/src/python/evaluation/paper_evaluation/comparison_with_other_tools/util.py b/src/python/evaluation/paper_evaluation/comparison_with_other_tools/util.py
new file mode 100644
index 00000000..eff1bc15
--- /dev/null
+++ b/src/python/evaluation/paper_evaluation/comparison_with_other_tools/util.py
@@ -0,0 +1,27 @@
+from enum import Enum, unique
+
+
+@unique
+class ComparisonColumnName(Enum):
+    STUDENT_ID = 'student_id'
+    TASK_KEY = 'task_key'
+    SOLUTION = 'solution'
+    TUTOR_ERROR = 'tutor_error'
+
+    TUTOR_ISSUES = 'tutor_issues'
+    HYPERSTYLE_ISSUES = 'hyperstyle_issues'
+    HYPERSTYLE_INFO_ISSUES = 'hyperstyle_info_issues'
+    CODE_STYLE_ISSUES_COUNT = 'code_style_issues_count'
+
+
+ERROR_CONST = 'ERROR'
+
+
+@unique
+class TutorTask(Enum):
+    EVEN = 'countEven'
+    SUM_VALUES = 'sumValues'
+    ODD_SUM = 'oddSum'
+    SCORE = 'calculateScore'
+    HAS_DOUBLED = 'hasDoubled'
+    HAVE_THREE = 'haveThree'
diff --git a/src/python/evaluation/paper_evaluation/user_dynamics/README.md b/src/python/evaluation/paper_evaluation/user_dynamics/README.md
new file mode 100644
index 00000000..9e46df11
--- /dev/null
+++ b/src/python/evaluation/paper_evaluation/user_dynamics/README.md
@@ -0,0 +1,29 @@
+# Dynamics of student usage
+
+This module allows collecting statistics about the dynamics of students' code quality issues.
+
+## Usage
+
+Run [dynamics_gathering.py](dynamics_gathering.py) with the arguments from the command line.
+
+Required arguments:
+
+`solutions_file_path` — path to the csv-file with code samples.
+
+Optional arguments:
+| Argument | Description |
+|---|---|
+|**‑fb**, **‑‑freq-boundary**| The boundary of solutions count for one student to analyze. The default value is 100.|
+|**‑n**, **‑‑n**| Top n popular issues in solutions. The default value is 10. |
+
+As a result, a file with the students' issue dynamics will be created.
+Also, the top issues for all students will be printed to the terminal; for each issue, these statistics contain its key and its total frequency across all students.
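+
+The idea behind a single dynamics row can be sketched as follows. This is an
+illustrative snippet, not the module code: the toy issue lists are invented,
+while the real script reads the solutions and their issues from the csv-file.
+
+```python
+from typing import List
+
+# One user's solutions, already sorted by timestamp; each inner list holds
+# the issues found in one solution (INFO issues are excluded).
+solutions_issues: List[List[str]] = [
+    ['E501', 'W0612'],
+    ['E501'],
+    [],
+]
+
+# The dynamics row is simply the issue count per solution, in time order.
+dynamics = [len(issues) for issues in solutions_issues]
+print(','.join(str(count) for count in dynamics))  # prints: 2,1,0
+```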
+
+An example of issues dynamics:
+```text
+user,traceback
+0,"0,0,0,0,0,0,0,0,0,0,0,1,0,0,3,0,0,0,0,2,0,4,0,6,3,0,3,0,0,0,1,1,0,0,0,1,0,0,0,2,0,0,0,0,0,0,4,0,0,0,1,6,0,1,0,1,3,0,0,1,1,0,0,0,0,0,3,6,1,0,0,0,0,0,0,0,4,1,0,0,1,0,8,0,2,8,0,0,0,0,1,1,1,1,3,7,23,0,9"
+1,"0,0,0,3,0,0,2,1,0,0,0,0,4,1,0,0,1,1,0,0,0,0,0,6,0,1,1,0,8,1,2,1,1,0,0,1,0,4,10,1,1,1,3,0,1,0,0,0,1,0,0,0,0,0,0,2,0,3,0,0,2,2,3,2,0,0,0,1,0,1,1,0,0,1,0,4,6,2,0,0,1,0,0,0,0,2,0,0,0,2,1,2,1,0,1,7,1,0,1,1,0,1,0"
+```
+Each number in the traceback column is the number of issues in one solution.
+The issue counts are sorted by timestamp.
\ No newline at end of file
diff --git a/src/python/evaluation/paper_evaluation/user_dynamics/__init__.py b/src/python/evaluation/paper_evaluation/user_dynamics/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src/python/evaluation/paper_evaluation/user_dynamics/dynamics_gathering.py b/src/python/evaluation/paper_evaluation/user_dynamics/dynamics_gathering.py
new file mode 100644
index 00000000..7d9bbec5
--- /dev/null
+++ b/src/python/evaluation/paper_evaluation/user_dynamics/dynamics_gathering.py
@@ -0,0 +1,111 @@
+import argparse
+import sys
+from collections import Counter
+from pathlib import Path
+from typing import Dict, List
+
+import pandas as pd
+from src.python.common.tool_arguments import RunToolArgument
+from src.python.evaluation.common.csv_util import write_dataframe_to_csv
+from src.python.evaluation.common.pandas_util import (
+    drop_duplicates, filter_df_by_single_value, get_issues_from_json, get_solutions_df, logger,
+)
+from src.python.evaluation.common.util import ColumnName
+from src.python.evaluation.inspectors.common.statistics import PenaltyIssue
+from src.python.evaluation.paper_evaluation.comparison_with_other_tools.tutor_statistics import sort_freq_dict
+from src.python.evaluation.paper_evaluation.user_dynamics.user_statistics import UserStatistics
+from src.python.review.common.file_system import Extension, get_parent_folder, get_restricted_extension
+from src.python.review.inspectors.issue import IssueType
+
+
+def configure_arguments(parser: argparse.ArgumentParser) -> None:
+    parser.add_argument(RunToolArgument.SOLUTIONS_FILE_PATH.value.long_name,
+                        type=lambda value: Path(value).absolute(),
+                        help=RunToolArgument.SOLUTIONS_FILE_PATH.value)
+
+    parser.add_argument('-fb', '--freq-boundary',
+                        help='The boundary of solutions count for one student to analyze',
+                        type=int,
+                        default=100)
+
+    parser.add_argument('-n', '--n',
+                        help='Top n popular issues in solutions',
+                        type=int,
+                        default=10)
+
+
+def __get_top_freq_issues(issues: List[List[PenaltyIssue]], n: int) -> Dict[str, int]:
+    all_issues = list(map(lambda i: i.origin_class, [item for sublist in issues for item in sublist]))
+    return dict(Counter(all_issues).most_common(n))
+
+
+# Get statistics only for users who have more than freq_boundary solutions in solutions_df.
+# The statistics for each student contain:
+# - a list of issue lists (one list per solution), without INFO issues;
+# - the frequency of each issue key from these lists;
+#   only the top_n most popular issues are kept.
+def __get_user_statistics(solutions_df: pd.DataFrame, freq_boundary: int = 100,
+                          top_n: int = 10) -> List[UserStatistics]:
+    stat = []
+    counts = solutions_df[ColumnName.USER.value].value_counts()
+    solutions_df = solutions_df[solutions_df[ColumnName.USER.value].isin(counts[counts > freq_boundary].index)]
+    for user in solutions_df[ColumnName.USER.value].unique():
+        user_df = filter_df_by_single_value(solutions_df,
+                                            ColumnName.USER.value, user).sort_values(ColumnName.TIME.value)
+        user_df = drop_duplicates(user_df)
+        traceback = list(map(lambda t: get_issues_from_json(t),
+                             list(user_df[ColumnName.TRACEBACK.value])))
+        # Filter out INFO issues from each solution's issue list
+        traceback = list(map(lambda issues_list: list(filter(lambda i: i.type != IssueType.INFO, issues_list)), traceback))
+        top_issues = __get_top_freq_issues(traceback, top_n)
+        stat.append(UserStatistics(traceback, top_issues))
+    return stat
+
+
+def __get_student_dynamics(stats: List[UserStatistics]) -> pd.DataFrame:
+    dynamics = map(lambda s: s.get_traceback_dynamics(), stats)
+    dynamics_dict = {i: ','.join(map(lambda d: str(d), dyn)) for (i, dyn) in enumerate(dynamics)}
+    return pd.DataFrame(dynamics_dict.items(), columns=[ColumnName.USER.value, ColumnName.TRACEBACK.value])
+
+
+def __get_total_top(stats: List[UserStatistics]) -> Dict[str, int]:
+    total_top_n = {}
+    for d in map(lambda s: s.top_issues, stats):
+        for k, v in d.items():
+            total_top_n.setdefault(k, 0)
+            total_top_n[k] += v
+    return sort_freq_dict(total_top_n)
+
+
+def main() -> int:
+    parser = argparse.ArgumentParser()
+    configure_arguments(parser)
+
+    try:
+        args = parser.parse_args()
+        solutions_file_path = args.solutions_file_path
+        extension = get_restricted_extension(solutions_file_path, [Extension.CSV])
+        solutions_df = get_solutions_df(extension, solutions_file_path)
+        solutions_df = filter_df_by_single_value(solutions_df, ColumnName.IS_PUBLIC.value, 'YES')
+        stats = __get_user_statistics(solutions_df, freq_boundary=args.freq_boundary, top_n=args.n)
+        dynamics = __get_student_dynamics(stats)
+        output_path = get_parent_folder(Path(solutions_file_path)) / f'student_issues_dynamics{Extension.CSV.value}'
+        write_dataframe_to_csv(output_path, dynamics)
+        print(f'The student dynamics were saved here: {output_path}')
+        total_top = __get_total_top(stats)
+        print('Total top issues:')
+        for i, (key, freq) in enumerate(total_top.items()):
+            print(f'{i}. {key} was found {freq} times')
+        return 0
+
+    except FileNotFoundError:
+        logger.error('CSV-file with the specified name does not exist.')
+        return 2
+
+    except Exception:
+        logger.exception('An unexpected error occurred.')
+        return 2
+
+
+if __name__ == '__main__':
+    sys.exit(main())
diff --git a/src/python/evaluation/paper_evaluation/user_dynamics/user_statistics.py b/src/python/evaluation/paper_evaluation/user_dynamics/user_statistics.py
new file mode 100644
index 00000000..0423a70b
--- /dev/null
+++ b/src/python/evaluation/paper_evaluation/user_dynamics/user_statistics.py
@@ -0,0 +1,13 @@
+from dataclasses import dataclass
+from typing import Dict, List
+
+from src.python.evaluation.inspectors.common.statistics import PenaltyIssue
+
+
+@dataclass
+class UserStatistics:
+    traceback: List[List[PenaltyIssue]]
+    top_issues: Dict[str, int]
+
+    def get_traceback_dynamics(self) -> List[int]:
+        return list(map(lambda i_l: len(i_l), self.traceback))
diff --git a/test/python/evaluation/statistics/__init__.py b/test/python/evaluation/issues_statistics/__init__.py
similarity index 57%
rename from test/python/evaluation/statistics/__init__.py
rename to test/python/evaluation/issues_statistics/__init__.py
index 08bac33a..9a178e36 100644
--- a/test/python/evaluation/statistics/__init__.py
+++ b/test/python/evaluation/issues_statistics/__init__.py
@@ -1,8 +1,8 @@
 from test.python.evaluation import CURRENT_TEST_DATA_FOLDER
 
-STATISTICS_TEST_DATA_FOLDER = CURRENT_TEST_DATA_FOLDER / 'statistics'
+ISSUES_STATISTICS_TEST_DATA_FOLDER = CURRENT_TEST_DATA_FOLDER / 'issues_statistics'
 
-GET_RAW_ISSUES_DATA_FOLDER = STATISTICS_TEST_DATA_FOLDER / 'get_raw_issues'
+GET_RAW_ISSUES_DATA_FOLDER = ISSUES_STATISTICS_TEST_DATA_FOLDER / 'get_raw_issues'
 
 GET_RAW_ISSUES_TEST_FILES_FOLDER = GET_RAW_ISSUES_DATA_FOLDER / 'test_files'
 
diff --git a/test/python/evaluation/statistics/test_get_raw_issues.py b/test/python/evaluation/issues_statistics/test_get_raw_issues.py
similarity index 97%
rename from test/python/evaluation/statistics/test_get_raw_issues.py
rename to test/python/evaluation/issues_statistics/test_get_raw_issues.py
index c11882c2..cafa3b63 100644
--- a/test/python/evaluation/statistics/test_get_raw_issues.py
+++ b/test/python/evaluation/issues_statistics/test_get_raw_issues.py
@@ -1,12 +1,14 @@
 from pathlib import Path
 from test.python.common_util import equal_df
-from test.python.evaluation.statistics import GET_RAW_ISSUES_TARGET_FILES_FOLDER, GET_RAW_ISSUES_TEST_FILES_FOLDER
+from test.python.evaluation.issues_statistics import (
+    GET_RAW_ISSUES_TARGET_FILES_FOLDER, GET_RAW_ISSUES_TEST_FILES_FOLDER,
+)
 from typing import List, Optional
 
 import pandas as pd
 import pytest
 from src.python.evaluation.common.pandas_util import get_solutions_df_by_file_path
-from src.python.evaluation.statistics.get_raw_issues import _filter_issues, _get_output_path, inspect_solutions
+from src.python.evaluation.issues_statistics.get_raw_issues import _filter_issues, _get_output_path, inspect_solutions
 from src.python.review.inspectors.inspector_type import InspectorType
 from src.python.review.inspectors.issue import BaseIssue, CodeIssue, IssueType, LineLenIssue, MaintainabilityLackIssue
diff --git a/test/python/evaluation/statistics/test_raw_issue_encoding_decoding.py b/test/python/evaluation/issues_statistics/test_raw_issue_encoding_decoding.py
similarity index 98%
rename from test/python/evaluation/statistics/test_raw_issue_encoding_decoding.py
rename to test/python/evaluation/issues_statistics/test_raw_issue_encoding_decoding.py
index 82c6c901..43c20e08 100644
--- a/test/python/evaluation/statistics/test_raw_issue_encoding_decoding.py
+++ b/test/python/evaluation/issues_statistics/test_raw_issue_encoding_decoding.py
@@ -3,7 +3,7 @@
 from pathlib import Path
 
 import pytest
-from src.python.evaluation.statistics.common.raw_issue_encoder_decoder import RawIssueDecoder, RawIssueEncoder
+from src.python.evaluation.issues_statistics.common.raw_issue_encoder_decoder import RawIssueDecoder, RawIssueEncoder
 from src.python.review.inspectors.inspector_type import InspectorType
 from src.python.review.inspectors.issue import (
     BaseIssue,
diff --git a/test/resources/evaluation/statistics/get_raw_issues/target_files/target_fragment_per_language.csv b/test/resources/evaluation/issues_statistics/get_raw_issues/target_files/target_fragment_per_language.csv
similarity index 100%
rename from test/resources/evaluation/statistics/get_raw_issues/target_files/target_fragment_per_language.csv
rename to test/resources/evaluation/issues_statistics/get_raw_issues/target_files/target_fragment_per_language.csv
diff --git a/test/resources/evaluation/statistics/get_raw_issues/target_files/target_incorrect_code.csv b/test/resources/evaluation/issues_statistics/get_raw_issues/target_files/target_incorrect_code.csv
similarity index 100%
rename from test/resources/evaluation/statistics/get_raw_issues/target_files/target_incorrect_code.csv
rename to test/resources/evaluation/issues_statistics/get_raw_issues/target_files/target_incorrect_code.csv
diff --git a/test/resources/evaluation/statistics/get_raw_issues/target_files/target_incorrect_language.csv b/test/resources/evaluation/issues_statistics/get_raw_issues/target_files/target_incorrect_language.csv
similarity index 100%
rename from test/resources/evaluation/statistics/get_raw_issues/target_files/target_incorrect_language.csv
rename to test/resources/evaluation/issues_statistics/get_raw_issues/target_files/target_incorrect_language.csv
diff --git a/test/resources/evaluation/statistics/get_raw_issues/test_files/test_fragment_per_language.csv b/test/resources/evaluation/issues_statistics/get_raw_issues/test_files/test_fragment_per_language.csv
similarity index 100%
rename from test/resources/evaluation/statistics/get_raw_issues/test_files/test_fragment_per_language.csv
rename to test/resources/evaluation/issues_statistics/get_raw_issues/test_files/test_fragment_per_language.csv
diff --git a/test/resources/evaluation/statistics/get_raw_issues/test_files/test_incorrect_code.csv b/test/resources/evaluation/issues_statistics/get_raw_issues/test_files/test_incorrect_code.csv
similarity index 100%
rename from test/resources/evaluation/statistics/get_raw_issues/test_files/test_incorrect_code.csv
rename to test/resources/evaluation/issues_statistics/get_raw_issues/test_files/test_incorrect_code.csv
diff --git a/test/resources/evaluation/statistics/get_raw_issues/test_files/test_incorrect_language.csv b/test/resources/evaluation/issues_statistics/get_raw_issues/test_files/test_incorrect_language.csv
similarity index 100%
rename from test/resources/evaluation/statistics/get_raw_issues/test_files/test_incorrect_language.csv
rename to test/resources/evaluation/issues_statistics/get_raw_issues/test_files/test_incorrect_language.csv
diff --git a/whitelist.txt b/whitelist.txt
index b505f492..18fd4cb7 100644
--- a/whitelist.txt
+++ b/whitelist.txt
@@ -183,3 +183,9 @@ Xpath
 Ctor
 Atclause
 puppycrawl
+CONST
+isna
+dropna
+sublist
+dyn
+setdefault