diff --git a/README.md b/README.md index 67ce8430..60208054 100644 --- a/README.md +++ b/README.md @@ -104,6 +104,7 @@ Argument | Description **‑s**, **‑‑start-line**| the first line to be analyzed. By default it starts from `1`. **‑e**, **‑‑end-line** | the end line to be analyzed. The default value is `None`, which means the file is handled up to its end. **‑‑new-format** | the argument determines whether the tool should use the _new format_. _New format_ means separating the result by the files to allow getting quality and observed issues for each file separately. The default value is `False`. +**‑‑history** | JSON string with a list of issues for each language. For each issue, its class and quantity are specified. Example: `--history "{\"python\": [{\"origin_class\": \"SC200\", \"number\": 20}, {\"origin_class\": \"WPS314\", \"number\": 3}]}"` The output examples: @@ -124,7 +125,8 @@ The output examples: "line": "", "line_number": 54, "column_number": 0, - "category": "FUNC_LEN" + "category": "FUNC_LEN", + "influence_on_penalty": 0 }, ... ] @@ -153,7 +155,8 @@ The output examples: "line": "", "line_number": 174, "column_number": 12, - "category": "BEST_PRACTICES" + "category": "BEST_PRACTICES", + "influence_on_penalty": 0 }, ... ] diff --git a/src/python/common/tool_arguments.py b/src/python/common/tool_arguments.py index 9fe4181b..5038653b 100644 --- a/src/python/common/tool_arguments.py +++ b/src/python/common/tool_arguments.py @@ -72,3 +72,7 @@ class RunToolArgument(Enum): NEW_FORMAT = ArgumentsInfo(None, '--new-format', 'The argument determines whether the tool ' 'should use the new format') + + HISTORY = ArgumentsInfo(None, '--history', + 'JSON string that contains lists of issues from previous submissions ' + 'for other tasks by the same user.') diff --git a/src/python/evaluation/xlsx_run_tool.py b/src/python/evaluation/xlsx_run_tool.py index 7235b041..b2200f19 100644 --- a/src/python/evaluation/xlsx_run_tool.py +++ b/src/python/evaluation/xlsx_run_tool.py @@ -20,7 +20,7 @@ ) from src.python.evaluation.evaluation_config import EvaluationConfig from src.python.review.application_config import LanguageVersion -from src.python.review.common.file_system import create_file, new_temp_dir +from src.python.review.common.file_system import create_file from src.python.review.common.subprocess_runner import run_in_subprocess from src.python.review.reviewers.perform_review import OutputFormat @@ -72,6 +72,15 @@ def configure_arguments(parser: argparse.ArgumentParser, run_tool_arguments: Typ 'is enabled argument will not be used otherwise.') +def get_language(lang_key: str) -> LanguageVersion: + try: + return LanguageVersion(lang_key) + except ValueError as e: + logger.error(script_structure_rule) + # We should raise KeyError since the value from the lang column is not a valid language key + raise KeyError(e) + + def create_dataframe(config: EvaluationConfig) -> pd.DataFrame: report = pd.DataFrame( { @@ -95,35 +104,29 @@ def create_dataframe(config: EvaluationConfig) -> pd.DataFrame: for lang, code in zip(lang_code_dataframe[ColumnName.LANG.value], lang_code_dataframe[ColumnName.CODE.value]): - with new_temp_dir() as create_temp_dir: - temp_dir_path = create_temp_dir - lang_extension = LanguageVersion.language_by_extension(lang) - temp_file_path = os.path.join(temp_dir_path, ('file' + lang_extension)) - temp_file_path = next(create_file(temp_file_path, code)) - - try: - assert os.path.exists(temp_file_path) - except AssertionError as e: - logger.exception('Path does not exist.') - raise e - - command = 
config.build_command(temp_file_path, lang) - results = run_in_subprocess(command) - os.remove(temp_file_path) - temp_dir_path.rmdir() - # this regular expression matches final tool grade: EXCELLENT, GOOD, MODERATE or BAD - grades = re.match(r'^.*{"code":\s"([A-Z]+)"', results).group(1) - output_row_values = [lang, code, grades] - column_indices = [ColumnName.LANGUAGE.value, - ColumnName.CODE.value, - ColumnName.GRADE.value] - - if config.traceback: - output_row_values.append(results) - column_indices.append(EvaluationArgument.TRACEBACK.value) - - new_file_report_row = pd.Series(data=output_row_values, index=column_indices) - report = report.append(new_file_report_row, ignore_index=True) + # Tool does not work correctly with tmp files from module on macOS + # thus we create a real file in the file system + extension = get_language(lang).extension_by_language().value + tmp_file_path = config.xlsx_file_path.parent.absolute() / f'inspected_code{extension}' + temp_file = next(create_file(tmp_file_path, code)) + + command = config.build_command(temp_file, lang) + results = run_in_subprocess(command) + os.remove(temp_file) + + # this regular expression matches final tool grade: EXCELLENT, GOOD, MODERATE or BAD + grades = re.match(r'^.*{"code":\s"([A-Z]+)"', results).group(1) + output_row_values = [lang, code, grades] + column_indices = [ColumnName.LANGUAGE.value, + ColumnName.CODE.value, + ColumnName.GRADE.value] + + if config.traceback: + output_row_values.append(results) + column_indices.append(EvaluationArgument.TRACEBACK.value) + + new_file_report_row = pd.Series(data=output_row_values, index=column_indices) + report = report.append(new_file_report_row, ignore_index=True) return report diff --git a/src/python/review/application_config.py b/src/python/review/application_config.py index 8d9a56f5..f41d439d 100644 --- a/src/python/review/application_config.py +++ b/src/python/review/application_config.py @@ -1,6 +1,6 @@ from dataclasses import dataclass from enum import Enum, unique -from typing import List, Optional, Set +from typing import Dict, List, Optional, Set from src.python.review.common.file_system import Extension from src.python.review.inspectors.inspector_type import InspectorType @@ -15,6 +15,7 @@ class ApplicationConfig: start_line: int = 1 end_line: Optional[int] = None new_format: bool = False + history: Optional[str] = None @unique @@ -31,14 +32,13 @@ def values(cls) -> List[str]: return [member.value for member in cls.__members__.values()] @classmethod - def language_to_extension_dict(cls) -> dict: - return {cls.PYTHON_3.value: Extension.PY.value, - cls.JAVA_7.value: Extension.JAVA.value, - cls.JAVA_8.value: Extension.JAVA.value, - cls.JAVA_9.value: Extension.JAVA.value, - cls.JAVA_11.value: Extension.JAVA.value, - cls.KOTLIN.value: Extension.KT.value} - - @classmethod - def language_by_extension(cls, lang: str) -> str: - return cls.language_to_extension_dict()[lang] + def language_to_extension_dict(cls) -> Dict['LanguageVersion', Extension]: + return {cls.PYTHON_3: Extension.PY, + cls.JAVA_7: Extension.JAVA, + cls.JAVA_8: Extension.JAVA, + cls.JAVA_9: Extension.JAVA, + cls.JAVA_11: Extension.JAVA, + cls.KOTLIN: Extension.KT} + + def extension_by_language(self) -> Extension: + return self.language_to_extension_dict()[self] diff --git a/src/python/review/common/file_system.py b/src/python/review/common/file_system.py index 3ab06061..d0b3dca0 100644 --- a/src/python/review/common/file_system.py +++ b/src/python/review/common/file_system.py @@ -4,7 +4,7 @@ from contextlib import 
contextmanager from enum import Enum, unique from pathlib import Path -from typing import Callable, List, Union +from typing import Callable, List, Tuple, Union @unique @@ -61,6 +61,10 @@ def new_temp_dir() -> Path: yield Path(temp_dir) +def new_temp_file(suffix: Extension = Extension.EMPTY) -> Tuple[int, str]: + yield tempfile.mkstemp(suffix=suffix.value)  # mkstemp returns an (OS-level file descriptor, path) pair + + # File should contain the full path and its extension. # Create all parents if necessary def create_file(file_path: Union[str, Path], content: str): diff --git a/src/python/review/quality/model.py b/src/python/review/quality/model.py index 7a57dcf6..78291370 100644 --- a/src/python/review/quality/model.py +++ b/src/python/review/quality/model.py @@ -15,15 +15,18 @@ class QualityType(Enum): GOOD = 'GOOD' EXCELLENT = 'EXCELLENT' - def __le__(self, other: 'QualityType') -> bool: - order = { + def to_number(self) -> int: + type_to_number = { QualityType.BAD: 0, QualityType.MODERATE: 1, QualityType.GOOD: 2, QualityType.EXCELLENT: 3, } - return order[self] < order[other] + return type_to_number.get(self, 3) + + def __le__(self, other: 'QualityType') -> bool: + return self.to_number() < other.to_number() class Rule(abc.ABC): diff --git a/src/python/review/quality/penalty.py b/src/python/review/quality/penalty.py new file mode 100644 index 00000000..30872465 --- /dev/null +++ b/src/python/review/quality/penalty.py @@ -0,0 +1,201 @@ +import json +from dataclasses import dataclass +from typing import Dict, List, Optional, Set + +from src.python.review.common.language import Language +from src.python.review.inspectors.issue import BaseIssue, IssueType +from src.python.review.quality.model import QualityType + +# TODO: need testing +ISSUE_TYPE_TO_PENALTY_COEFFICIENT = { + IssueType.COHESION: 1, + IssueType.COUPLING: 1, + IssueType.FUNC_LEN: 1, + IssueType.LINE_LEN: 1, + IssueType.ARCHITECTURE: 1, + IssueType.BEST_PRACTICES: 1, + IssueType.BOOL_EXPR_LEN: 1, + IssueType.CHILDREN_NUMBER: 1, + IssueType.CLASS_RESPONSE: 1, + IssueType.CODE_STYLE: 1, + IssueType.COMPLEXITY: 1, + IssueType.CYCLOMATIC_COMPLEXITY: 1, + IssueType.ERROR_PRONE: 1, + IssueType.INHERITANCE_DEPTH: 1, + IssueType.MAINTAINABILITY: 1, + IssueType.METHOD_NUMBER: 1, + IssueType.WEIGHTED_METHOD: 1, +} + + +@dataclass +class PenaltyConfig: + one_level_quality_reduction: float + two_level_quality_reduction: float + three_level_quality_reduction: float + + +# TODO: need testing +common_penalty_rule = PenaltyConfig( + one_level_quality_reduction=0.5, + two_level_quality_reduction=0.7, + three_level_quality_reduction=0.9, +) + + +@dataclass +class PreviousIssue: + origin_class: str + number: int + category: Optional[IssueType] = None + + +def get_previous_issues_by_language(lang_to_history: Optional[str], language: Language) -> List[PreviousIssue]: + """ + Reads a JSON string and returns a list of previously made issues for the specified language. + """ + if lang_to_history is None: + return [] + + language_to_history = json.loads(lang_to_history) + history = language_to_history[language.value.lower()] + + previous_issues = [] + for issue_data in history: + previous_issues.append(PreviousIssue(**issue_data)) + return previous_issues + + +def categorize(previous_issues: List[PreviousIssue], current_issues: List[BaseIssue]): + """ + For each previously made issue, determines its category with the help of the current issues. 
+ """ + origin_class_to_category = {} + for issue in current_issues: + origin_class_to_category[issue.origin_class] = issue.type + + for issue in previous_issues: + issue.category = origin_class_to_category.get(issue.origin_class, None) + + +class Punisher: + """ + Punisher with the list of previous issues and current issues allows you to use the 'get_quality_with_penalty' + function to get quality including the penalty for previous issues and use the 'get_issue_influence_on_penalty' + function to get the influence of an issues on reducing the quality score. + """ + + _penalty_coefficient: float + _normalized_penalty_coefficient: float + _issue_class_to_influence: Dict[str, float] + + def __init__(self, current_issues: List[BaseIssue], previous_issues: List[PreviousIssue]): + self._penalty_coefficient = self._get_penalty_coefficient(current_issues, previous_issues) + self._normalized_penalty_coefficient = self._get_normalized_penalty_coefficient(current_issues) + self._issue_class_to_influence = self._get_issue_class_to_influence(current_issues, previous_issues) + + def get_quality_with_penalty(self, quality_without_penalty: QualityType) -> QualityType: + """ + Depending on the penalty coefficient, reduces the quality type. + """ + numbered_quality_type = quality_without_penalty.to_number() + numbered_quality_type -= self._get_penalty_score() + + quality = QualityType.EXCELLENT + + if numbered_quality_type <= 0: + quality = QualityType.BAD + elif numbered_quality_type == 1: + quality = QualityType.MODERATE + elif numbered_quality_type == 2: + quality = QualityType.GOOD + + return quality + + def get_issue_influence_on_penalty(self, issue_class: str) -> int: + """ + Calculates the influence of the issue on the penalty. + + Returns a number in the range from 0 to 100. + """ + + return int(self._issue_class_to_influence.get(issue_class, 0) * 100) + + def _get_penalty_score(self) -> int: + """ + Calculates the penalty score with the penalty coefficient + + Returns a number equal to 0, 1, 2 or 3, which describes how many levels the grade should be lowered. + """ + + penalty_score = 3 + + if self._normalized_penalty_coefficient < common_penalty_rule.one_level_quality_reduction: + penalty_score = 0 + elif self._normalized_penalty_coefficient < common_penalty_rule.two_level_quality_reduction: + penalty_score = 1 + elif self._normalized_penalty_coefficient < common_penalty_rule.three_level_quality_reduction: + penalty_score = 2 + + return penalty_score + + def _get_penalty_coefficient(self, current_issues: List[BaseIssue], previous_issues: List[PreviousIssue]) -> float: + """ + To calculate the penalty coefficient we use those issues that occurred earlier and repeated again. + Such issues will be called penalizing issues. For each penalizing issue, we calculate a number equal to + the number of times this issue was repeated earlier multiplied by the coefficient of the category to + which the issue belongs. These numbers are added up to get the penalty coefficient. 
+ """ + + penalizing_classes = self._get_penalizing_classes(current_issues, previous_issues) + penalizing_issues = list(filter(lambda issue: issue.origin_class in penalizing_classes, previous_issues)) + + coefficient = 0 + for issue in penalizing_issues: + coefficient += ISSUE_TYPE_TO_PENALTY_COEFFICIENT.get(issue.category, 1) * issue.number + + return coefficient + + def _get_normalized_penalty_coefficient(self, current_issues: List[BaseIssue]) -> float: + """ + The penalty coefficient is normalized by the formula: k / (k + n), + where k is the penalty coefficient, n is the number of current issues. + """ + + coefficient = 0 + if current_issues: + coefficient = self._penalty_coefficient / (self._penalty_coefficient + len(current_issues)) + + return coefficient + + def _get_issue_class_to_influence(self, + current_issues: List[BaseIssue], + previous_issues: List[PreviousIssue]) -> Dict[str, float]: + """ + For each issue, the corresponding influence on penalty is calculated. + To do this, for each issue we count its penalty coefficient, normalize it, + and divide the resulting number by the total normalized penalty coefficient. + """ + + result = {} + for issue in previous_issues: + influence = 0 + if current_issues: + issue_coefficient = ISSUE_TYPE_TO_PENALTY_COEFFICIENT.get(issue.category, 1) * issue.number + normalized_issue_coefficient = issue_coefficient / (self._penalty_coefficient + len(current_issues)) + influence = normalized_issue_coefficient / self._normalized_penalty_coefficient + + result[issue.origin_class] = influence + + return result + + @staticmethod + def _get_penalizing_classes(current_issues: List[BaseIssue], previous_issues: List[PreviousIssue]) -> Set[str]: + """ + Returns issues that should be penalized. + We penalize for those issues that were there before, but repeated again. 
+ """ + current_classes = set(map(lambda issue: issue.origin_class, current_issues)) + previous_classes = set(map(lambda issue: issue.origin_class, previous_issues)) + + return previous_classes.intersection(current_classes) diff --git a/src/python/review/reviewers/common.py b/src/python/review/reviewers/common.py index 9340d824..1c7628fb 100644 --- a/src/python/review/reviewers/common.py +++ b/src/python/review/reviewers/common.py @@ -15,6 +15,7 @@ from src.python.review.inspectors.radon.radon import RadonInspector from src.python.review.quality.evaluate_quality import evaluate_quality from src.python.review.quality.model import Quality +from src.python.review.quality.penalty import categorize, get_previous_issues_by_language, Punisher from src.python.review.reviewers.review_result import FileReviewResult, ReviewResult from src.python.review.reviewers.utils.code_statistics import gather_code_statistics from src.python.review.reviewers.utils.issues_filter import filter_duplicate_issues, filter_low_measure_issues @@ -64,14 +65,20 @@ def perform_language_review(metadata: Metadata, for issue in issues: file_path_to_issues[issue.file_path].append(issue) - file_review_results = [] + previous_issues = get_previous_issues_by_language(config.history, language) + categorize(previous_issues, issues) + + general_punisher = Punisher(issues, previous_issues) general_quality = Quality([]) + + file_review_results = [] for file_metadata in files_metadata: issues = file_path_to_issues[file_metadata.path] code_statistics = gather_code_statistics(issues, file_metadata.path) code_statistics.total_lines = min(code_statistics.total_lines, get_range_lines(config.start_line, config.end_line)) + punisher = Punisher(issues, previous_issues) quality = evaluate_quality(code_statistics, language) general_quality = general_quality.merge(quality) @@ -79,11 +86,13 @@ def perform_language_review(metadata: Metadata, file_metadata.path, issues, quality, + punisher, )) return ReviewResult( file_review_results, general_quality, + general_punisher, ) diff --git a/src/python/review/reviewers/review_result.py b/src/python/review/reviewers/review_result.py index 0fde257a..8ca9ff23 100644 --- a/src/python/review/reviewers/review_result.py +++ b/src/python/review/reviewers/review_result.py @@ -4,6 +4,7 @@ from src.python.review.inspectors.issue import BaseIssue from src.python.review.quality.model import Quality +from src.python.review.quality.penalty import Punisher @dataclass @@ -11,12 +12,14 @@ class FileReviewResult: file_path: Path issues: List[BaseIssue] quality: Quality + punisher: Punisher @dataclass class ReviewResult: file_review_results: List[FileReviewResult] general_quality: Quality + general_punisher: Punisher @property def all_issues(self) -> List[BaseIssue]: diff --git a/src/python/review/reviewers/utils/print_review.py b/src/python/review/reviewers/utils/print_review.py index 9da0e35b..a5a2f59b 100644 --- a/src/python/review/reviewers/utils/print_review.py +++ b/src/python/review/reviewers/utils/print_review.py @@ -1,8 +1,10 @@ import json import linecache from pathlib import Path +from typing import Any, Dict from src.python.review.common.file_system import get_file_line +from src.python.review.inspectors.issue import BaseIssue from src.python.review.reviewers.review_result import ReviewResult @@ -48,23 +50,19 @@ def print_review_result_as_json(review_result: ReviewResult) -> None: issues.sort(key=lambda issue: issue.line_no) - quality_value = review_result.general_quality.quality_type.value + 
quality_without_penalty = review_result.general_quality.quality_type + quality_with_penalty = review_result.general_punisher.get_quality_with_penalty(quality_without_penalty) output_json = {'quality': { - 'code': quality_value, - 'text': f'Code quality (beta): {quality_value}', + 'code': quality_with_penalty.value, + 'text': f'Code quality (beta): {quality_with_penalty.value}', }, 'issues': []} for issue in issues: - line_text = get_file_line(issue.file_path, issue.line_no) + influence_on_penalty = 0 + if quality_with_penalty != quality_without_penalty: + influence_on_penalty = review_result.general_punisher.get_issue_influence_on_penalty(issue.origin_class) - output_json['issues'].append({ - 'code': issue.origin_class, - 'text': issue.description, - 'line': line_text, - 'line_number': issue.line_no, - 'column_number': issue.column_no, - 'category': issue.type.value, - }) + output_json['issues'].append(convert_issue_to_json(issue, influence_on_penalty)) print(json.dumps(output_json)) @@ -75,12 +73,13 @@ def print_review_result_as_multi_file_json(review_result: ReviewResult) -> None: review_result.file_review_results.sort(key=lambda result: result.file_path) for file_review_result in review_result.file_review_results: - quality_value = file_review_result.quality.quality_type.value + quality_without_penalty = file_review_result.quality.quality_type + quality_with_penalty = file_review_result.punisher.get_quality_with_penalty(quality_without_penalty) file_review_result_json = { 'file_name': str(file_review_result.file_path), 'quality': { - 'code': quality_value, - 'text': f'Code quality (beta): {quality_value}', + 'code': quality_with_penalty.value, + 'text': f'Code quality (beta): {quality_with_penalty.value}', }, 'issues': [], } @@ -88,25 +87,35 @@ def print_review_result_as_multi_file_json(review_result: ReviewResult) -> None: file_review_result_jsons.append(file_review_result_json) for issue in file_review_result.issues: - line_text = get_file_line(issue.file_path, issue.line_no) + influence_on_penalty = 0 + if quality_with_penalty != quality_without_penalty: + influence_on_penalty = file_review_result.punisher.get_issue_influence_on_penalty(issue.origin_class) - file_review_result_json['issues'].append({ - 'code': issue.origin_class, - 'text': issue.description, - 'line': line_text, - 'line_number': issue.line_no, - 'column_number': issue.column_no, - 'category': issue.type.value, - }) + file_review_result_json['issues'].append(convert_issue_to_json(issue, influence_on_penalty)) - quality_value = review_result.general_quality.quality_type.value + quality_without_penalty = review_result.general_quality.quality_type + quality_with_penalty = review_result.general_punisher.get_quality_with_penalty(quality_without_penalty) output_json = { 'quality': { - 'code': quality_value, - 'text': f'Code quality (beta): {quality_value}', + 'code': quality_with_penalty.value, + 'text': f'Code quality (beta): {quality_with_penalty.value}', }, 'file_review_results': file_review_result_jsons, } print(json.dumps(output_json)) + + +def convert_issue_to_json(issue: BaseIssue, influence_on_penalty: int) -> Dict[str, Any]: + line_text = get_file_line(issue.file_path, issue.line_no) + + return { + 'code': issue.origin_class, + 'text': issue.description, + 'line': line_text, + 'line_number': issue.line_no, + 'column_number': issue.column_no, + 'category': issue.type.value, + 'influence_on_penalty': influence_on_penalty, + } diff --git a/src/python/review/run_tool.py b/src/python/review/run_tool.py index 
bdfbb41f..fc74774d 100644 --- a/src/python/review/run_tool.py +++ b/src/python/review/run_tool.py @@ -4,6 +4,7 @@ import os import sys import traceback +from json import JSONDecodeError from pathlib import Path from typing import Set @@ -103,6 +104,10 @@ def configure_arguments(parser: argparse.ArgumentParser, tool_arguments: enum.En action='store_true', help=tool_arguments.NEW_FORMAT.value.description) + parser.add_argument(tool_arguments.HISTORY.value.long_name, + help=tool_arguments.HISTORY.value.description, + type=str) + def configure_logging(verbosity: VerbosityLevel) -> None: if verbosity is VerbosityLevel.ERROR: @@ -146,6 +151,7 @@ def main() -> int: start_line=start_line, end_line=args.end_line, new_format=args.new_format, + history=args.history, ) n_issues = perform_and_print_review(args.path, OutputFormat(args.format), config) @@ -153,12 +159,19 @@ def main() -> int: return 0 return 1 + except PathNotExists: logger.error('Path not exists') return 2 + except UnsupportedLanguage: logger.error('Unsupported language. Supported ones are Java, Kotlin, Python') return 2 + + except JSONDecodeError: + logger.error('Incorrect JSON') + return 2 + except Exception: traceback.print_exc() logger.exception('An unexpected error') diff --git a/test/python/common/__init__.py b/test/python/common/__init__.py new file mode 100644 index 00000000..689b1893 --- /dev/null +++ b/test/python/common/__init__.py @@ -0,0 +1,5 @@ +from test.python import TEST_DATA_FOLDER + +CURRENT_TEST_DATA_FOLDER = TEST_DATA_FOLDER / 'common' + +FILE_SYSTEM_DATA_FOLDER = CURRENT_TEST_DATA_FOLDER / 'file_system' diff --git a/test/python/common/file_system/__init__.py b/test/python/common/file_system/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/test/python/common/file_system/test_subprocess.py b/test/python/common/file_system/test_subprocess.py new file mode 100644 index 00000000..1f2104dd --- /dev/null +++ b/test/python/common/file_system/test_subprocess.py @@ -0,0 +1,38 @@ +import os +from pathlib import Path +from test.python.common import FILE_SYSTEM_DATA_FOLDER +from test.python.evaluation.testing_config import get_testing_arguments + +import pytest +from src.python.evaluation.evaluation_config import EvaluationConfig +from src.python.review.application_config import LanguageVersion +from src.python.review.common.file_system import create_file, get_content_from_file +from src.python.review.common.subprocess_runner import run_in_subprocess + +INPUT_DATA = [ + ('in_1.java', LanguageVersion.JAVA_11), + ('in_2.py', LanguageVersion.PYTHON_3), +] + + +def inspect_code(config: EvaluationConfig, file: str, language: LanguageVersion) -> str: + command = config.build_command(file, language.value) + return run_in_subprocess(command) + + +@pytest.mark.parametrize(('test_file', 'language'), INPUT_DATA) +def test_synthetic_files(test_file: str, language: LanguageVersion): + input_file = FILE_SYSTEM_DATA_FOLDER / test_file + test_args = get_testing_arguments(to_add_traceback=True, to_add_tool_path=True) + config = EvaluationConfig(test_args) + + expected_output = inspect_code(config, input_file, language) + + input_code = get_content_from_file(Path(input_file)) + actual_file = next(create_file(FILE_SYSTEM_DATA_FOLDER / f'actual_file{language.extension_by_language().value}', + input_code)) + + actual_output = inspect_code(config, actual_file, language) + os.remove(actual_file) + + assert actual_output == expected_output diff --git a/test/python/evaluation/testing_config.py 
b/test/python/evaluation/testing_config.py index 70341635..1e8fc5a9 100644 --- a/test/python/evaluation/testing_config.py +++ b/test/python/evaluation/testing_config.py @@ -15,4 +15,6 @@ def get_testing_arguments(to_add_traceback=None, to_add_tool_path=None) -> Names if to_add_tool_path: testing_arguments.tool_path = MAIN_FOLDER.parent / 'review/run_tool.py' + testing_arguments.xlsx_file_path = None + return testing_arguments diff --git a/test/python/functional_tests/test_multi_file_project.py b/test/python/functional_tests/test_multi_file_project.py index 86a029d1..3f4178d5 100644 --- a/test/python/functional_tests/test_multi_file_project.py +++ b/test/python/functional_tests/test_multi_file_project.py @@ -38,6 +38,7 @@ 'line_number': 2, 'column_number': 5, 'category': 'BEST_PRACTICES', + 'influence_on_penalty': 0, }, { 'code': 'W0612', @@ -46,6 +47,7 @@ 'line_number': 3, 'column_number': 5, 'category': 'BEST_PRACTICES', + 'influence_on_penalty': 0, }, { 'code': 'W0612', @@ -54,6 +56,7 @@ 'line_number': 4, 'column_number': 5, 'category': 'BEST_PRACTICES', + 'influence_on_penalty': 0, }, ], }, diff --git a/test/python/functional_tests/test_range_of_lines.py b/test/python/functional_tests/test_range_of_lines.py index 1ef4a8f5..3d3e78bc 100644 --- a/test/python/functional_tests/test_range_of_lines.py +++ b/test/python/functional_tests/test_range_of_lines.py @@ -17,19 +17,22 @@ 'column_number': 2, 'line': 'a=10', 'line_number': 1, - 'text': 'missing whitespace around operator'}, + 'text': 'missing whitespace around operator', + 'influence_on_penalty': 0}, {'category': 'CODE_STYLE', 'code': 'E225', 'column_number': 2, 'line': 'b=20', 'line_number': 2, - 'text': 'missing whitespace around operator'}, + 'text': 'missing whitespace around operator', + 'influence_on_penalty': 0}, {'category': 'CODE_STYLE', 'code': 'E225', 'column_number': 2, 'line': 'c=a + b', 'line_number': 4, 'text': 'missing whitespace around operator', + 'influence_on_penalty': 0, }, ], } @@ -85,6 +88,7 @@ def test_range_filter_when_start_line_is_not_first( 'line_number': 4, 'column_number': 2, 'category': 'CODE_STYLE', + 'influence_on_penalty': 0, }], } @@ -153,6 +157,7 @@ def test_range_filter_when_end_line_is_first( 'line_number': 1, 'column_number': 2, 'category': 'CODE_STYLE', + 'influence_on_penalty': 0, }], } @@ -219,6 +224,7 @@ def test_range_filter_when_both_start_and_end_lines_specified_not_equal_borders( 'line_number': 2, 'column_number': 2, 'category': 'CODE_STYLE', + 'influence_on_penalty': 0, }, { 'code': 'E225', 'text': 'missing whitespace around operator', @@ -226,6 +232,7 @@ def test_range_filter_when_both_start_and_end_lines_specified_not_equal_borders( 'line_number': 4, 'column_number': 2, 'category': 'CODE_STYLE', + 'influence_on_penalty': 0, }], } diff --git a/test/python/quality/__init__.py b/test/python/quality/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/test/python/quality/test_penalty.py b/test/python/quality/test_penalty.py new file mode 100644 index 00000000..51ca5729 --- /dev/null +++ b/test/python/quality/test_penalty.py @@ -0,0 +1,104 @@ +from pathlib import Path +from typing import List, Set + +import pytest +from src.python.review.inspectors.inspector_type import InspectorType +from src.python.review.inspectors.issue import BaseIssue, IssueType +from src.python.review.quality.penalty import categorize, PreviousIssue, Punisher + +punisher = Punisher([], []) + +CURRENT_ISSUES = [ + BaseIssue( + file_path=Path("."), + line_no=1, + column_no=1, + description="Possibly 
misspelt word", + origin_class="SC200", + inspector_type=InspectorType.FLAKE8, + type=IssueType.BEST_PRACTICES, + ), + BaseIssue( + file_path=Path("."), + line_no=10, + column_no=5, + description="Lambda may not be necessary", + origin_class="W0108", + inspector_type=InspectorType.FLAKE8, + type=IssueType.CODE_STYLE, + ), +] + +PREVIOUS_ISSUES_CURRENT_ISSUES_EXPECTED_CLASSES = [ + ([], [], set()), + ([], CURRENT_ISSUES, set()), + ([PreviousIssue("WPS301", 50), PreviousIssue("SC200", 10)], [], set()), + ([PreviousIssue("WPS301", 50), PreviousIssue("WPS412", 10)], CURRENT_ISSUES, set()), + ([PreviousIssue("SC200", 50), PreviousIssue("WPS412", 10)], CURRENT_ISSUES, {"SC200"}), + ([PreviousIssue("SC200", 50), PreviousIssue("W0108", 10)], CURRENT_ISSUES, {"SC200", "W0108"}), +] + + +@pytest.mark.parametrize(('previous_issues', 'current_issues', 'expected_penalizing_classes'), + PREVIOUS_ISSUES_CURRENT_ISSUES_EXPECTED_CLASSES) +def test_get_penalizing_classes(previous_issues: List[PreviousIssue], + current_issues: List[BaseIssue], + expected_penalizing_classes: Set[str]): + actual = punisher._get_penalizing_classes(current_issues, previous_issues) + + assert actual == expected_penalizing_classes + + +PREVIOUS_ISSUES_CURRENT_ISSUES_EXPECTED_CATEGORIES = [ + ([], [], []), + ([], CURRENT_ISSUES, []), + ([PreviousIssue("WPS412", 50), PreviousIssue("WPS412", 10)], [], [None, None]), + ([PreviousIssue("WPS412", 50), PreviousIssue("WPS412", 10)], CURRENT_ISSUES, [None, None]), + ([PreviousIssue("SC200", 50), PreviousIssue("WPS312", 10)], CURRENT_ISSUES, [IssueType.BEST_PRACTICES, None]), + ([PreviousIssue("SC200", 50), PreviousIssue("W0108", 10)], CURRENT_ISSUES, + [IssueType.BEST_PRACTICES, IssueType.CODE_STYLE]), +] + + +@pytest.mark.parametrize(('previous_issues', 'current_issues', 'expected_categories'), + PREVIOUS_ISSUES_CURRENT_ISSUES_EXPECTED_CATEGORIES) +def test_categorize(previous_issues: List[PreviousIssue], + current_issues: List[BaseIssue], + expected_categories: List[IssueType]): + categorize(previous_issues, current_issues) + + for issue, expected_category in zip(previous_issues, expected_categories): + assert issue.category == expected_category + + +ISSUE_CLASS_EXPECTED_INFLUENCE = [ + ("SC200", 63), + ("Q146", 0), +] + + +@pytest.mark.parametrize(('issue_class', 'expected_influence'), ISSUE_CLASS_EXPECTED_INFLUENCE) +def test_get_issue_influence_on_penalty(issue_class: str, expected_influence: int): + punisher._issue_class_to_influence = {"SC200": 0.636, "WPS312": 0.1225} + + actual = punisher.get_issue_influence_on_penalty(issue_class) + + assert actual == expected_influence + + +PENALTY_COEFFICIENT_CURRENT_ISSUES_NORMALIZED_PENALTY_COEFFICIENT = [ + (8, [], 0), + (8, CURRENT_ISSUES, 0.8), +] + + +@pytest.mark.parametrize(('penalty_coefficient', 'current_issues', 'normalized_penalty_coefficient'), + PENALTY_COEFFICIENT_CURRENT_ISSUES_NORMALIZED_PENALTY_COEFFICIENT) +def test_get_normalized_penalty_coefficient(penalty_coefficient: float, + current_issues: List[BaseIssue], + normalized_penalty_coefficient): + punisher._penalty_coefficient = penalty_coefficient + + actual = punisher._get_normalized_penalty_coefficient(current_issues) + + assert actual == normalized_penalty_coefficient diff --git a/test/resources/common/file_system/in_1.java b/test/resources/common/file_system/in_1.java new file mode 100644 index 00000000..1e4a378b --- /dev/null +++ b/test/resources/common/file_system/in_1.java @@ -0,0 +1,8 @@ +public class Main { + public static void main(String[] args) { + + int 
variable = 123456; + + System.out.println(variable); + } +} \ No newline at end of file diff --git a/test/resources/common/file_system/in_2.py b/test/resources/common/file_system/in_2.py new file mode 100644 index 00000000..aa1637a2 --- /dev/null +++ b/test/resources/common/file_system/in_2.py @@ -0,0 +1,13 @@ +a = int(input()) +b = int(input()) +c = int(input()) +d = int(input()) + +if a > b: + print('a > b') + +if a > b and a > b: + print('a > b again') + +if a > b and a < d: + print('b < a < d') \ No newline at end of file diff --git a/test/resources/evaluation/xlsx_target_files/target_sorted_order.xlsx b/test/resources/evaluation/xlsx_target_files/target_sorted_order.xlsx index 8cbc432f..8c24f18b 100644 Binary files a/test/resources/evaluation/xlsx_target_files/target_sorted_order.xlsx and b/test/resources/evaluation/xlsx_target_files/target_sorted_order.xlsx differ diff --git a/test/resources/evaluation/xlsx_target_files/target_unsorted_order.xlsx b/test/resources/evaluation/xlsx_target_files/target_unsorted_order.xlsx index 25ed6146..a091643a 100644 Binary files a/test/resources/evaluation/xlsx_target_files/target_unsorted_order.xlsx and b/test/resources/evaluation/xlsx_target_files/target_unsorted_order.xlsx differ diff --git a/whitelist.txt b/whitelist.txt index c60ec292..7095e567 100644 --- a/whitelist.txt +++ b/whitelist.txt @@ -83,6 +83,7 @@ writelines rmdir df unique +Punisher # Springlint issues cbo dit @@ -90,4 +91,6 @@ lcom noc nom wmc -util \ No newline at end of file +util +tmp +Namespace \ No newline at end of file
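
Taken together, the changes above wire the new `--history` argument through `run_tool.py` into the `Punisher` added in `src/python/review/quality/penalty.py`. The snippet below is a minimal sketch of that mechanism in isolation, based only on the classes introduced in this diff and the fixtures from `test/python/quality/test_penalty.py`; the issue class `SC200` and the count of 20 are illustrative, and in the real flow the list of previous issues is built from the `--history` JSON by `get_previous_issues_by_language` rather than constructed by hand.

```python
from pathlib import Path

from src.python.review.inspectors.inspector_type import InspectorType
from src.python.review.inspectors.issue import BaseIssue, IssueType
from src.python.review.quality.model import QualityType
from src.python.review.quality.penalty import categorize, PreviousIssue, Punisher

# One issue found in the current submission (mirrors the fixture in test_penalty.py).
current_issues = [
    BaseIssue(
        file_path=Path('.'),
        line_no=1,
        column_no=1,
        description='Possibly misspelt word',
        origin_class='SC200',
        inspector_type=InspectorType.FLAKE8,
        type=IssueType.BEST_PRACTICES,
    ),
]

# The same issue was made 20 times in previous submissions; in production this list
# comes from the --history JSON via get_previous_issues_by_language.
previous_issues = [PreviousIssue(origin_class='SC200', number=20)]

# Assign an IssueType to every previous issue that is repeated in the current code.
categorize(previous_issues, current_issues)

punisher = Punisher(current_issues, previous_issues)

# SC200 is repeated, so the penalty coefficient is 20 and the normalized coefficient
# is 20 / (20 + 1) ≈ 0.95, which is above the 0.9 threshold: the grade drops by
# three levels and the single repeated issue carries the whole penalty.
print(punisher.get_quality_with_penalty(QualityType.EXCELLENT))  # QualityType.BAD
print(punisher.get_issue_influence_on_penalty('SC200'))  # 100
```

This mirrors how `print_review.py` reports a non-zero `influence_on_penalty` only when the penalty actually lowered the grade.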