diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index a2441985..8d7e6199 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -1,5 +1,5 @@ **Issues**: -[#ALT-](https://vyahhi.myjetbrains.com/youtrack/issue/ALT-) +[Issue#](https://github.com/hyperskill/hyperstyle/issues/#) **Description**: diff --git a/src/python/review/common/file_system.py b/src/python/review/common/file_system.py index 3e2ff145..28d0d167 100644 --- a/src/python/review/common/file_system.py +++ b/src/python/review/common/file_system.py @@ -2,30 +2,52 @@ import os import tempfile from contextlib import contextmanager +from enum import Enum from pathlib import Path -from typing import List, Union +from typing import List, Union, Callable -# TODO: Need testing -def get_all_file_paths_in_dir(dir_path: Path) -> List[Path]: - if not dir_path.is_dir(): - raise ValueError +class FileSystemItem(Enum): + PATH = 0 + SUBDIR = 1 + FILE = 2 - file_paths = [] - for root, _, files in os.walk(dir_path): - for file in files: - file_paths.append(Path(root) / file) - return file_paths +class Encoding(Enum): + ISO_ENCODING = 'ISO-8859-1' + UTF_ENCODING = 'utf-8' -# TODO: Need testing -def get_all_subdirs(dir_path: Path) -> List[Path]: - subdirs = {dir_path} - for file_path in get_all_file_paths_in_dir(dir_path): - subdirs.add(file_path.parent) +# Make sure all extensions (except an empty one) have a dot +class Extension(Enum): + EMPTY = '' + PY = '.py' + JAVA = '.java' + KT = '.kt' + JS = '.js' + KTS = '.kts' + + +ItemCondition = Callable[[str], bool] + + +def all_items_condition(name: str) -> bool: + return True + - return list(subdirs) +# To get all files or subdirs (depends on the last parameter) from root that match item_condition +# Note that all subdirs or files already contain the full path for them +def get_all_file_system_items(root: Path, item_condition: ItemCondition = all_items_condition, + item_type: FileSystemItem = FileSystemItem.FILE) -> List[Path]: + if not root.is_dir(): + raise ValueError(f'The {root} is not a directory') + + items = [] + for fs_tuple in os.walk(root): + for item in fs_tuple[item_type.value]: + if item_condition(item): + items.append(Path(os.path.join(fs_tuple[FileSystemItem.PATH.value], item))) + return items # TODO: Need testing @@ -35,18 +57,18 @@ def new_temp_dir() -> Path: yield Path(temp_dir) -def write(path: Union[str, Path], text: str): - """ - Write text to file. Create all parents if necessary - :param path: - :param text: - :return: - """ - path = Path(path) +# File should contain the full path and its extension. 
+# Create all parents if necessary +def create_file(file_path: Union[str, Path], content: str): + file_path = Path(file_path) + + create_directory(os.path.dirname(file_path)) + with open(file_path, 'w') as f: + f.write(content) - os.makedirs(path.parent, exist_ok=True) - with open(str(path), 'w') as file: - file.write(text) + +def create_directory(directory: str) -> None: + os.makedirs(directory, exist_ok=True) def get_file_line(path: Path, line_number: int): @@ -54,3 +76,15 @@ def get_file_line(path: Path, line_number: int): str(path), line_number ).strip() + + +def get_content_from_file(file_path: Path, encoding: str = Encoding.ISO_ENCODING.value, to_strip_nl: bool = True) -> str: + with open(file_path, 'r', encoding=encoding) as f: + content = f.read() + return content if not to_strip_nl else content.rstrip('\n') + + +# Not empty extensions are returned with a dot, for example, '.txt' +# If file has no extensions, an empty one ('') is returned +def get_extension_from_file(file: Path) -> Extension: + return Extension(os.path.splitext(file)[1]) diff --git a/src/python/review/common/java_compiler.py b/src/python/review/common/java_compiler.py index 08e3a349..aee76032 100644 --- a/src/python/review/common/java_compiler.py +++ b/src/python/review/common/java_compiler.py @@ -3,12 +3,14 @@ from pathlib import Path from typing import Union +from src.python.review.common.file_system import Encoding, Extension + logger = logging.getLogger(__name__) # TODO it cannot compile gradle-based projects def javac_project(dir_path: Path) -> bool: - return javac(f'$(find {dir_path} -name "*.java")') + return javac(f'$(find {dir_path} -name "*{Extension.JAVA.value}")') def javac(javac_args: Union[str, Path]) -> bool: @@ -18,7 +20,7 @@ def javac(javac_args: Union[str, Path]) -> bool: shell=True, stderr=subprocess.STDOUT ) - output_str = str(output_bytes, 'utf-8') + output_str = str(output_bytes, Encoding.UTF_ENCODING.value) if output_str: logger.debug(output_str) @@ -26,5 +28,5 @@ def javac(javac_args: Union[str, Path]) -> bool: return True except subprocess.CalledProcessError as error: logger.error(f'Failed compile java code with error: ' - f'{str(error.stdout, "utf-8")}') + f'{str(error.stdout, Encoding.UTF_ENCODING.value)}') return False diff --git a/src/python/review/common/language.py b/src/python/review/common/language.py index effa68cc..48780467 100644 --- a/src/python/review/common/language.py +++ b/src/python/review/common/language.py @@ -2,6 +2,8 @@ from pathlib import Path from typing import List +from src.python.review.common.file_system import Extension, get_extension_from_file + @unique class Language(Enum): @@ -13,19 +15,19 @@ class Language(Enum): EXTENSION_TO_LANGUAGE = { - '.java': Language.JAVA, - '.py': Language.PYTHON, - '.kt': Language.KOTLIN, - '.kts': Language.KOTLIN, - '.js': Language.JS, + Extension.JAVA: Language.JAVA, + Extension.PY: Language.PYTHON, + Extension.KT: Language.KOTLIN, + Extension.KTS: Language.KOTLIN, + Extension.JS: Language.JS, } def guess_file_language(file_path: Path) -> Language: - return EXTENSION_TO_LANGUAGE.get(file_path.suffix, Language.UNKNOWN) + return EXTENSION_TO_LANGUAGE.get(get_extension_from_file(file_path), Language.UNKNOWN) -def filter_paths(file_paths: List[Path], language: Language) -> List[Path]: +def filter_paths_by_language(file_paths: List[Path], language: Language) -> List[Path]: result = [] for path in file_paths: if guess_file_language(path) == language: diff --git a/src/python/review/common/parallel_runner.py 
b/src/python/review/common/parallel_runner.py
index 35a16fd4..e9ab7d8d 100644
--- a/src/python/review/common/parallel_runner.py
+++ b/src/python/review/common/parallel_runner.py
@@ -25,10 +25,7 @@ def run_inspector(path: Path,
 def inspect_in_parallel(path: Path,
                         config: ApplicationConfig,
                         inspectors: List[BaseInspector]) -> List[BaseIssue]:
-    inspectors = filter(
-        lambda inspector: inspector.inspector_type not in config.disabled_inspectors,
-        inspectors
-    )
+    inspectors = filter(lambda i: i.inspector_type not in config.disabled_inspectors, inspectors)
 
     if config.n_cpu == 1:
         issues = []
diff --git a/src/python/review/common/subprocess_runner.py b/src/python/review/common/subprocess_runner.py
index 351362ce..3dd5cad3 100644
--- a/src/python/review/common/subprocess_runner.py
+++ b/src/python/review/common/subprocess_runner.py
@@ -5,7 +5,7 @@
 logger = logging.getLogger(__name__)
 
 
-def run_in_subprocess_return_code(command: List[str]) -> (int, str):
+def run_in_subprocess(command: List[str]) -> str:
     process = subprocess.run(
         command,
         stdout=subprocess.PIPE,
@@ -20,8 +20,4 @@ def run_in_subprocess_return_code(command: List[str]) -> (int, str):
     if stderr:
         logger.debug('%s\'s stderr:\n%s' % (command[0], stderr))
 
-    return process.returncode, stdout
-
-
-def run_in_subprocess(command: List[str]) -> str:
-    return run_in_subprocess_return_code(command)[1]
+    return stdout
diff --git a/src/python/review/inspectors/base_inspector.py b/src/python/review/inspectors/base_inspector.py
index 6fd03cb6..2e29253a 100644
--- a/src/python/review/inspectors/base_inspector.py
+++ b/src/python/review/inspectors/base_inspector.py
@@ -7,12 +7,28 @@
 class BaseInspector(abc.ABC):
+    """
+    Each external inspector contains a dictionary in which each IssueType corresponds to the original linter classes.
+    The dictionary helps to categorize errors while parsing the linter's output.
+    To add a new inspector, you need to:
+    - create a class that inherits from the BaseInspector class,
+    - define the type of the inspector (the type field) by adding a new option to InspectorType,
+    - implement the inspect function.
+
+    Typically, this function launches a linter and parses its output (XML or JSON) into a list of BaseIssue.
+
+    Some (internal) inspectors do not require such a dictionary,
+    because they do not launch an additional analysis tool and work with the code directly,
+    for example, the Python AST inspector.
+    """
+
+    # The type of the inspector, e.g. pylint, detekt, etc.
     @property
     @abc.abstractmethod
     def inspector_type(self) -> InspectorType:
-        raise NotImplementedError
+        raise NotImplementedError('inspector_type property is not implemented yet')
 
     @abc.abstractmethod
     def inspect(self, path: Path, config: dict) -> List[BaseIssue]:
-        raise NotImplementedError
+        raise NotImplementedError('inspect method is not implemented yet')
diff --git a/src/python/review/inspectors/flake8/flake8.py b/src/python/review/inspectors/flake8/flake8.py
index d538c3a4..8fe5a567 100644
--- a/src/python/review/inspectors/flake8/flake8.py
+++ b/src/python/review/inspectors/flake8/flake8.py
@@ -7,7 +7,7 @@
 from src.python.review.inspectors.base_inspector import BaseInspector
 from src.python.review.inspectors.flake8.issue_types import CODE_PREFIX_TO_ISSUE_TYPE, CODE_TO_ISSUE_TYPE
 from src.python.review.inspectors.inspector_type import InspectorType
-from src.python.review.inspectors.issue import BaseIssue, CodeIssue, CyclomaticComplexityIssue, IssueType
+from src.python.review.inspectors.issue import BaseIssue, CodeIssue, CyclomaticComplexityIssue, IssueType, IssueData
 from src.python.review.inspectors.tips import get_cyclomatic_complexity_tip
 
 logger = logging.getLogger(__name__)
@@ -45,13 +45,9 @@ def parse(cls, output: str) -> List[BaseIssue]:
             file_path = Path(groups[0])
             line_no = int(groups[1])
 
-            issue_data = {
-                'file_path': file_path,
-                'line_no': line_no,
-                'column_no': int(groups[2]) if int(groups[2]) > 0 else 1,
-                'origin_class': origin_class,
-                'inspector_type': cls.inspector_type,
-            }
+            issue_data = IssueData.get_base_issue_data_dict(file_path, cls.inspector_type, line_number=line_no,
+                                                            column_number=int(groups[2]) if int(groups[2]) > 0 else 1,
+                                                            origin_class=origin_class)
             if cc_match is not None:
                 issue_data['description'] = get_cyclomatic_complexity_tip()
                 issue_data['cc_value'] = int(cc_match.groups()[1])
diff --git a/src/python/review/inspectors/intellij/intellij.py b/src/python/review/inspectors/intellij/intellij.py
index 7a224934..8b962947 100644
--- a/src/python/review/inspectors/intellij/intellij.py
+++ b/src/python/review/inspectors/intellij/intellij.py
@@ -5,7 +5,7 @@
 from typing import Dict, List, Optional, Union
 from xml.etree import ElementTree
 
-from src.python.review.common.file_system import get_all_file_paths_in_dir, new_temp_dir
+from src.python.review.common.file_system import get_all_file_system_items, new_temp_dir
 from src.python.review.common.subprocess_runner import run_in_subprocess
 from src.python.review.inspectors.base_inspector import BaseInspector
 from src.python.review.inspectors.inspector_type import InspectorType
@@ -71,7 +71,7 @@ def copy_files_to_project(self, path: Path) -> Dict[Path, Path]:
             file_paths = [path]
         elif path.is_dir():
             root_path = path
-            file_paths = get_all_file_paths_in_dir(root_path)
+            file_paths = get_all_file_system_items(root_path)
         else:
             raise ValueError
@@ -113,7 +113,7 @@ def _get_file_path_in_project(cls, relative_file_path: Path) -> Path:
     def parse(cls, out_dir_path: Path, path_in_project_to_origin_path: Dict[Path, Path]) -> List[BaseIssue]:
         out_file_paths = [
-            file_path for file_path in get_all_file_paths_in_dir(out_dir_path)
+            file_path for file_path in get_all_file_system_items(out_dir_path)
             if file_path.suffix.endswith('.xml') and not file_path.name.startswith('.')
         ]
diff --git a/src/python/review/inspectors/issue.py b/src/python/review/inspectors/issue.py
index fd2eb87d..d9d41659 100644
--- a/src/python/review/inspectors/issue.py
+++ b/src/python/review/inspectors/issue.py
@@ -1,6 +1,8 @@
+import abc
 from dataclasses import dataclass
 from enum import Enum
 from pathlib import Path
+from typing import Any, Dict, Union
 
 from src.python.review.inspectors.inspector_type import InspectorType
 
@@ -24,6 +26,36 @@ class IssueType(Enum):
     METHOD_NUMBER = 'METHOD_NUMBER'
 
 
+# Keys in the results dictionary
+class IssueData(Enum):
+    # Base fields
+    FILE_PATH = 'file_path'
+    LINE_NUMBER = 'line_no'
+    COLUMN_NUMBER = 'column_no'
+    ORIGIN_CLASS = 'origin_class'
+    INSPECTOR_TYPE = 'inspector_type'
+
+    # Additional fields
+    ISSUE_TYPE = 'type'
+    DESCRIPTION = 'description'
+
+    LINE_LEN = 'line_len'
+    FUNCTION_LEN = 'func_len'
+    BOOL_EXPR_LEN = 'bool_expr_len'
+    CYCLOMATIC_COMPLEXITY = 'cc_value'
+
+    @classmethod
+    def get_base_issue_data_dict(cls, file_path: Union[str, Path], inspector_type: InspectorType, line_number: int = 1,
+                                 column_number: int = 1, origin_class: str = '') -> Dict[str, Any]:
+        return {
+            cls.FILE_PATH.value: file_path,
+            cls.LINE_NUMBER.value: line_number,
+            cls.COLUMN_NUMBER.value: column_number,
+            cls.ORIGIN_CLASS.value: origin_class,
+            cls.INSPECTOR_TYPE.value: inspector_type
+        }
+
+
 @dataclass(frozen=True, eq=True)
 class BaseIssue:
     file_path: Path
@@ -37,72 +69,111 @@ class BaseIssue:
     type: IssueType
 
 
+class Measurable(abc.ABC):
+    @abc.abstractmethod
+    def measure(self) -> int:
+        pass
+
+
 @dataclass(frozen=True)
 class CodeIssue(BaseIssue):
     pass
 
 
 @dataclass(frozen=True)
-class BoolExprLenIssue(BaseIssue):
+class BoolExprLenIssue(BaseIssue, Measurable):
     bool_expr_len: int
     type = IssueType.BOOL_EXPR_LEN
 
+    def measure(self) -> int:
+        return self.bool_expr_len
+
 
 @dataclass(frozen=True)
-class FuncLenIssue(BaseIssue):
+class FuncLenIssue(BaseIssue, Measurable):
     func_len: int
     type = IssueType.FUNC_LEN
 
+    def measure(self) -> int:
+        return self.func_len
+
 
 @dataclass(frozen=True)
-class LineLenIssue(BaseIssue):
+class LineLenIssue(BaseIssue, Measurable):
     line_len: int
     type = IssueType.LINE_LEN
 
+    def measure(self) -> int:
+        return self.line_len
+
 
 @dataclass(frozen=True)
-class CyclomaticComplexityIssue(BaseIssue):
+class CyclomaticComplexityIssue(BaseIssue, Measurable):
     cc_value: int
     type = IssueType.CYCLOMATIC_COMPLEXITY
 
+    def measure(self) -> int:
+        return self.cc_value
+
 
 @dataclass(frozen=True)
-class InheritanceIssue(BaseIssue):
+class InheritanceIssue(BaseIssue, Measurable):
     inheritance_tree_depth: int
     type = IssueType.INHERITANCE_DEPTH
 
+    def measure(self) -> int:
+        return self.inheritance_tree_depth
+
 
 @dataclass(frozen=True)
-class ChildrenNumberIssue(BaseIssue):
+class ChildrenNumberIssue(BaseIssue, Measurable):
     children_number: int
     type = IssueType.CHILDREN_NUMBER
 
+    def measure(self) -> int:
+        return self.children_number
+
 
 @dataclass(frozen=True)
-class WeightedMethodIssue(BaseIssue):
+class WeightedMethodIssue(BaseIssue, Measurable):
     weighted_method: int
     type = IssueType.WEIGHTED_METHOD
 
+    def measure(self) -> int:
+        return self.weighted_method
+
 
 @dataclass(frozen=True)
-class CouplingIssue(BaseIssue):
+class CouplingIssue(BaseIssue, Measurable):
     class_objects_coupling: int
     type = IssueType.COUPLING
 
+    def measure(self) -> int:
+        return self.class_objects_coupling
+
 
 @dataclass(frozen=True)
-class CohesionIssue(BaseIssue):
+class CohesionIssue(BaseIssue, Measurable):
     cohesion_lack: int
     type = IssueType.COHESION
 
+    def measure(self) -> int:
+        return self.cohesion_lack
+
 
 @dataclass(frozen=True)
-class ClassResponseIssue(BaseIssue):
+class ClassResponseIssue(BaseIssue, Measurable):
     class_response: int
     type = IssueType.CLASS_RESPONSE
 
+    def measure(self) -> int:
+        return self.class_response
+
@dataclass(frozen=True) -class MethodNumberIssue(BaseIssue): +class MethodNumberIssue(BaseIssue, Measurable): method_number: int type = IssueType.METHOD_NUMBER + + def measure(self) -> int: + return self.method_number diff --git a/src/python/review/inspectors/parsers/checkstyle_parser.py b/src/python/review/inspectors/parsers/checkstyle_parser.py index 34a85379..9cf75acb 100644 --- a/src/python/review/inspectors/parsers/checkstyle_parser.py +++ b/src/python/review/inspectors/parsers/checkstyle_parser.py @@ -1,80 +1,112 @@ import logging import re from pathlib import Path -from typing import Callable, Dict, List +from typing import Callable, Dict, List, Any, Optional from xml.etree import ElementTree +from src.python.review.common.file_system import get_content_from_file from src.python.review.inspectors.inspector_type import InspectorType from src.python.review.inspectors.issue import BaseIssue, BoolExprLenIssue, CodeIssue, CyclomaticComplexityIssue, \ - FuncLenIssue, IssueType, LineLenIssue + FuncLenIssue, IssueType, LineLenIssue, IssueData from src.python.review.inspectors.tips import get_bool_expr_len_tip, get_cyclomatic_complexity_tip, get_func_len_tip, \ get_line_len_tip logger = logging.getLogger(__name__) -# TODO Needs testing -def parse_checkstyle_file_result( - file_path: Path, - inspector_type: InspectorType, - choose_issue_type: Callable[[str], IssueType], - origin_class_to_pattern_dict: Dict[str, str]) -> List[BaseIssue]: +# Check if the result of the inspectors is correct: it exists and it is not empty +def __is_result_file_correct(file_path: Path, inspector_type: InspectorType) -> bool: if not file_path.is_file(): logger.error('%s: error - no output file' % inspector_type.value) - return [] - - file_content = file_path.read_text() + return False + file_content = get_content_from_file(file_path) if not file_content: logger.error('%s: error - empty file' % inspector_type.value) + return False + return True + + +def __parse_error_message(element: ElementTree) -> str: + message = element.attrib['message'] + return re.sub(r'\(max allowed is \d+\). ', '', message) + + +# Measurable means that the issue has integer measure, +# e.g. 
BoolExprLenIssue, CyclomaticComplexityIssue and so on +def __parse_measurable_issue(issue_data: Dict[str, Any], issue_type: IssueType, + measure_value: int) -> Optional[BaseIssue]: + if issue_type == IssueType.CYCLOMATIC_COMPLEXITY: + issue_data[IssueData.CYCLOMATIC_COMPLEXITY.value] = measure_value + issue_data[IssueData.DESCRIPTION.value] = get_cyclomatic_complexity_tip() + return CyclomaticComplexityIssue(**issue_data) + elif issue_type == IssueType.FUNC_LEN: + issue_data[IssueData.FUNCTION_LEN.value] = measure_value + issue_data[IssueData.DESCRIPTION.value] = get_func_len_tip() + return FuncLenIssue(**issue_data) + elif issue_type == IssueType.BOOL_EXPR_LEN: + issue_data[IssueData.BOOL_EXPR_LEN.value] = measure_value + issue_data[IssueData.DESCRIPTION.value] = get_bool_expr_len_tip() + return BoolExprLenIssue(**issue_data) + elif issue_type == IssueType.LINE_LEN: + issue_data[IssueData.LINE_LEN.value] = measure_value + issue_data[IssueData.DESCRIPTION.value] = get_line_len_tip() + return LineLenIssue(**issue_data) + return None + + +def __should_handle_element(element: ElementTree) -> bool: + return element.tag == 'file' + + +def __is_error(element: ElementTree) -> bool: + return element.tag == 'error' + + +# TODO Needs testing +def parse_checkstyle_file_result( + file_path: Path, + inspector_type: InspectorType, + issue_type_selector: Callable[[str], IssueType], + origin_class_to_description: Dict[str, str]) -> List[BaseIssue]: + if not __is_result_file_correct(file_path, inspector_type): return [] + # Parse result XML tree = ElementTree.parse(file_path) - issues: List[BaseIssue] = [] + for element in tree.getroot(): - if element.tag == 'file': - code_file_path = Path(element.attrib['name']) - for inner_element in element: - if inner_element.tag == 'error': - message = inner_element.attrib['message'] - origin_class = inner_element.attrib['source'].split('.')[-1] - message = re.sub(r'\(max allowed is \d+\). 
', '', message) - issue_type = choose_issue_type(origin_class) - line_no = int(inner_element.attrib['line']) - - issue_data = { - 'file_path': code_file_path, - 'line_no': line_no, - 'column_no': int(inner_element.attrib.get('column', 1)), - 'origin_class': origin_class, - 'inspector_type': inspector_type, - 'type': issue_type - } - - if origin_class not in origin_class_to_pattern_dict: - issue_data['description'] = message - issues.append(CodeIssue(**issue_data)) - else: - pattern = origin_class_to_pattern_dict.get(origin_class) - metric_value = int(re.search(pattern, message, - flags=re.IGNORECASE).groups()[0]) - - if issue_type == IssueType.CYCLOMATIC_COMPLEXITY: - issue_data['cc_value'] = metric_value - issue_data['description'] = get_cyclomatic_complexity_tip() - issues.append(CyclomaticComplexityIssue(**issue_data)) - elif issue_type == IssueType.FUNC_LEN: - issue_data['func_len'] = metric_value - issue_data['description'] = get_func_len_tip() - issues.append(FuncLenIssue(**issue_data)) - elif issue_type == IssueType.BOOL_EXPR_LEN: - issue_data['bool_expr_len'] = metric_value - issue_data['description'] = get_bool_expr_len_tip() - issues.append(BoolExprLenIssue(**issue_data)) - elif issue_type == IssueType.LINE_LEN: - issue_data['line_len'] = metric_value - issue_data['description'] = get_line_len_tip() - issues.append(LineLenIssue(**issue_data)) + if not __should_handle_element(element): + continue + + code_file_path = Path(element.attrib['name']) + for inner_element in element: + if not __is_error(inner_element): + continue + + message = __parse_error_message(inner_element) + origin_class = inner_element.attrib['source'].split('.')[-1] + issue_data = IssueData.get_base_issue_data_dict(code_file_path, inspector_type, + line_number=int(inner_element.attrib['line']), + column_number=int( + inner_element.attrib.get('column', 1)), + origin_class=origin_class) + + issue_type = issue_type_selector(origin_class) + issue_data[IssueData.ISSUE_TYPE.value] = issue_type + + if origin_class in origin_class_to_description: + pattern = origin_class_to_description.get(origin_class) + measure_value = int(re.search(pattern, message, + flags=re.IGNORECASE).groups()[0]) + + issue = __parse_measurable_issue(issue_data, issue_type, measure_value) + else: + issue_data[IssueData.DESCRIPTION.value] = message + issue = CodeIssue(**issue_data) + + if issue is not None: + issues.append(issue) return issues diff --git a/src/python/review/inspectors/pyast/python_ast.py b/src/python/review/inspectors/pyast/python_ast.py index 1cd28b89..28b76e9f 100644 --- a/src/python/review/inspectors/pyast/python_ast.py +++ b/src/python/review/inspectors/pyast/python_ast.py @@ -4,7 +4,7 @@ from typing import Dict, List from src.python.review.common import language -from src.python.review.common.file_system import get_all_file_paths_in_dir +from src.python.review.common.file_system import get_all_file_system_items from src.python.review.common.language import Language from src.python.review.inspectors.base_inspector import BaseInspector from src.python.review.inspectors.inspector_type import InspectorType @@ -113,9 +113,9 @@ def inspect(cls, path: Path, config: dict) -> List[BaseIssue]: if path.is_file(): path_to_files = [path] else: - path_to_files = get_all_file_paths_in_dir(path) + path_to_files = get_all_file_system_items(path) - path_to_files = language.filter_paths(path_to_files, Language.PYTHON) + path_to_files = language.filter_paths_by_language(path_to_files, Language.PYTHON) metrics = [] for path_to_file in 
path_to_files: diff --git a/src/python/review/inspectors/spotbugs/spotbugs.py b/src/python/review/inspectors/spotbugs/spotbugs.py index de0b85d2..e9cd1cb2 100644 --- a/src/python/review/inspectors/spotbugs/spotbugs.py +++ b/src/python/review/inspectors/spotbugs/spotbugs.py @@ -4,7 +4,7 @@ from pathlib import Path from typing import Dict, List -from src.python.review.common.file_system import get_all_file_paths_in_dir +from src.python.review.common.file_system import get_all_file_system_items from src.python.review.common.java_compiler import javac, javac_project from src.python.review.common.subprocess_runner import run_in_subprocess from src.python.review.inspectors.base_inspector import BaseInspector @@ -35,11 +35,11 @@ def _create_command(cls, path: Path) -> List[str]: def inspect(self, path: Path, config: dict) -> List[BaseIssue]: if path.is_file(): - compilation_ok = javac(path) + is_successful = javac(path) else: - compilation_ok = javac_project(path) + is_successful = javac_project(path) - if not compilation_ok: + if not is_successful: logger.error('%s: cant compile java files') return self._inspect_compiled(path) @@ -54,7 +54,7 @@ def _inspect_compiled(self, path: Path) -> List[BaseIssue]: if path.is_file(): file_paths = [path] else: - file_paths = get_all_file_paths_in_dir(path) + file_paths = get_all_file_system_items(path) java_file_paths = [file_path for file_path in file_paths if file_path.suffix == '.java'] file_path_counter = Counter(java_file_paths) diff --git a/src/python/review/inspectors/springlint/springlint.py b/src/python/review/inspectors/springlint/springlint.py index 97502c20..18f9d6b0 100644 --- a/src/python/review/inspectors/springlint/springlint.py +++ b/src/python/review/inspectors/springlint/springlint.py @@ -3,7 +3,7 @@ import re from pathlib import Path from shutil import copy -from typing import AnyStr, List, Optional +from typing import AnyStr, List, Optional, Dict, Any from src.python.review.common.file_system import new_temp_dir from src.python.review.common.subprocess_runner import run_in_subprocess @@ -11,7 +11,7 @@ from src.python.review.inspectors.inspector_type import InspectorType from src.python.review.inspectors.issue import BaseIssue, ChildrenNumberIssue, ClassResponseIssue, CodeIssue, \ CohesionIssue, \ - CouplingIssue, InheritanceIssue, IssueType, MethodNumberIssue, WeightedMethodIssue + CouplingIssue, InheritanceIssue, IssueType, MethodNumberIssue, WeightedMethodIssue, IssueData from src.python.review.inspectors.tips import get_child_number_tip, get_class_coupling_tip, get_class_response_tip, \ get_cohesion_tip, get_inheritance_depth_tip, get_method_number_tip, get_weighted_method_tip @@ -171,11 +171,5 @@ def _create_issue(cls, metric_name: str, return None @classmethod - def _get_common_issue_data(cls, file: Path) -> dict: - return { - 'file_path': file, - 'line_no': 1, - 'column_no': 1, - 'origin_class': '', - 'inspector_type': cls.inspector_type - } + def _get_common_issue_data(cls, file: Path) -> Dict[str, Any]: + return IssueData.get_base_issue_data_dict(file, cls.inspector_type) diff --git a/src/python/review/quality/evaluate_quality.py b/src/python/review/quality/evaluate_quality.py index 8e260d2a..3d78e392 100644 --- a/src/python/review/quality/evaluate_quality.py +++ b/src/python/review/quality/evaluate_quality.py @@ -1,8 +1,8 @@ -from typing import Dict +from typing import List from src.python.review.common.language import Language from src.python.review.inspectors.issue import IssueType -from src.python.review.quality.model import 
Quality +from src.python.review.quality.model import Quality, Rule from src.python.review.quality.rules.best_practices_scoring import ( BestPracticesRule, LANGUAGE_TO_BEST_PRACTICES_RULE_CONFIG ) @@ -26,50 +26,8 @@ from src.python.review.reviewers.utils.code_statistics import CodeStatistics -def get_statistics(statistics: CodeStatistics) -> Dict[IssueType, int]: - return { - IssueType.CODE_STYLE: - statistics.code_style_lines, - - IssueType.BEST_PRACTICES: - statistics.n_best_practices_issue, - - IssueType.ERROR_PRONE: - statistics.n_error_prone_issues, - - IssueType.CYCLOMATIC_COMPLEXITY: - statistics.max_cyclomatic_complexity, - - IssueType.FUNC_LEN: - statistics.max_func_len, - - IssueType.LINE_LEN: - statistics.n_line_len, - - IssueType.BOOL_EXPR_LEN: - statistics.max_bool_expr_len, - - IssueType.INHERITANCE_DEPTH: - statistics.inheritance_depth, - - IssueType.METHOD_NUMBER: - statistics.method_number, - - IssueType.COUPLING: - statistics.coupling, - - IssueType.CLASS_RESPONSE: - statistics.class_response, - - IssueType.WEIGHTED_METHOD: - statistics.weighted_method_complexities - } - - -def evaluate_quality(statistics: CodeStatistics, language: Language) -> Quality: - rule_statistics = get_statistics(statistics) - - rules = [ +def __get_available_rules(language: Language) -> List[Rule]: + return [ ErrorProneRule(LANGUAGE_TO_ERROR_PRONE_RULE_CONFIG[language]), BestPracticesRule(LANGUAGE_TO_BEST_PRACTICES_RULE_CONFIG[language]), CyclomaticComplexityRule(LANGUAGE_TO_CYCLOMATIC_COMPLEXITY_RULE_CONFIG[language]), @@ -82,9 +40,15 @@ def evaluate_quality(statistics: CodeStatistics, language: Language) -> Quality: WeightedMethodsRule(LANGUAGE_TO_WEIGHTED_METHODS_RULE_CONFIG[language]) ] + +def evaluate_quality(statistics: CodeStatistics, language: Language) -> Quality: + rule_statistics = statistics.issue_type_to_statistics_dict + + rules = __get_available_rules(language) for rule in rules: rule.apply(rule_statistics[rule.rule_type]) + # TODO: refactor code_style_rule = CodeStyleRule(LANGUAGE_TO_CODE_STYLE_RULE_CONFIG[language]) code_style_rule.apply(rule_statistics[IssueType.CODE_STYLE], statistics.n_code_style_issues, statistics.total_lines) diff --git a/src/python/review/quality/model.py b/src/python/review/quality/model.py index 38ce7e1b..6fe03dcc 100644 --- a/src/python/review/quality/model.py +++ b/src/python/review/quality/model.py @@ -4,6 +4,7 @@ from typing import List from src.python.review.inspectors.issue import IssueType +from src.python.review.reviewers.utils.code_statistics import CodeStatistics @total_ordering @@ -43,14 +44,13 @@ def __init__(self, rules: List[Rule]): @property def quality_type(self) -> QualityType: - return min((rule.quality_type for rule in self.rules), - default=QualityType.EXCELLENT) + return min([rule.quality_type for rule in self.rules], default=QualityType.EXCELLENT) @property def next_quality_type(self) -> QualityType: - return min((rule.next_level_type for rule in self.rules), - default=QualityType.EXCELLENT) + return min([rule.next_level_type for rule in self.rules], default=QualityType.EXCELLENT) + # TODO@nbirillo: why rule.quality_type == quality_type for next level???? 
@property def next_level_requirements(self) -> List[Rule]: quality_type = self.quality_type diff --git a/src/python/review/quality/rules/code_style_scoring.py b/src/python/review/quality/rules/code_style_scoring.py index eaa2c17d..882b0c5c 100644 --- a/src/python/review/quality/rules/code_style_scoring.py +++ b/src/python/review/quality/rules/code_style_scoring.py @@ -64,6 +64,7 @@ def __init__(self, config: CodeStyleRuleConfig): self.quality_type = None self.next_level_delta = 0 + # TODO: refactor def apply(self, n_code_style_lines, n_code_style, total_lines): self.total_lines = total_lines self.n_code_style_lines = n_code_style_lines diff --git a/src/python/review/quality/rules/line_len_scoring.py b/src/python/review/quality/rules/line_len_scoring.py index 664f3dfd..d52b0382 100644 --- a/src/python/review/quality/rules/line_len_scoring.py +++ b/src/python/review/quality/rules/line_len_scoring.py @@ -29,6 +29,7 @@ def __init__(self, config: LineLengthRuleConfig): self.config = config self.rule_type = IssueType.LINE_LEN + # TODO: refactor def apply(self, n_line_len, n_lines): self.ratio = n_line_len / max(n_lines, 1) self.n_line_len = n_line_len diff --git a/src/python/review/reviewers/common.py b/src/python/review/reviewers/common.py index 3d7d8a06..0d6688c4 100644 --- a/src/python/review/reviewers/common.py +++ b/src/python/review/reviewers/common.py @@ -16,7 +16,7 @@ from src.python.review.quality.model import Quality from src.python.review.reviewers.review_result import FileReviewResult, ReviewResult from src.python.review.reviewers.utils.code_statistics import gather_code_statistics -from src.python.review.reviewers.utils.issues_filter import filter_duplicate_issues, filter_low_metric_issues +from src.python.review.reviewers.utils.issues_filter import filter_duplicate_issues, filter_low_measure_issues from src.python.review.reviewers.utils.metadata_exploration import FileMetadata, Metadata LANGUAGE_TO_INSPECTORS = { @@ -47,7 +47,7 @@ def perform_language_review(metadata: Metadata, issues = inspect_in_parallel(metadata.path, config, inspectors) if issues: - issues = filter_low_metric_issues(issues, language) + issues = filter_low_measure_issues(issues, language) if not config.allow_duplicates: issues = filter_duplicate_issues(issues) diff --git a/src/python/review/reviewers/python.py b/src/python/review/reviewers/python.py index a906e6a6..a239913a 100644 --- a/src/python/review/reviewers/python.py +++ b/src/python/review/reviewers/python.py @@ -2,7 +2,7 @@ from typing import List from src.python.review.application_config import ApplicationConfig -from src.python.review.common.file_system import get_all_subdirs +from src.python.review.common.file_system import get_all_file_system_items, FileSystemItem from src.python.review.common.language import Language from src.python.review.reviewers.common import perform_language_review from src.python.review.reviewers.review_result import ReviewResult @@ -24,7 +24,7 @@ def perform_python_review(metadata: Metadata, config: ApplicationConfig) -> Revi def create_init_scripts_in_subdirs(path: Path) -> List[Path]: created_file_paths = [] - for subdir in get_all_subdirs(path): + for subdir in get_all_file_system_items(path, item_type=FileSystemItem.SUBDIR): init_file_path = subdir / '__init__.py' if not init_file_path.is_file(): init_file_path.touch() diff --git a/src/python/review/reviewers/utils/code_statistics.py b/src/python/review/reviewers/utils/code_statistics.py index b9e3709a..fae50778 100644 --- 
a/src/python/review/reviewers/utils/code_statistics.py +++ b/src/python/review/reviewers/utils/code_statistics.py @@ -1,36 +1,65 @@ from collections import Counter from dataclasses import dataclass from pathlib import Path -from typing import List +from typing import List, Dict +from src.python.review.common.file_system import get_content_from_file from src.python.review.inspectors.issue import BaseIssue, IssueType @dataclass class CodeStatistics: - n_code_style_issues: int n_best_practices_issue: int n_error_prone_issues: int - max_bool_expr_len: int - max_func_len: int n_line_len: int + + method_number: int + max_cyclomatic_complexity: int + max_func_len: int + max_bool_expr_len: int + + code_style_lines: int inheritance_depth: int - class_response: int coupling: int + class_response: int weighted_method_complexities: int - method_number: int + + n_code_style_issues: int total_lines: int - code_style_lines: int + + @property + def issue_type_to_statistics_dict(self) -> Dict[IssueType, int]: + return { + IssueType.BEST_PRACTICES: self.n_best_practices_issue, + IssueType.ERROR_PRONE: self.n_error_prone_issues, + IssueType.LINE_LEN: self.n_line_len, + + IssueType.METHOD_NUMBER: self.method_number, + + IssueType.CYCLOMATIC_COMPLEXITY: self.max_cyclomatic_complexity, + IssueType.FUNC_LEN: self.max_func_len, + IssueType.BOOL_EXPR_LEN: self.max_bool_expr_len, + + IssueType.CODE_STYLE: self.code_style_lines, + IssueType.INHERITANCE_DEPTH: self.inheritance_depth, + IssueType.COUPLING: self.coupling, + IssueType.CLASS_RESPONSE: self.class_response, + IssueType.WEIGHTED_METHOD: self.weighted_method_complexities + } -def get_total_lines(path): - lines = open(str(path), 'r').readlines() - total_lines = 0 - for line in lines: - if len(line.strip()) > 0 and not line.strip().startswith(('#', '//')): - total_lines += 1 - return total_lines +def __get_total_lines(path: Path) -> int: + lines = get_content_from_file(path, to_strip_nl=False).splitlines() + return len(list(filter(lambda line: not __is_empty(line) and not __is_comment(line), lines))) + + +def __is_empty(line: str) -> bool: + return len(line.strip()) == 0 + + +def __is_comment(line: str) -> bool: + return line.strip().startswith(('#', '//')) def get_code_style_lines(issues: List[BaseIssue]) -> int: @@ -39,64 +68,41 @@ def get_code_style_lines(issues: List[BaseIssue]) -> int: return len(line_counter) +def __get_max_measure_by_issue_type(issue_type: IssueType, issues: List[BaseIssue]) -> int: + return max(map( + lambda issue: issue.measure, + filter(lambda issue: issue.type == issue_type, issues) + ), default=0) + + # TODO: Need testing def gather_code_statistics(issues: List[BaseIssue], path: Path) -> CodeStatistics: issue_type_counter = Counter([issue.type for issue in issues]) - bool_expr_lens = map( - lambda issue: issue.bool_expr_len, - filter(lambda issue: issue.type == IssueType.BOOL_EXPR_LEN, issues) - ) - func_lens = map( - lambda issue: issue.func_len, - filter(lambda issue: issue.type == IssueType.FUNC_LEN, issues) - ) - - cyclomatic_complexities = map( - lambda issue: issue.cc_value, - filter(lambda issue: issue.type == IssueType.CYCLOMATIC_COMPLEXITY, issues) - ) + bool_expr_lens = __get_max_measure_by_issue_type(IssueType.BOOL_EXPR_LEN, issues) + func_lens = __get_max_measure_by_issue_type(IssueType.FUNC_LEN, issues) + cyclomatic_complexities = __get_max_measure_by_issue_type(IssueType.CYCLOMATIC_COMPLEXITY, issues) # Actually, we expect only one issue with each of the following metrics. 
- - inheritance_depths = map( - lambda issue: issue.inheritance_tree_depth, - filter(lambda issue: issue.type == IssueType.INHERITANCE_DEPTH, issues) - ) - - class_responses = map( - lambda issue: issue.class_response, - filter(lambda issue: issue.type == IssueType.CLASS_RESPONSE, issues) - ) - - couplings = map( - lambda issue: issue.class_objects_coupling, - filter(lambda issue: issue.type == IssueType.COUPLING, issues) - ) - - weighted_method_complexities = map( - lambda issue: issue.weighted_method, - filter(lambda issue: issue.type == IssueType.WEIGHTED_METHOD, issues) - ) - - method_numbers = map( - lambda issue: issue.method_number, - filter(lambda issue: issue.type == IssueType.METHOD_NUMBER, issues) - ) + inheritance_depths = __get_max_measure_by_issue_type(IssueType.INHERITANCE_DEPTH, issues) + class_responses = __get_max_measure_by_issue_type(IssueType.CLASS_RESPONSE, issues) + couplings = __get_max_measure_by_issue_type(IssueType.COUPLING, issues) + weighted_method_complexities = __get_max_measure_by_issue_type(IssueType.WEIGHTED_METHOD, issues) + method_numbers = __get_max_measure_by_issue_type(IssueType.METHOD_NUMBER, issues) return CodeStatistics( - issue_type_counter[IssueType.CODE_STYLE], - issue_type_counter[IssueType.BEST_PRACTICES], - issue_type_counter[IssueType.ERROR_PRONE], - max(bool_expr_lens, default=0), - max(func_lens, default=0), - issue_type_counter[IssueType.LINE_LEN], - max(cyclomatic_complexities, default=0), - max(inheritance_depths, default=0), - max(class_responses, default=0), - max(couplings, default=0), - max(weighted_method_complexities, default=0), - max(method_numbers, default=0), - get_total_lines(path), - get_code_style_lines(issues) + n_code_style_issues=issue_type_counter[IssueType.CODE_STYLE], + n_best_practices_issue=issue_type_counter[IssueType.BEST_PRACTICES], + n_error_prone_issues=issue_type_counter[IssueType.ERROR_PRONE], + max_bool_expr_len=bool_expr_lens, + max_func_len=func_lens, + n_line_len=issue_type_counter[IssueType.LINE_LEN], + max_cyclomatic_complexity=cyclomatic_complexities, + inheritance_depth=inheritance_depths, + class_response=class_responses, + coupling=couplings, + weighted_method_complexities=weighted_method_complexities, + method_number=method_numbers, + total_lines=__get_total_lines(path), + code_style_lines=get_code_style_lines(issues) ) diff --git a/src/python/review/reviewers/utils/issues_filter.py b/src/python/review/reviewers/utils/issues_filter.py index cb223cf3..9ac9b0b6 100644 --- a/src/python/review/reviewers/utils/issues_filter.py +++ b/src/python/review/reviewers/utils/issues_filter.py @@ -1,7 +1,8 @@ -from typing import Dict, List +from collections import defaultdict +from typing import Dict, List, Tuple from src.python.review.common.language import Language -from src.python.review.inspectors.issue import BaseIssue, IssueType +from src.python.review.inspectors.issue import BaseIssue, IssueType, Measurable from src.python.review.quality.rules.boolean_length_scoring import LANGUAGE_TO_BOOLEAN_EXPRESSION_RULE_CONFIG from src.python.review.quality.rules.class_response_scoring import LANGUAGE_TO_RESPONSE_RULE_CONFIG from src.python.review.quality.rules.coupling_scoring import LANGUAGE_TO_COUPLING_RULE_CONFIG @@ -12,67 +13,57 @@ from src.python.review.quality.rules.weighted_methods_scoring import LANGUAGE_TO_WEIGHTED_METHODS_RULE_CONFIG -def filter_low_metric_issues(issues: List[BaseIssue], - language: Language) -> List[BaseIssue]: - filtered_issues = [] +def __get_issue_type_to_low_measure_dict(language: 
Language) -> Dict[IssueType, int]:
+    return {
+        IssueType.CYCLOMATIC_COMPLEXITY: LANGUAGE_TO_CYCLOMATIC_COMPLEXITY_RULE_CONFIG[language].cc_value_moderate,
+        IssueType.FUNC_LEN: LANGUAGE_TO_FUNCTION_LENGTH_RULE_CONFIG[language].func_len_bad,
+        IssueType.BOOL_EXPR_LEN: LANGUAGE_TO_BOOLEAN_EXPRESSION_RULE_CONFIG[language].bool_expr_len_good,
+        IssueType.INHERITANCE_DEPTH: LANGUAGE_TO_INHERITANCE_DEPTH_RULE_CONFIG[language].depth_bad,
+        IssueType.METHOD_NUMBER: LANGUAGE_TO_METHOD_NUMBER_RULE_CONFIG[language].method_number_good,
+        IssueType.COUPLING: LANGUAGE_TO_COUPLING_RULE_CONFIG[language].coupling_moderate,
+        IssueType.CLASS_RESPONSE: LANGUAGE_TO_RESPONSE_RULE_CONFIG[language].response_good,
+        IssueType.WEIGHTED_METHOD: LANGUAGE_TO_WEIGHTED_METHODS_RULE_CONFIG[language].weighted_methods_good
+    }
-    func_len_rule_config = LANGUAGE_TO_FUNCTION_LENGTH_RULE_CONFIG[language]
-    boolean_expression_rule_config = LANGUAGE_TO_BOOLEAN_EXPRESSION_RULE_CONFIG[language]
-    cyclomatic_complexity_rule_config = LANGUAGE_TO_CYCLOMATIC_COMPLEXITY_RULE_CONFIG[language]
-    inheritance_depth_rule_config = LANGUAGE_TO_INHERITANCE_DEPTH_RULE_CONFIG[language]
-    method_number_rule_config = LANGUAGE_TO_METHOD_NUMBER_RULE_CONFIG[language]
-    coupling_rule_config = LANGUAGE_TO_COUPLING_RULE_CONFIG[language]
-    response_rule_config = LANGUAGE_TO_RESPONSE_RULE_CONFIG[language]
-    weighted_methods_rule_config = LANGUAGE_TO_WEIGHTED_METHODS_RULE_CONFIG[language]
-    # TODO make an abstraction for extraction the value
-    for issue in issues:
-        if (issue.type == IssueType.CYCLOMATIC_COMPLEXITY
-                and issue.cc_value <= cyclomatic_complexity_rule_config.cc_value_moderate):
-            continue
-
-        if (issue.type == IssueType.FUNC_LEN
-                and issue.func_len <= func_len_rule_config.func_len_bad):
-            continue
-
-        if (issue.type == IssueType.BOOL_EXPR_LEN
-                and issue.bool_expr_len <= boolean_expression_rule_config.bool_expr_len_good):
-            continue
+def __more_than_low_measure(issue: BaseIssue, issue_type_to_low_measure_dict: Dict[IssueType, int]) -> bool:
+    issue_type = issue.type
+    if isinstance(issue, Measurable) and issue.measure() <= issue_type_to_low_measure_dict.get(issue_type, -1):
+        return False
+    return True
-        if (issue.type == IssueType.INHERITANCE_DEPTH
-                and issue.inheritance_tree_depth <= inheritance_depth_rule_config.depth_bad):
-            continue
-        if (issue.type == IssueType.METHOD_NUMBER
-                and issue.method_number <= method_number_rule_config.method_number_good):
-            continue
+def filter_low_measure_issues(issues: List[BaseIssue],
+                              language: Language) -> List[BaseIssue]:
+    issue_type_to_low_measure_dict = __get_issue_type_to_low_measure_dict(language)
-        if (issue.type == IssueType.CLASS_RESPONSE
-                and issue.class_response <= response_rule_config.response_good):
-            continue
+    # Disable these types of issues; they require further investigation.
+    ignored_issues = [IssueType.COHESION, IssueType.CHILDREN_NUMBER]
-        if (issue.type == IssueType.WEIGHTED_METHOD
-                and issue.weighted_method <= weighted_methods_rule_config.weighted_methods_good):
-            continue
+    return list(filter(
+        lambda issue: issue.type not in ignored_issues and __more_than_low_measure(issue,
+                                                                                   issue_type_to_low_measure_dict),
+        issues))
-        if (issue.type == IssueType.COUPLING
-                and issue.class_objects_coupling <= coupling_rule_config.coupling_moderate):
-            continue
-        # Disable this types of issue, requires further investigation.
-        if (issue.type == IssueType.COHESION or issue.type == IssueType.CHILDREN_NUMBER):
-            continue
+FilePath = str
+LinesNumber = int
+Inspector = str
+GroupedIssues = Dict[FilePath, Dict[LinesNumber, Dict[Inspector, Dict[IssueType, List[BaseIssue]]]]]
-        filtered_issues.append(issue)
-    return filtered_issues
+def __init_grouped_issues() -> GroupedIssues:
+    return defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: []))))
 
 
 def filter_duplicate_issues(issues: List[BaseIssue]) -> List[BaseIssue]:
     """
-    Skipping duplicate issues using heuristic rules
+    Skip duplicate issues using heuristic rules:
+
+    for each line number, count the issues of each type reported by each inspector and select the best inspector:
+    the one that found the biggest number of issues of that type.
     """
-    grouped_issues = group_issues_by_file_line_inspector_and_type(issues)
+    grouped_issues = group_issues(issues)
 
     selected_issues = []
     for _, issues_in_file in grouped_issues.items():
@@ -85,42 +76,50 @@ def filter_duplicate_issues(issues: List[BaseIssue]) -> List[BaseIssue]:
                 selected_issues.extend(all_issues)
             # conflicts -> take issues found by a more informative inspector
             elif len(issues_in_line) > 1:
-                inspectors_by_types = {}
+                default_inspector = 'UNKNOWN'
+                # By default, each issue type maps to the tuple (inspector='UNKNOWN', issue_type_freq=-1)
+                inspectors_by_types: Dict[IssueType, Tuple[Inspector, int]] = defaultdict(
+                    lambda: (default_inspector, -1))
                 for inspector, issues_by_types in issues_in_line.items():
+                    # Handle all possible issue types
                     for issue_type in IssueType:
-                        count = len(issues_by_types.get(issue_type, []))
-                        if count == 0:
+                        issue_type_freq = len(issues_by_types.get(issue_type, []))
+                        # This issue type was not found by the current inspector
+                        if issue_type_freq == 0:
                             continue
-                        if issue_type not in inspectors_by_types:
-                            inspectors_by_types[issue_type] = ('UNKNOWN', -1)
-                        if count > inspectors_by_types[issue_type][1]:
-                            inspectors_by_types[issue_type] = (inspector, count)
+                        max_issue_type_freq = inspectors_by_types[issue_type][1]
+                        # The current inspector found more issues of this type than the previous ones
+                        if issue_type_freq > max_issue_type_freq:
+                            inspectors_by_types[issue_type] = (inspector, issue_type_freq)
 
-                for issue_type, inspector in inspectors_by_types.items():
-                    selected_issues.extend(issues_in_line[inspector[0]][issue_type])
+                for issue_type, inspector_to_freq in inspectors_by_types.items():
+                    inspector = inspector_to_freq[0]
+                    if inspector != default_inspector:
+                        selected_issues.extend(issues_in_line[inspector][issue_type])
 
     return selected_issues
 
 
-def group_issues_by_file_line_inspector_and_type(issues: List[BaseIssue]) -> dict:
-    grouped_issues: Dict[str, Dict[int, Dict[str, Dict[IssueType, List[BaseIssue]]]]] = {}
+def group_issues(issues: List[BaseIssue]) -> GroupedIssues:
+    """
+    Group issues according to the following structure:
+    - FILE_PATH:
+      - LINES_NUMBER:
+        - INSPECTOR:
+          - ISSUE_TYPE:
+              [ISSUES]
+
+    We will consider each file to find potential duplicates:
+    if one line number in the file contains several same issues which were found by different inspectors,
+    we will try to find the best one; see the filter_duplicate_issues function.
+ """ + grouped_issues: GroupedIssues = __init_grouped_issues() for issue in issues: file_path = str(issue.file_path) - if file_path not in grouped_issues: - grouped_issues[file_path] = {} - line_no = issue.line_no - if line_no not in grouped_issues[file_path]: - grouped_issues[file_path][line_no] = {} - inspector_name = str(issue.inspector_type) - if inspector_name not in grouped_issues[file_path][line_no]: - grouped_issues[file_path][line_no][inspector_name] = {} - issue_type = issue.type - if issue_type not in grouped_issues[file_path][line_no][inspector_name]: - grouped_issues[file_path][line_no][inspector_name][issue_type] = [] grouped_issues[file_path][line_no][inspector_name][issue_type].append(issue) diff --git a/src/python/review/reviewers/utils/metadata_exploration.py b/src/python/review/reviewers/utils/metadata_exploration.py index f3ac8ff2..f928a1a9 100644 --- a/src/python/review/reviewers/utils/metadata_exploration.py +++ b/src/python/review/reviewers/utils/metadata_exploration.py @@ -4,7 +4,7 @@ from pathlib import Path from typing import Dict, List, Set, Union -from src.python.review.common.file_system import get_all_file_paths_in_dir +from src.python.review.common.file_system import get_all_file_system_items, get_extension_from_file from src.python.review.common.language import guess_file_language, Language @@ -16,7 +16,7 @@ class FileMetadata: @property def extension(self) -> str: - return self.path.suffix + return get_extension_from_file(self.path).value @dataclass @@ -39,7 +39,7 @@ def size_bytes(self) -> int: def extension_to_files(self) -> Dict[str, List[FileMetadata]]: extension_to_files = defaultdict(list) for file in self.inner_files: - extension_to_files[file.path.suffix].append(file) + extension_to_files[get_extension_from_file(file.path)].append(file) return extension_to_files @property @@ -59,7 +59,7 @@ def extensions(self) -> Set[str]: def explore_file(path: Path) -> FileMetadata: if not path.exists() or not path.is_file(): - raise AssertionError + raise ValueError(f'The file {path} does not exist or is not a file') stat = os.stat(path) language = guess_file_language(path) @@ -68,10 +68,10 @@ def explore_file(path: Path) -> FileMetadata: def explore_project(path: Path) -> ProjectMetadata: if not path.exists() or not path.is_dir(): - raise AssertionError + raise ValueError(f'The file {path} does not exist or is not a directory') inner_files = [] - for file_path in get_all_file_paths_in_dir(path): + for file_path in get_all_file_system_items(path): inner_files.append(explore_file(file_path)) return ProjectMetadata(path, inner_files) diff --git a/src/python/review/reviewers/utils/print_review.py b/src/python/review/reviewers/utils/print_review.py index 94e0dbf8..76c2de04 100644 --- a/src/python/review/reviewers/utils/print_review.py +++ b/src/python/review/reviewers/utils/print_review.py @@ -8,8 +8,7 @@ def print_review_result_as_text(review_result: ReviewResult, path: Path) -> None: - heading = f'Review of {str(path)} ({len(review_result.all_issues)} violations)' - print() + heading = f'\nReview of {str(path)} ({len(review_result.all_issues)} violations)' print(heading) if len(review_result.all_issues) == 0: diff --git a/test/python/functional_tests/test_verbosity.py b/test/python/functional_tests/test_verbosity.py index 5efcfc29..67df44eb 100644 --- a/test/python/functional_tests/test_verbosity.py +++ b/test/python/functional_tests/test_verbosity.py @@ -18,9 +18,9 @@ def test_disable_logs_text(local_command: LocalCommandBuilder): output = process.stdout.decode() 
output = output.lower() - assert 'debug' not in output - assert 'info' not in output - assert 'error' not in output + assert ' debug ' not in output + assert ' info ' not in output + assert ' error ' not in output def test_disable_logs_json(local_command: LocalCommandBuilder): @@ -54,4 +54,4 @@ def test_enable_all_logs(local_command: LocalCommandBuilder): output = process.stdout.decode() output = output.lower() - assert 'debug' in output + assert ' debug ' in output diff --git a/test/python/inspectors/test_checkstyle_inspector.py b/test/python/inspectors/test_checkstyle_inspector.py index 6d3043df..892ec97f 100644 --- a/test/python/inspectors/test_checkstyle_inspector.py +++ b/test/python/inspectors/test_checkstyle_inspector.py @@ -2,7 +2,7 @@ from src.python.review.common.language import Language from src.python.review.inspectors.checkstyle.checkstyle import CheckstyleInspector -from src.python.review.reviewers.utils.issues_filter import filter_low_metric_issues +from src.python.review.reviewers.utils.issues_filter import filter_low_measure_issues from test.python.inspectors import JAVA_DATA_FOLDER from test.python.inspectors.conftest import gather_issues_test_info, IssuesTestInfo, use_file_metadata @@ -53,7 +53,7 @@ def test_file_with_issues(file_name: str, n_issues: int): path_to_file = JAVA_DATA_FOLDER / file_name with use_file_metadata(path_to_file) as file_metadata: issues = inspector.inspect(file_metadata.path, {}) - issues = filter_low_metric_issues(issues, Language.JAVA) + issues = filter_low_measure_issues(issues, Language.JAVA) assert len(issues) == n_issues diff --git a/test/python/inspectors/test_detekt_inspector.py b/test/python/inspectors/test_detekt_inspector.py index 2860fc9a..4dbafd2d 100644 --- a/test/python/inspectors/test_detekt_inspector.py +++ b/test/python/inspectors/test_detekt_inspector.py @@ -2,7 +2,7 @@ from src.python.review.common.language import Language from src.python.review.inspectors.detekt.detekt import DetektInspector -from src.python.review.reviewers.utils.issues_filter import filter_low_metric_issues +from src.python.review.reviewers.utils.issues_filter import filter_low_measure_issues from test.python.inspectors import KOTLIN_DATA_FOLDER from test.python.inspectors.conftest import use_file_metadata @@ -38,6 +38,6 @@ def test_file_with_issues(file_name: str, n_issues: int): path_to_file = KOTLIN_DATA_FOLDER / file_name with use_file_metadata(path_to_file) as file_metadata: issues = inspector.inspect(file_metadata.path, {}) - issues = filter_low_metric_issues(issues, Language.KOTLIN) + issues = filter_low_measure_issues(issues, Language.KOTLIN) assert len(issues) == n_issues diff --git a/test/python/inspectors/test_eslint_inspector.py b/test/python/inspectors/test_eslint_inspector.py index 5a114dc9..076c76a0 100644 --- a/test/python/inspectors/test_eslint_inspector.py +++ b/test/python/inspectors/test_eslint_inspector.py @@ -2,7 +2,7 @@ from src.python.review.common.language import Language from src.python.review.inspectors.eslint.eslint import ESLintInspector -from src.python.review.reviewers.utils.issues_filter import filter_low_metric_issues +from src.python.review.reviewers.utils.issues_filter import filter_low_measure_issues from test.python.inspectors import JS_DATA_FOLDER from test.python.inspectors.conftest import use_file_metadata @@ -20,6 +20,6 @@ def test_file_with_issues(file_name: str, n_issues: int): path_to_file = JS_DATA_FOLDER / file_name with use_file_metadata(path_to_file) as file_metadata: issues = 
inspector.inspect(file_metadata.path, {}, True) - issues = filter_low_metric_issues(issues, Language.JS) + issues = filter_low_measure_issues(issues, Language.JS) assert len(issues) == n_issues diff --git a/test/python/inspectors/test_flake8_inspector.py b/test/python/inspectors/test_flake8_inspector.py index 97d4bf31..c723a2db 100644 --- a/test/python/inspectors/test_flake8_inspector.py +++ b/test/python/inspectors/test_flake8_inspector.py @@ -3,7 +3,7 @@ from src.python.review.common.language import Language from src.python.review.inspectors.flake8.flake8 import Flake8Inspector from src.python.review.inspectors.issue import IssueType -from src.python.review.reviewers.utils.issues_filter import filter_low_metric_issues +from src.python.review.reviewers.utils.issues_filter import filter_low_measure_issues from test.python.inspectors import PYTHON_DATA_FOLDER from test.python.inspectors.conftest import gather_issues_test_info, IssuesTestInfo, use_file_metadata @@ -38,7 +38,7 @@ def test_file_with_issues(file_name: str, n_issues: int): path_to_file = PYTHON_DATA_FOLDER / file_name with use_file_metadata(path_to_file) as file_metadata: issues = inspector.inspect(file_metadata.path, {}) - issues = filter_low_metric_issues(issues, Language.PYTHON) + issues = filter_low_measure_issues(issues, Language.PYTHON) assert len(issues) == n_issues
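For reference, a minimal usage sketch of the new file_system helpers (not part of the patch; the root path below is a placeholder): a single get_all_file_system_items call covers both removed helpers, get_all_file_paths_in_dir and get_all_subdirs, and additionally accepts an ItemCondition filter on item names.

from pathlib import Path

from src.python.review.common.file_system import Extension, FileSystemItem, get_all_file_system_items

root = Path('path/to/project')  # placeholder path

# Was get_all_file_paths_in_dir(root): all files under root, returned with their full paths
all_files = get_all_file_system_items(root)

# Was get_all_subdirs(root); note that, unlike the old helper, root itself is no longer included
subdirs = get_all_file_system_items(root, item_type=FileSystemItem.SUBDIR)

# New capability: an ItemCondition filters by item name, e.g. keep only Java sources
java_files = get_all_file_system_items(root, lambda name: name.endswith(Extension.JAVA.value))

The condition receives bare file or directory names rather than paths, so extension checks go through the Extension enum values instead of Path.suffix.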