Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions src/python/evaluation/inspectors/inspectors_stat/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Hyperstyle evaluation: inspectors statistics gathering

This module allows gathering statistics about inspections that are used
during analysis for a specific language. We collect all available issues' keys,
remove ignored ones and gather statistics for four main categories:

- code style issues;
- best practice issues;
- error-prone issues;
- code complexity issues.

More information about these categories can be found on [this](https://support.hyperskill.org/hc/en-us/articles/360049582712-Code-style-Code-quality) page.

## Current statistics

The current statistics are:

|            | Error prone | Code style | Code complexity | Best practice |
|------------|-------------|------------|-----------------|---------------|
| Python | 162 | 146 | 35 | 254 |
| Java | 105 | 133 | 15 | 203 |
| JavaScript | 15 | 17 | 1 | 34 |
| Kotlin | 21 | 70 | 12 | 75 |


## Usage

Run [statistics_gathering.py](statistics_gathering.py) with the arguments from the command line.

Required arguments:

`language` — the language for which statistics will be gathered.
Available values are: `python`, `java`, `kotlin`, `javascript`.

An example of the output is:

```text
Collected statistics for python language:
best practices: 254 times;
code style: 146 times;
complexity: 35 times;
error prone: 162 times;
undefined: 3 times;
Note: undefined means a category that is not categorized among the four main categories. Most likely it is info category
```
Empty file.
Empty file.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Issue codes attributed to the Radon inspector, mapped to a short description.
PYTHON_RADON_ISSUES = {
    'RAD100': 'MAINTAINABILITY index',
}

# Issue codes attributed to the Python AST inspector, mapped to a short description.
PYTHON_AST_ISSUES = {
    'C001': 'Boolean expressions length',
    'C002': 'Functions length',
}

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
import argparse
from typing import Callable, Dict, List, Set, Tuple

from src.python.evaluation.inspectors.inspectors_stat.issues.flake8_all_issues import (
ALL_BUGBEAR_ISSUES, ALL_BUILTINS_ISSUES, ALL_COMPREHENSIONS_ISSUES, ALL_FORMAT_STRING_ISSUES,
ALL_IMPORT_ORDER_ISSUES, ALL_RETURN_ISSUES, ALL_SPELLCHECK_ISSUES, ALL_STANDARD_ISSUES, ALL_WPS_ISSUES,
FLAKE8_DISABLED_ISSUES,
)
from src.python.evaluation.inspectors.inspectors_stat.issues.other_issues import PYTHON_AST_ISSUES, PYTHON_RADON_ISSUES
from src.python.evaluation.inspectors.inspectors_stat.issues.pylint_all_issues import ALL_ISSUES, PYLINT_DISABLED_ISSUES
from src.python.review.common.language import Language
from src.python.review.inspectors.checkstyle.checkstyle import CheckstyleInspector
from src.python.review.inspectors.checkstyle.issue_types import CHECK_CLASS_NAME_TO_ISSUE_TYPE
from src.python.review.inspectors.detekt.detekt import DetektInspector
from src.python.review.inspectors.detekt.issue_types import DETECT_CLASS_NAME_TO_ISSUE_TYPE
from src.python.review.inspectors.eslint.eslint import ESLintInspector
from src.python.review.inspectors.eslint.issue_types import ESLINT_CLASS_NAME_TO_ISSUE_TYPE
from src.python.review.inspectors.flake8.flake8 import Flake8Inspector
from src.python.review.inspectors.issue import (
get_default_issue_stat, get_main_category_by_issue_type, IssuesStat, IssueType,
)
from src.python.review.inspectors.pmd.issue_types import PMD_RULE_TO_ISSUE_TYPE
from src.python.review.inspectors.pmd.pmd import PMDInspector
from src.python.review.inspectors.pyast.python_ast import PythonAstInspector
from src.python.review.inspectors.pylint.pylint import PylintInspector
from src.python.review.inspectors.radon.radon import RadonInspector


def __get_flake8_issue_keys() -> Set[str]:
    """Return the keys of every known flake8 issue except the disabled ones."""
    flake8_issue_groups = (
        ALL_STANDARD_ISSUES,
        ALL_BUGBEAR_ISSUES,
        ALL_BUILTINS_ISSUES,
        ALL_RETURN_ISSUES,
        ALL_FORMAT_STRING_ISSUES,
        ALL_IMPORT_ORDER_ISSUES,
        ALL_COMPREHENSIONS_ISSUES,
        ALL_SPELLCHECK_ISSUES,
        ALL_WPS_ISSUES,
    )

    enabled_keys = set()
    for group in flake8_issue_groups:
        enabled_keys.update(group.keys())

    # Drop the issues that are switched off in the flake8 configuration.
    enabled_keys.difference_update(set(FLAKE8_DISABLED_ISSUES))
    return enabled_keys


def __match_issue_keys_to_issue_type(issue_keys: Set[str], matcher: Callable) -> Dict[str, IssueType]:
    """Map every issue key to the value that ``matcher`` returns for it.

    :param issue_keys: issue keys to classify.
    :param matcher: callable that receives a single key.
    :return: dictionary ``key -> matcher(key)``.
    """
    return {issue_key: matcher(issue_key) for issue_key in issue_keys}


def __gather_issues_stat(issue_types: List[IssueType]) -> IssuesStat:
    """Count, for each main category, how many of the given issue types belong to it.

    :param issue_types: issue types to distribute among the main categories.
    :return: the default statistics with one increment per given issue type.
    """
    stat = get_default_issue_stat()
    for current_type in issue_types:
        main_category = get_main_category_by_issue_type(current_type)
        stat[main_category] += 1
    return stat


def __merge_issues_stats(*args: IssuesStat) -> IssuesStat:
    """Sum several statistics key by key.

    The keys of the result are the keys of the first statistics; every other
    statistics is indexed with the same keys.

    :param args: one or more statistics to merge.
    :return: dictionary with the per-key sums.
    """
    assert len(args) >= 1, 'Please, use at least one argument'
    first_stat = args[0]
    return {key: sum(stat[key] for stat in args) for key in first_stat.keys()}


def __collect_language_stat(*args: Tuple[Set[str], Callable]) -> IssuesStat:
    """Gather the merged statistics for several (issue keys, matcher) pairs.

    :param args: pairs of issue keys and the callable that is applied to
        each of those keys to obtain its issue type.
    :return: sum of the per-pair statistics.
    """
    # Materialize the matched issue types as lists, which is what
    # __gather_issues_stat declares to accept.
    issue_types_per_pair = [
        list(__match_issue_keys_to_issue_type(issue_keys, matcher).values())
        for issue_keys, matcher in args
    ]
    return __merge_issues_stats(*map(__gather_issues_stat, issue_types_per_pair))


def collect_stat_by_language(language: Language) -> IssuesStat:
    """Collect the issue statistics of all inspectors used for the language.

    :param language: language to collect the statistics for.
    :return: number of issues per main category.
    :raises NotImplementedError: if the language is none of Python, Java,
        Kotlin or JavaScript.
    """
    if language == Language.PYTHON:
        pylint_keys = set(ALL_ISSUES.keys()) - set(PYLINT_DISABLED_ISSUES)
        return __collect_language_stat(
            (pylint_keys, PylintInspector.choose_issue_type),
            (__get_flake8_issue_keys(), Flake8Inspector.choose_issue_type),
            (set(PYTHON_RADON_ISSUES.keys()), RadonInspector.choose_issue_type),
            (set(PYTHON_AST_ISSUES.keys()), PythonAstInspector.choose_issue_type),
        )

    if language == Language.JAVA:
        pmd_keys = set(PMD_RULE_TO_ISSUE_TYPE.keys())
        checkstyle_keys = set(CHECK_CLASS_NAME_TO_ISSUE_TYPE.keys()) - set(CheckstyleInspector.skipped_issues)
        return __collect_language_stat(
            (pmd_keys, PMDInspector.choose_issue_type),
            (checkstyle_keys, CheckstyleInspector.choose_issue_type),
        )

    if language == Language.KOTLIN:
        detekt_keys = set(DETECT_CLASS_NAME_TO_ISSUE_TYPE.keys())
        return __collect_language_stat((detekt_keys, DetektInspector.choose_issue_type))

    if language == Language.JS:
        eslint_keys = set(ESLINT_CLASS_NAME_TO_ISSUE_TYPE.keys())
        return __collect_language_stat((eslint_keys, ESLintInspector.choose_issue_type))

    raise NotImplementedError(f'Language {language} is not supported yet!')


def print_stat(language: Language, stat: IssuesStat) -> None:
    """Print the collected statistics followed by a note about the undefined category.

    :param language: language the statistics were collected for.
    :param stat: number of issues per category.
    """
    header = f'Collected statistics for {language.value.lower()} language:'
    rows = [f'{issue_type}: {freq} times;' for issue_type, freq in stat.items()]
    note = (f'Note: {IssueType.UNDEFINED} means a category that is not categorized among the four main categories. '
            f'Most likely it is {IssueType.INFO} category')

    print(header)
    for row in rows:
        print(row)
    print(note)


def __parse_language(language: str) -> Language:
    """Convert a command-line value into a ``Language`` member.

    The value is upper-cased before the lookup, so it is case-insensitive.

    :param language: language name passed on the command line.
    :return: the ``Language`` member whose value equals the upper-cased name.
    :raises argparse.ArgumentTypeError: if no member has such a value.
    """
    try:
        return Language(language.upper())
    except ValueError as error:
        # Looking an Enum member up by value raises ValueError (not KeyError).
        # ArgumentTypeError lets argparse show this message as a usage error.
        raise argparse.ArgumentTypeError(
            f'Incorrect language key: {language}. Please, try again!',
        ) from error


def configure_arguments(parser: argparse.ArgumentParser) -> None:
    """Register the positional ``language`` argument on the parser.

    :param parser: parser to add the argument to.
    """
    # UNKNOWN is a Language member, but no statistics can be collected for it,
    # so it must not be advertised as an available value.
    languages = ', '.join(
        value.lower() for value in Language.values() if value != Language.UNKNOWN.value
    )

    parser.add_argument('language',
                        type=__parse_language,
                        help=f'The language for which statistics will be printed. Available values are: {languages}')


def main() -> None:
    """Parse the command-line arguments, collect the statistics and print them."""
    argument_parser = argparse.ArgumentParser()
    configure_arguments(argument_parser)

    language = argument_parser.parse_args().language
    print_stat(language, collect_stat_by_language(language))


# Run the command-line entry point only when the module is executed as a script.
if __name__ == '__main__':
    main()
4 changes: 4 additions & 0 deletions src/python/review/common/language.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ class Language(Enum):
JS = 'JAVASCRIPT'
UNKNOWN = 'UNKNOWN'

@classmethod
def values(cls) -> List[str]:
    """Return the values of all members of the enumeration, in definition order."""
    # Iterate over the receiving class instead of a hard-coded class name.
    return [member.value for member in cls]


EXTENSION_TO_LANGUAGE = {
Extension.JAVA: Language.JAVA,
Expand Down
58 changes: 57 additions & 1 deletion src/python/review/inspectors/issue.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import abc
from collections import defaultdict
from dataclasses import dataclass
from enum import Enum, unique
from pathlib import Path
from typing import Any, Dict, Union
from typing import Any, Dict, List, Union

from src.python.review.inspectors.inspector_type import InspectorType

Expand All @@ -28,6 +29,61 @@ class IssueType(Enum):
MAINTAINABILITY = 'MAINTAINABILITY'
INFO = 'INFO'

UNDEFINED = 'UNDEFINED'

def __str__(self) -> str:
    """Return the member value in lower case with underscores replaced by spaces."""
    lowered = self.value.lower()
    return lowered.replace('_', ' ')


# Maps every issue type that belongs to a main category to that category.
# Issue types that are absent from this mapping are reported as
# IssueType.UNDEFINED by get_main_category_by_issue_type.
ISSUE_TYPE_TO_MAIN_CATEGORY = {
    # CODE_STYLE
    IssueType.CODE_STYLE: IssueType.CODE_STYLE,
    IssueType.LINE_LEN: IssueType.CODE_STYLE,

    # BEST_PRACTICES
    IssueType.BEST_PRACTICES: IssueType.BEST_PRACTICES,
    IssueType.FUNC_LEN: IssueType.BEST_PRACTICES,
    IssueType.BOOL_EXPR_LEN: IssueType.BEST_PRACTICES,
    IssueType.METHOD_NUMBER: IssueType.BEST_PRACTICES,
    IssueType.CLASS_RESPONSE: IssueType.BEST_PRACTICES,

    # ERROR_PRONE
    IssueType.ERROR_PRONE: IssueType.ERROR_PRONE,

    # COMPLEXITY
    IssueType.COMPLEXITY: IssueType.COMPLEXITY,
    IssueType.CYCLOMATIC_COMPLEXITY: IssueType.COMPLEXITY,
    IssueType.WEIGHTED_METHOD: IssueType.COMPLEXITY,
    IssueType.COUPLING: IssueType.COMPLEXITY,
    IssueType.COHESION: IssueType.COMPLEXITY,
    IssueType.MAINTAINABILITY: IssueType.COMPLEXITY,
    IssueType.CHILDREN_NUMBER: IssueType.COMPLEXITY,
    IssueType.INHERITANCE_DEPTH: IssueType.COMPLEXITY,
    IssueType.ARCHITECTURE: IssueType.COMPLEXITY,
}


def get_main_category_by_issue_type(issue_type: IssueType) -> IssueType:
    """Return the main category of the issue type.

    :param issue_type: issue type to classify.
    :return: the category from ``ISSUE_TYPE_TO_MAIN_CATEGORY`` or
        ``IssueType.UNDEFINED`` if the issue type is not listed there.
    """
    try:
        return ISSUE_TYPE_TO_MAIN_CATEGORY[issue_type]
    except KeyError:
        return IssueType.UNDEFINED


def main_category_to_issue_type_list_dict() -> Dict[IssueType, List[IssueType]]:
    """Invert ``ISSUE_TYPE_TO_MAIN_CATEGORY``.

    :return: ``defaultdict(list)`` that maps each main category to the issue
        types assigned to it, in the order they are listed in the mapping.
    """
    issue_types_by_category = defaultdict(list)
    for issue_type in ISSUE_TYPE_TO_MAIN_CATEGORY:
        category = ISSUE_TYPE_TO_MAIN_CATEGORY[issue_type]
        issue_types_by_category[category].append(issue_type)
    return issue_types_by_category


# Inverse view of ISSUE_TYPE_TO_MAIN_CATEGORY, computed once when the module is imported.
MAIN_CATEGORY_TO_ISSUE_TYPE_LIST = main_category_to_issue_type_list_dict()

# Type alias: an issue type mapped to an integer counter.
IssuesStat = Dict[IssueType, int]


def get_default_issue_stat() -> IssuesStat:
    """Return a fresh statistics dictionary with every counter set to zero.

    The keys are the main categories in the order of their first appearance
    in ``ISSUE_TYPE_TO_MAIN_CATEGORY``, followed by ``IssueType.UNDEFINED``.

    :return: new dictionary ``category -> 0``.
    """
    # dict.fromkeys deduplicates while keeping insertion order. Going through
    # a set would make the key order depend on hash order instead.
    stat = dict.fromkeys(ISSUE_TYPE_TO_MAIN_CATEGORY.values(), 0)
    stat[IssueType.UNDEFINED] = 0
    return stat


# Keys in results dictionary
@unique
Expand Down
2 changes: 1 addition & 1 deletion src/python/review/inspectors/pmd/issue_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from src.python.review.inspectors.issue import IssueType

RULE_TO_ISSUE_TYPE: Dict[str, IssueType] = {
PMD_RULE_TO_ISSUE_TYPE: Dict[str, IssueType] = {
# Best Practices
'AbstractClassWithoutAbstractMethod': IssueType.BEST_PRACTICES,
'AccessorClassGeneration': IssueType.BEST_PRACTICES,
Expand Down
4 changes: 2 additions & 2 deletions src/python/review/inspectors/pmd/pmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from src.python.review.inspectors.base_inspector import BaseInspector
from src.python.review.inspectors.inspector_type import InspectorType
from src.python.review.inspectors.issue import BaseIssue, CodeIssue, IssueType
from src.python.review.inspectors.pmd.issue_types import RULE_TO_ISSUE_TYPE
from src.python.review.inspectors.pmd.issue_types import PMD_RULE_TO_ISSUE_TYPE

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -72,7 +72,7 @@ def parse_output(self, output_path: Path) -> List[BaseIssue]:

@classmethod
def choose_issue_type(cls, rule: str) -> IssueType:
issue_type = RULE_TO_ISSUE_TYPE.get(rule)
issue_type = PMD_RULE_TO_ISSUE_TYPE.get(rule)
if not issue_type:
logger.warning('%s: %s - unknown rule' %
(cls.inspector_type.value, rule))
Expand Down
16 changes: 13 additions & 3 deletions src/python/review/inspectors/pyast/python_ast.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def visit(self, node: ast.AST):
origin_class=BOOL_EXPR_LEN_ORIGIN_CLASS,
inspector_type=self._inspector_type,
bool_expr_len=length,
type=IssueType.BOOL_EXPR_LEN,
type=PythonAstInspector.choose_issue_type(BOOL_EXPR_LEN_ORIGIN_CLASS),
))


Expand Down Expand Up @@ -69,7 +69,7 @@ def visit(self, node):
origin_class=FUNC_LEN_ORIGIN_CLASS,
inspector_type=self._inspector_type,
func_len=func_length,
type=IssueType.FUNC_LEN,
type=PythonAstInspector.choose_issue_type(FUNC_LEN_ORIGIN_CLASS),
))

self._previous_node = node
Expand All @@ -91,7 +91,7 @@ def function_lens(self) -> List[FuncLenIssue]:
origin_class=FUNC_LEN_ORIGIN_CLASS,
inspector_type=self._inspector_type,
func_len=func_length,
type=IssueType.FUNC_LEN,
type=PythonAstInspector.choose_issue_type(FUNC_LEN_ORIGIN_CLASS),
))

self._previous_node = None
Expand Down Expand Up @@ -136,6 +136,16 @@ def inspect(cls, path: Path, config: dict) -> List[BaseIssue]:

return metrics

@staticmethod
def choose_issue_type(code: str) -> IssueType:
    """Return the issue type that corresponds to the origin class code.

    :param code: origin class of the issue.
    :return: ``BOOL_EXPR_LEN`` or ``FUNC_LEN`` for the matching origin
        class, ``BEST_PRACTICES`` for any other code.
    """
    known_codes = (
        (BOOL_EXPR_LEN_ORIGIN_CLASS, IssueType.BOOL_EXPR_LEN),
        (FUNC_LEN_ORIGIN_CLASS, IssueType.FUNC_LEN),
    )
    for origin_class, issue_type in known_codes:
        if code == origin_class:
            return issue_type

    return IssueType.BEST_PRACTICES


def create_line_no_to_sym_no_map(content) -> Dict[int, int]:
mapping = defaultdict(lambda: len(content), {1: 0})
Expand Down
23 changes: 15 additions & 8 deletions src/python/review/inspectors/radon/radon.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from src.python.review.inspectors.tips import get_maintainability_index_tip


MAINTAINABILITY_ORIGIN_CLASS = "RAD100"
MAINTAINABILITY_ORIGIN_CLASS = 'RAD100'


class RadonInspector(BaseInspector):
Expand All @@ -19,9 +19,9 @@ class RadonInspector(BaseInspector):
@classmethod
def inspect(cls, path: Path, config: dict) -> List[BaseIssue]:
mi_command = [
"radon", "mi", # compute the Maintainability Index score
"--max", "F", # set the maximum MI rank to display
"--show", # actual MI value is shown in results, alongside the rank
'radon', 'mi', # compute the Maintainability Index score
'--max', 'F', # set the maximum MI rank to display
'--show', # actual MI value is shown in results, alongside the rank
path,
]

Expand All @@ -31,13 +31,13 @@ def inspect(cls, path: Path, config: dict) -> List[BaseIssue]:
@classmethod
def mi_parse(cls, mi_output: str) -> List[BaseIssue]:
"""
Parses the results of the "mi" command.
Parses the results of the 'mi' command.
Description: https://radon.readthedocs.io/en/latest/commandline.html#the-mi-command

:param mi_output: "mi" command output.
:param mi_output: 'mi' command output.
:return: list of issues.
"""
row_re = re.compile(r"^(.*) - \w \((.*)\)$", re.M)
row_re = re.compile(r'^(.*) - \w \((.*)\)$', re.M)

issues: List[BaseIssue] = []
for groups in row_re.findall(mi_output):
Expand All @@ -49,8 +49,15 @@ def mi_parse(cls, mi_output: str) -> List[BaseIssue]:
)
issue_data[IssueData.DESCRIPTION.value] = get_maintainability_index_tip()
issue_data[IssueData.MAINTAINABILITY_LACK.value] = maintainability_lack
issue_data[IssueData.ISSUE_TYPE.value] = IssueType.MAINTAINABILITY
issue_data[IssueData.ISSUE_TYPE.value] = cls.choose_issue_type(MAINTAINABILITY_ORIGIN_CLASS)

issues.append(MaintainabilityLackIssue(**issue_data))

return issues

@staticmethod
def choose_issue_type(code: str) -> IssueType:
    """Return the issue type that corresponds to the origin class code.

    :param code: origin class of the issue.
    :return: ``MAINTAINABILITY`` for the maintainability origin class,
        ``BEST_PRACTICES`` for any other code.
    """
    is_maintainability = code == MAINTAINABILITY_ORIGIN_CLASS
    return IssueType.MAINTAINABILITY if is_maintainability else IssueType.BEST_PRACTICES
Loading