From 6e7ec63c14f9514daa5552a6f8bdaccd53047920 Mon Sep 17 00:00:00 2001
From: "Anastasiia.Birillo"
Date: Sun, 25 Jul 2021 19:00:33 +0300
Subject: [PATCH 1/6] Add evaluation for the paper: student dynamics and statistics about the tool and the tutor tool

---
 .../evaluation/paper_evaluation/README.md     |   7 +
 .../evaluation/paper_evaluation/__init__.py   |   0
 .../comparison_with_other_tools/README.md     |  58 +++++++++
 .../comparison_with_other_tools/__init__.py   |   0
 .../statistics_gathering.py                   |  57 ++++++++
 .../tutor_statistics.py                       | 122 ++++++++++++++++++
 .../comparison_with_other_tools/util.py       |  27 ++++
 .../paper_evaluation/user_dynamics/README.md  |  29 +++++
 .../user_dynamics/__init__.py                 |   0
 .../user_dynamics/dynamics_gathering.py       | 111 ++++++++++++++++
 .../user_dynamics/user_statistics.py          |  13 ++
 11 files changed, 424 insertions(+)
 create mode 100644 src/python/evaluation/paper_evaluation/README.md
 create mode 100644 src/python/evaluation/paper_evaluation/__init__.py
 create mode 100644 src/python/evaluation/paper_evaluation/comparison_with_other_tools/README.md
 create mode 100644 src/python/evaluation/paper_evaluation/comparison_with_other_tools/__init__.py
 create mode 100644 src/python/evaluation/paper_evaluation/comparison_with_other_tools/statistics_gathering.py
 create mode 100644 src/python/evaluation/paper_evaluation/comparison_with_other_tools/tutor_statistics.py
 create mode 100644 src/python/evaluation/paper_evaluation/comparison_with_other_tools/util.py
 create mode 100644 src/python/evaluation/paper_evaluation/user_dynamics/README.md
 create mode 100644 src/python/evaluation/paper_evaluation/user_dynamics/__init__.py
 create mode 100644 src/python/evaluation/paper_evaluation/user_dynamics/dynamics_gathering.py
 create mode 100644 src/python/evaluation/paper_evaluation/user_dynamics/user_statistics.py

diff --git a/src/python/evaluation/paper_evaluation/README.md b/src/python/evaluation/paper_evaluation/README.md
new file mode 100644
index 00000000..5dac490b
--- /dev/null
+++ b/src/python/evaluation/paper_evaluation/README.md
@@ -0,0 +1,7 @@
+# Paper evaluation
+
+This module contains scripts for the SIGCSE-2022 paper evaluation:
+
+- [Comparison with other tools](./comparison_with_other_tools/README.md)
+- Formatting issues importance
+- [Dynamics of student usage](./user_dynamics/README.md)
\ No newline at end of file
diff --git a/src/python/evaluation/paper_evaluation/__init__.py b/src/python/evaluation/paper_evaluation/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src/python/evaluation/paper_evaluation/comparison_with_other_tools/README.md b/src/python/evaluation/paper_evaluation/comparison_with_other_tools/README.md
new file mode 100644
index 00000000..618165f7
--- /dev/null
+++ b/src/python/evaluation/paper_evaluation/comparison_with_other_tools/README.md
@@ -0,0 +1,58 @@
+# Comparison with other tools evaluation
+
+This module allows gathering statistics about the use of several code quality tools.
+In our work we compare the Hyperstyle tool with the [Tutor](https://www.hkeuning.nl/rpt/) tool.
+Other tools (FrenchPress, WebTA, and AutoStyle) do not have openly available source code.
+
+To get the statistics we use students' solutions for six programming tasks,
+but the main script can gather these statistics for any tasks.
+
+The tasks from our dataset:
+- **countEven**. The `countEven` method returns the number of even integers in the values-array.
+- **sumValues**.
+  The `sumValues` method adds up all numbers from the values-array,
+  or only the positive numbers if the `positivesOnly` boolean parameter is set
+  to `true`.
+- **oddSum**. The `oddSum` method returns the sum of all numbers at an odd index
+  in the array parameter, until the number -1 is seen at an odd index.
+- **calculateScore**. The `calculateScore` method calculates the score for a train trip.
+  The highest score is 10. The score is based on the number of changes and the day of
+  the week (Monday is 1, Sunday is 7).
+- **hasDoubled**. Write a program that calculates in how many years your savings
+  have doubled with the given interest.
+- **haveThree**. Given an array of ints, return true if the value 3 appears in the
+  array exactly 3 times, and no 3's are next to each other.
+
+The dataset has several columns:
+- Student id (student_id);
+- Task key (task_key);
+- Code fragment (solution);
+- Tutor error, if it exists (tutor_error);
+- Tutor issue keys (tutor_issues);
+- Hyperstyle issue keys (hyperstyle_issues);
+- Hyperstyle INFO issue keys (hyperstyle_info_issues);
+- Code style issues count (code_style_issues_count).
+
+The dataset is stored in the `csv` format.
+
+## Usage
+
+Run [statistics_gathering.py](statistics_gathering.py) with the arguments from the command line.
+
+Required arguments:
+
+`solutions_file_path` — path to the csv-file with code samples.
+
+The statistics will be printed in the terminal. The statistics include:
+- Unique users count;
+- Code snippets count;
+- Task statistics: for each task, the number of code snippets and the number of snippets with Tutor errors;
+- The number of code fragments that have Tutor errors;
+- The number of unique errors found by Tutor;
+- Error statistics: for each error, its text and frequency;
+- Issues statistics:
+  - The total number of unique issues;
+  - Common issues statistics: the frequency of every issue found by both Hyperstyle and Tutor;
+  - Tutor unique issues statistics: the frequency of every Tutor issue that was not found by Hyperstyle;
+  - Hyperstyle unique issues statistics: the frequency of every Hyperstyle issue that was not found by Tutor;
+  - The number of code style issues and the number of fragments with these issues.
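[Editor's aside, not part of the patch: the snippet below is a minimal sketch of the dataset layout described above and of the per-task counts the script reports. The column names follow the list above; the student ids, solutions, and issue names are made-up sample values, and issues are assumed to be `;`-separated, as in the parsing code in `tutor_statistics.py`.]

```python
import pandas as pd

# A made-up dataset with the columns listed above (real files contain many more rows).
toy_df = pd.DataFrame({
    'student_id': [1, 1, 2],
    'task_key': ['countEven', 'sumValues', 'countEven'],
    'solution': ['<code fragment>', '<code fragment>', '<code fragment>'],
    'tutor_error': [None, 'Some Tutor error text', None],
    'tutor_issues': ['UnusedVariable', None, None],
    'hyperstyle_issues': ['UnusedVariable;LineLen', None, 'LineLen'],
    'hyperstyle_info_issues': [None, None, None],
    'code_style_issues_count': [1, 0, 1],
})

# Per-task counts in the spirit of the "Task statistics" part of the report:
# the number of code snippets and how many of them have a Tutor error.
for task, task_df in toy_df.groupby('task_key'):
    with_errors = task_df['tutor_error'].notna().sum()
    print(f'Task {task}: {len(task_df)} items; {with_errors} with tutor errors')
```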
+ diff --git a/src/python/evaluation/paper_evaluation/comparison_with_other_tools/__init__.py b/src/python/evaluation/paper_evaluation/comparison_with_other_tools/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/python/evaluation/paper_evaluation/comparison_with_other_tools/statistics_gathering.py b/src/python/evaluation/paper_evaluation/comparison_with_other_tools/statistics_gathering.py new file mode 100644 index 00000000..a995a84b --- /dev/null +++ b/src/python/evaluation/paper_evaluation/comparison_with_other_tools/statistics_gathering.py @@ -0,0 +1,57 @@ +import argparse +import logging +import sys +from pathlib import Path + +from src.python.common.tool_arguments import RunToolArgument +from src.python.evaluation.common.pandas_util import get_solutions_df +from src.python.evaluation.paper_evaluation.comparison_with_other_tools.tutor_statistics import TutorStatistics, \ + IssuesStatistics +from src.python.evaluation.paper_evaluation.comparison_with_other_tools.util import ComparisonColumnName +from src.python.review.common.file_system import Extension, get_restricted_extension + +sys.path.append('') +sys.path.append('../../..') + +logger = logging.getLogger(__name__) + + +def configure_arguments(parser: argparse.ArgumentParser) -> None: + parser.add_argument(RunToolArgument.SOLUTIONS_FILE_PATH.value.long_name, + type=lambda value: Path(value).absolute(), + help='Local CSV-file path with feedback from different tools. ' + 'Your file must include column-names:' + f'"{ComparisonColumnName.STUDENT_ID.name}" and ' + f'"{ComparisonColumnName.TASK_KEY.name}" and ' + f'"{ComparisonColumnName.SOLUTION.name}" and ' + f'"{ComparisonColumnName.TUTOR_ERROR.name}" and ') + + +def main() -> int: + parser = argparse.ArgumentParser() + configure_arguments(parser) + + try: + args = parser.parse_args() + solutions_file_path = args.solutions_file_path + extension = get_restricted_extension(solutions_file_path, [Extension.CSV]) + solutions_df = get_solutions_df(extension, solutions_file_path) + tutor_stat = TutorStatistics(solutions_df, to_drop_duplicates=True) + tutor_stat.print_tasks_stat() + tutor_stat.print_error_stat() + print('ISSUES STAT:') + issue_stat = IssuesStatistics(solutions_df) + issue_stat.print_issues_stat() + return 0 + + except FileNotFoundError: + logger.error('CSV-file with the specified name does not exists.') + return 2 + + except Exception: + logger.exception('An unexpected error.') + return 2 + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/src/python/evaluation/paper_evaluation/comparison_with_other_tools/tutor_statistics.py b/src/python/evaluation/paper_evaluation/comparison_with_other_tools/tutor_statistics.py new file mode 100644 index 00000000..a66b1188 --- /dev/null +++ b/src/python/evaluation/paper_evaluation/comparison_with_other_tools/tutor_statistics.py @@ -0,0 +1,122 @@ +from collections import defaultdict +from dataclasses import dataclass +from typing import Dict, List, Any +from collections import Counter + +import pandas as pd + +from src.python.evaluation.common.pandas_util import filter_df_by_single_value +from src.python.evaluation.paper_evaluation.comparison_with_other_tools.util import TutorTask, ComparisonColumnName, \ + ERROR_CONST + + +def sort_freq_dict(freq_dict: Dict[Any, int]) -> Dict[Any, int]: + return dict(sorted(freq_dict.items(), key=lambda item: item[1], reverse=True)) + + +@dataclass +class TutorStatistics: + unique_users: int + task_to_freq: Dict[TutorTask, int] + task_to_error_freq: Dict[TutorTask, int] + 
error_to_freq: Dict[str, int] + fragments_with_error: int = 0 + + __separator: str = '----------' + + def __init__(self, solutions_df: pd.DataFrame, to_drop_duplicates: bool = False): + if to_drop_duplicates: + solutions_df = solutions_df.drop_duplicates(ComparisonColumnName.SOLUTION.value) + self.unique_users = len(solutions_df[ComparisonColumnName.STUDENT_ID.value].unique()) + self.task_to_freq = defaultdict(int) + self.task_to_error_freq = defaultdict(int) + self.error_to_freq = defaultdict(int) + for task in TutorTask: + task_df = filter_df_by_single_value(solutions_df, ComparisonColumnName.TASK_KEY.value, task.value) + self.task_to_freq[task] = task_df.shape[0] + errors_list = list(map(lambda e_l: e_l.split(';'), + task_df[ComparisonColumnName.TUTOR_ERROR.value].dropna().values)) + for cell_errors in errors_list: + for error in cell_errors: + self.error_to_freq[error] += 1 + self.task_to_error_freq[task] += 1 + self.fragments_with_error += 1 + self.task_to_freq = sort_freq_dict(self.task_to_freq) + self.error_to_freq = sort_freq_dict(self.error_to_freq) + + def print_tasks_stat(self) -> None: + print(f'Unique users count: {self.unique_users}') + print(f'Code snippets count: {sum(self.task_to_freq.values())}') + print('Tasks statistics:') + for task, freq in self.task_to_freq.items(): + print(f'Task {task.value}: {freq} items; {self.task_to_error_freq[task]} with tutor errors') + print(self.__separator) + + def print_error_stat(self) -> None: + print(f'{self.fragments_with_error} code fragments has errors during running by Tutor') + print(f'{len(self.error_to_freq.keys())} unique errors was found in Tutor') + print('Error statistics:') + for error, freq in self.error_to_freq.items(): + print(f'{error}: {freq} items') + print(self.__separator) + + +@dataclass +class IssuesStatistics: + common_issue_to_freq: Dict[str, int] + tutor_uniq_issue_to_freq: Dict[str, int] + hyperstyle_uniq_issue_to_freq: Dict[str, int] + + code_style_issues_count: int + fragments_count_with_code_style_issues: int + + __separator: str = '----------' + + # TODO: info and code style issues + def __init__(self, solutions_df: pd.DataFrame, to_drop_duplicates: bool = False): + if to_drop_duplicates: + solutions_df = solutions_df.drop_duplicates(ComparisonColumnName.SOLUTION.value) + self.common_issue_to_freq = defaultdict(int) + self.tutor_uniq_issue_to_freq = defaultdict(int) + self.hyperstyle_uniq_issue_to_freq = defaultdict(int) + solutions_df.apply(lambda row: self.__init_solution_df_row(row), axis=1) + self.common_issue_to_freq = sort_freq_dict(self.common_issue_to_freq) + self.tutor_uniq_issue_to_freq = sort_freq_dict(self.tutor_uniq_issue_to_freq) + self.hyperstyle_uniq_issue_to_freq = sort_freq_dict(self.hyperstyle_uniq_issue_to_freq) + self.code_style_issues_count = sum(solutions_df[ComparisonColumnName.CODE_STYLE_ISSUES_COUNT.value]) + self.fragments_count_with_code_style_issues = len(list(filter(lambda x: x != 0, solutions_df[ComparisonColumnName.CODE_STYLE_ISSUES_COUNT.value]))) + + @staticmethod + def __parse_issues(issues_str: str) -> List[str]: + if pd.isna(issues_str) or issues_str == ERROR_CONST: + return [] + return issues_str.split(';') + + @staticmethod + def __add_issues(issues_dict: Dict[str, int], issues: List[str]) -> None: + for issue in issues: + issues_dict[issue] += 1 + + def __init_solution_df_row(self, row: pd.DataFrame) -> None: + tutor_issues = self.__parse_issues(row[ComparisonColumnName.TUTOR_ISSUES.value]) + hyperstyle_issues = 
self.__parse_issues(row[ComparisonColumnName.HYPERSTYLE_ISSUES.value]) + common_issues = list((Counter(tutor_issues) & Counter(hyperstyle_issues)).elements()) + self.__add_issues(self.common_issue_to_freq, common_issues) + self.__add_issues(self.tutor_uniq_issue_to_freq, list(set(tutor_issues) - set(common_issues))) + self.__add_issues(self.hyperstyle_uniq_issue_to_freq, list(set(hyperstyle_issues) - set(common_issues))) + + def __print_freq_issues_stat(self, freq_stat: Dict[str, int], prefix: str) -> None: + print(f'{prefix} issues statistics:') + for issue, freq in freq_stat.items(): + print(f'{issue} was found {freq} times') + print(self.__separator) + + def print_issues_stat(self) -> None: + print(f'{len(self.common_issue_to_freq.keys()) + len(self.tutor_uniq_issue_to_freq.keys()) + len(self.hyperstyle_uniq_issue_to_freq.keys())} unique issues in total was found') + print(self.__separator) + self.__print_freq_issues_stat(self.common_issue_to_freq, 'Common') + self.__print_freq_issues_stat(self.tutor_uniq_issue_to_freq, 'Tutor unique') + self.__print_freq_issues_stat(self.hyperstyle_uniq_issue_to_freq, 'Hyperstyle unique') + print(f'{self.code_style_issues_count} code style issues (spaces, different brackets, indentations)' + f' was found in total by hyperstyle in {self.fragments_count_with_code_style_issues} fragments') + print(self.__separator) \ No newline at end of file diff --git a/src/python/evaluation/paper_evaluation/comparison_with_other_tools/util.py b/src/python/evaluation/paper_evaluation/comparison_with_other_tools/util.py new file mode 100644 index 00000000..d3779a77 --- /dev/null +++ b/src/python/evaluation/paper_evaluation/comparison_with_other_tools/util.py @@ -0,0 +1,27 @@ +from enum import unique, Enum + + +@unique +class ComparisonColumnName(Enum): + STUDENT_ID = 'student_id' + TASK_KEY = 'task_key' + SOLUTION = 'solution' + TUTOR_ERROR = 'tutor_error' + + TUTOR_ISSUES = 'tutor_issues' + HYPERSTYLE_ISSUES = 'hyperstyle_issues' + HYPERSTYLE_INFO_ISSUES = 'hyperstyle_info_issues' + CODE_STYLE_ISSUES_COUNT = 'code_style_issues_count' + + +ERROR_CONST = 'ERROR' + + +@unique +class TutorTask(Enum): + EVEN = 'countEven' + SUM_VALUES = 'sumValues' + ODD_SUM = 'oddSum' + SCORE = 'calculateScore' + HAS_DOUBLED = 'hasDoubled' + HAVE_THREE = 'haveThree' diff --git a/src/python/evaluation/paper_evaluation/user_dynamics/README.md b/src/python/evaluation/paper_evaluation/user_dynamics/README.md new file mode 100644 index 00000000..9e46df11 --- /dev/null +++ b/src/python/evaluation/paper_evaluation/user_dynamics/README.md @@ -0,0 +1,29 @@ +# Dynamics of student usage + +This module allows getting statistics about students dynamics in code quality issues improvements. + +## Usage + +Run the [dynamics_gathering.py](dynamics_gathering.py) with the arguments from command line. + +Required arguments: + +`solutions_file_path` — path to csv-file with code samples. + +Optional arguments: +Argument | Description +--- | --- +|**‑fb**, **‑‑freq-boundary**| The boundary of solutions count for one student to analyze. The default value is 100.| +|**‑n**, **‑‑n**| Top n popular issues in solutions. The default value is 100. | + +In the result a file with students issues dynamics will be created. +Also, the top of issues for all students will be printed into the terminal. This statistics has key of issue and frequency for all students. 
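[Editor's aside, not part of the patch: once the script has finished, the dynamics file can be post-processed with a few lines of pandas. This is a minimal sketch; the file name matches the `student_issues_dynamics.csv` name used in `dynamics_gathering.py`, but the path is hypothetical, and the file layout itself is shown in the example that follows.]

```python
import pandas as pd

# Hypothetical location: the script prints the real output path when it finishes.
dynamics_df = pd.read_csv('student_issues_dynamics.csv')

for _, row in dynamics_df.iterrows():
    # The 'traceback' cell is a comma-separated list of per-solution issue counts,
    # ordered by submission time.
    counts = [int(n) for n in str(row['traceback']).split(',')]
    print(f"user {row['user']}: {len(counts)} solutions, {sum(counts)} issues in total, "
          f"{counts[-1]} issues in the last solution")
```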
+ +An example of issues dynamics: +```text +user,traceback +0,"0,0,0,0,0,0,0,0,0,0,0,1,0,0,3,0,0,0,0,2,0,4,0,6,3,0,3,0,0,0,1,1,0,0,0,1,0,0,0,2,0,0,0,0,0,0,4,0,0,0,1,6,0,1,0,1,3,0,0,1,1,0,0,0,0,0,3,6,1,0,0,0,0,0,0,0,4,1,0,0,1,0,8,0,2,8,0,0,0,0,1,1,1,1,3,7,23,0,9" +1,"0,0,0,3,0,0,2,1,0,0,0,0,4,1,0,0,1,1,0,0,0,0,0,6,0,1,1,0,8,1,2,1,1,0,0,1,0,4,10,1,1,1,3,0,1,0,0,0,1,0,0,0,0,0,0,2,0,3,0,0,2,2,3,2,0,0,0,1,0,1,1,0,0,1,0,4,6,2,0,0,1,0,0,0,0,2,0,0,0,2,1,2,1,0,1,7,1,0,1,1,0,1,0" +``` +Each number in the traceback column is the count of issues in one solution. +The numbers of issues sorted by timestamps. \ No newline at end of file diff --git a/src/python/evaluation/paper_evaluation/user_dynamics/__init__.py b/src/python/evaluation/paper_evaluation/user_dynamics/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/python/evaluation/paper_evaluation/user_dynamics/dynamics_gathering.py b/src/python/evaluation/paper_evaluation/user_dynamics/dynamics_gathering.py new file mode 100644 index 00000000..736811e7 --- /dev/null +++ b/src/python/evaluation/paper_evaluation/user_dynamics/dynamics_gathering.py @@ -0,0 +1,111 @@ +import argparse +import sys +from collections import Counter +from pathlib import Path +from typing import List, Dict + +import pandas as pd + +from src.python.common.tool_arguments import RunToolArgument +from src.python.evaluation.common.csv_util import write_dataframe_to_csv +from src.python.evaluation.common.pandas_util import get_solutions_df, logger, filter_df_by_single_value, \ + drop_duplicates, get_issues_from_json +from src.python.evaluation.common.util import ColumnName +from src.python.evaluation.inspectors.common.statistics import PenaltyIssue +from src.python.evaluation.paper_evaluation.comparison_with_other_tools.tutor_statistics import sort_freq_dict +from src.python.evaluation.paper_evaluation.user_dynamics.user_statistics import UserStatistics +from src.python.review.common.file_system import get_restricted_extension, Extension, get_parent_folder +from src.python.review.inspectors.issue import IssueType + + +def configure_arguments(parser: argparse.ArgumentParser) -> None: + parser.add_argument(RunToolArgument.SOLUTIONS_FILE_PATH.value.long_name, + type=lambda value: Path(value).absolute(), + help=RunToolArgument.SOLUTIONS_FILE_PATH.value) + + parser.add_argument('-fb', '--freq-boundary', + help='The boundary of solutions count for one student to analyze', + type=int, + default=100) + + parser.add_argument('-n', '--n', + help='Top n popular issues in solutions', + type=int, + default=10) + + +def __get_top_freq_issues(issues: List[List[PenaltyIssue]], n: int) -> Dict[str, int]: + all_issues = list(map(lambda i: i.origin_class, [item for sublist in issues for item in sublist])) + return dict(Counter(all_issues).most_common(n)) + + +# Get statistics only for users that have >= freq_boundary solutions in solutions_df +# Statistics for each student has: +# - - list of list of issues, but without INFO issues +# - - for each key of issue from has frequency. 
+# Contains only top_n issues +def __get_user_statistics(solutions_df: pd.DataFrame, freq_boundary: int = 100, + top_n: int = 10) -> List[UserStatistics]: + stat = [] + counts = solutions_df[ColumnName.USER.value].value_counts() + solutions_df = solutions_df[solutions_df[ColumnName.USER.value].isin(counts[counts > freq_boundary].index)] + for user in solutions_df[ColumnName.USER.value].unique(): + user_df = filter_df_by_single_value(solutions_df, + ColumnName.USER.value, user).sort_values(ColumnName.TIME.value) + user_df = drop_duplicates(user_df) + traceback = list(map(lambda t: get_issues_from_json(t), + list(user_df[ColumnName.TRACEBACK.value]))) + # Filter info category + traceback = list(filter(lambda issues_list: filter(lambda i: i.type != IssueType.INFO, issues_list), traceback)) + top_issues = __get_top_freq_issues(traceback, top_n) + stat.append(UserStatistics(traceback, top_issues)) + return stat + + +def __get_student_dynamics(stats: List[UserStatistics]) -> pd.DataFrame: + dynamics = map(lambda s: s.get_traceback_dynamics(), stats) + dynamics_dict = {i: ','.join(map(lambda d: str(d), dyn)) for (i, dyn) in enumerate(dynamics)} + return pd.DataFrame(dynamics_dict.items(), columns=[ColumnName.USER.value, ColumnName.TRACEBACK.value]) + + +def __get_total_top(stats: List[UserStatistics]) -> Dict[str, int]: + total_top_n = {} + for d in map(lambda s: s.top_issues, stats): + for k, v in d.items(): + total_top_n.setdefault(k, 0) + total_top_n[k] += v + return sort_freq_dict(total_top_n) + + +def main() -> int: + parser = argparse.ArgumentParser() + configure_arguments(parser) + + try: + args = parser.parse_args() + solutions_file_path = args.solutions_file_path + extension = get_restricted_extension(solutions_file_path, [Extension.CSV]) + solutions_df = get_solutions_df(extension, solutions_file_path) + solutions_df = filter_df_by_single_value(solutions_df, ColumnName.IS_PUBLIC.value, 'YES') + stats = __get_user_statistics(solutions_df, freq_boundary=args.freq_boundary, top_n=args.n) + dynamics = __get_student_dynamics(stats) + output_path = get_parent_folder(Path(solutions_file_path)) / f'student_issues_dynamics{Extension.CSV.value}' + write_dataframe_to_csv(output_path, dynamics) + print(f'The students dynamics was saved here: {output_path}') + total_top = __get_total_top(stats) + print('Total top issues:') + for i, (key, freq) in enumerate(total_top.items()): + print(f'{i}. 
{key} was found {freq} times') + return 0 + + except FileNotFoundError: + logger.error('CSV-file with the specified name does not exists.') + return 2 + + except Exception: + logger.exception('An unexpected error.') + return 2 + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/src/python/evaluation/paper_evaluation/user_dynamics/user_statistics.py b/src/python/evaluation/paper_evaluation/user_dynamics/user_statistics.py new file mode 100644 index 00000000..eaee11b1 --- /dev/null +++ b/src/python/evaluation/paper_evaluation/user_dynamics/user_statistics.py @@ -0,0 +1,13 @@ +from dataclasses import dataclass +from typing import List, Dict + +from src.python.evaluation.inspectors.common.statistics import PenaltyIssue + + +@dataclass +class UserStatistics: + traceback: List[List[PenaltyIssue]] + top_issues: Dict[str, int] + + def get_traceback_dynamics(self) -> List[int]: + return list(map(lambda i_l: len(i_l), self.traceback)) \ No newline at end of file From 0c1ada765574f73702fd5fd79ff52d0e4315f0e1 Mon Sep 17 00:00:00 2001 From: "Anastasiia.Birillo" Date: Sun, 25 Jul 2021 19:12:02 +0300 Subject: [PATCH 2/6] Fix flake8 --- .../statistics_gathering.py | 5 +++-- .../tutor_statistics.py | 21 ++++++++++++------- .../comparison_with_other_tools/util.py | 2 +- .../user_dynamics/dynamics_gathering.py | 10 ++++----- .../user_dynamics/user_statistics.py | 4 ++-- whitelist.txt | 6 ++++++ 6 files changed, 30 insertions(+), 18 deletions(-) diff --git a/src/python/evaluation/paper_evaluation/comparison_with_other_tools/statistics_gathering.py b/src/python/evaluation/paper_evaluation/comparison_with_other_tools/statistics_gathering.py index a995a84b..130092eb 100644 --- a/src/python/evaluation/paper_evaluation/comparison_with_other_tools/statistics_gathering.py +++ b/src/python/evaluation/paper_evaluation/comparison_with_other_tools/statistics_gathering.py @@ -5,8 +5,9 @@ from src.python.common.tool_arguments import RunToolArgument from src.python.evaluation.common.pandas_util import get_solutions_df -from src.python.evaluation.paper_evaluation.comparison_with_other_tools.tutor_statistics import TutorStatistics, \ - IssuesStatistics +from src.python.evaluation.paper_evaluation.comparison_with_other_tools.tutor_statistics import ( + IssuesStatistics, TutorStatistics, +) from src.python.evaluation.paper_evaluation.comparison_with_other_tools.util import ComparisonColumnName from src.python.review.common.file_system import Extension, get_restricted_extension diff --git a/src/python/evaluation/paper_evaluation/comparison_with_other_tools/tutor_statistics.py b/src/python/evaluation/paper_evaluation/comparison_with_other_tools/tutor_statistics.py index a66b1188..4e7bf2e6 100644 --- a/src/python/evaluation/paper_evaluation/comparison_with_other_tools/tutor_statistics.py +++ b/src/python/evaluation/paper_evaluation/comparison_with_other_tools/tutor_statistics.py @@ -1,13 +1,13 @@ +from collections import Counter from collections import defaultdict from dataclasses import dataclass -from typing import Dict, List, Any -from collections import Counter +from typing import Any, Dict, List import pandas as pd - from src.python.evaluation.common.pandas_util import filter_df_by_single_value -from src.python.evaluation.paper_evaluation.comparison_with_other_tools.util import TutorTask, ComparisonColumnName, \ - ERROR_CONST +from src.python.evaluation.paper_evaluation.comparison_with_other_tools.util import ( + ComparisonColumnName, ERROR_CONST, TutorTask, +) def sort_freq_dict(freq_dict: Dict[Any, int]) -> 
Dict[Any, int]: @@ -84,7 +84,8 @@ def __init__(self, solutions_df: pd.DataFrame, to_drop_duplicates: bool = False) self.tutor_uniq_issue_to_freq = sort_freq_dict(self.tutor_uniq_issue_to_freq) self.hyperstyle_uniq_issue_to_freq = sort_freq_dict(self.hyperstyle_uniq_issue_to_freq) self.code_style_issues_count = sum(solutions_df[ComparisonColumnName.CODE_STYLE_ISSUES_COUNT.value]) - self.fragments_count_with_code_style_issues = len(list(filter(lambda x: x != 0, solutions_df[ComparisonColumnName.CODE_STYLE_ISSUES_COUNT.value]))) + self.fragments_count_with_code_style_issues = len(list( + filter(lambda x: x != 0, solutions_df[ComparisonColumnName.CODE_STYLE_ISSUES_COUNT.value]))) @staticmethod def __parse_issues(issues_str: str) -> List[str]: @@ -112,11 +113,15 @@ def __print_freq_issues_stat(self, freq_stat: Dict[str, int], prefix: str) -> No print(self.__separator) def print_issues_stat(self) -> None: - print(f'{len(self.common_issue_to_freq.keys()) + len(self.tutor_uniq_issue_to_freq.keys()) + len(self.hyperstyle_uniq_issue_to_freq.keys())} unique issues in total was found') + uniq_issues = (len(self.common_issue_to_freq) + + len(self.tutor_uniq_issue_to_freq) + + len(self.hyperstyle_uniq_issue_to_freq) + ) + print(f'{uniq_issues} unique issues in total was found') print(self.__separator) self.__print_freq_issues_stat(self.common_issue_to_freq, 'Common') self.__print_freq_issues_stat(self.tutor_uniq_issue_to_freq, 'Tutor unique') self.__print_freq_issues_stat(self.hyperstyle_uniq_issue_to_freq, 'Hyperstyle unique') print(f'{self.code_style_issues_count} code style issues (spaces, different brackets, indentations)' f' was found in total by hyperstyle in {self.fragments_count_with_code_style_issues} fragments') - print(self.__separator) \ No newline at end of file + print(self.__separator) diff --git a/src/python/evaluation/paper_evaluation/comparison_with_other_tools/util.py b/src/python/evaluation/paper_evaluation/comparison_with_other_tools/util.py index d3779a77..eff1bc15 100644 --- a/src/python/evaluation/paper_evaluation/comparison_with_other_tools/util.py +++ b/src/python/evaluation/paper_evaluation/comparison_with_other_tools/util.py @@ -1,4 +1,4 @@ -from enum import unique, Enum +from enum import Enum, unique @unique diff --git a/src/python/evaluation/paper_evaluation/user_dynamics/dynamics_gathering.py b/src/python/evaluation/paper_evaluation/user_dynamics/dynamics_gathering.py index 736811e7..7d9bbec5 100644 --- a/src/python/evaluation/paper_evaluation/user_dynamics/dynamics_gathering.py +++ b/src/python/evaluation/paper_evaluation/user_dynamics/dynamics_gathering.py @@ -2,19 +2,19 @@ import sys from collections import Counter from pathlib import Path -from typing import List, Dict +from typing import Dict, List import pandas as pd - from src.python.common.tool_arguments import RunToolArgument from src.python.evaluation.common.csv_util import write_dataframe_to_csv -from src.python.evaluation.common.pandas_util import get_solutions_df, logger, filter_df_by_single_value, \ - drop_duplicates, get_issues_from_json +from src.python.evaluation.common.pandas_util import ( + drop_duplicates, filter_df_by_single_value, get_issues_from_json, get_solutions_df, logger, +) from src.python.evaluation.common.util import ColumnName from src.python.evaluation.inspectors.common.statistics import PenaltyIssue from src.python.evaluation.paper_evaluation.comparison_with_other_tools.tutor_statistics import sort_freq_dict from src.python.evaluation.paper_evaluation.user_dynamics.user_statistics import 
UserStatistics -from src.python.review.common.file_system import get_restricted_extension, Extension, get_parent_folder +from src.python.review.common.file_system import Extension, get_parent_folder, get_restricted_extension from src.python.review.inspectors.issue import IssueType diff --git a/src/python/evaluation/paper_evaluation/user_dynamics/user_statistics.py b/src/python/evaluation/paper_evaluation/user_dynamics/user_statistics.py index eaee11b1..0423a70b 100644 --- a/src/python/evaluation/paper_evaluation/user_dynamics/user_statistics.py +++ b/src/python/evaluation/paper_evaluation/user_dynamics/user_statistics.py @@ -1,5 +1,5 @@ from dataclasses import dataclass -from typing import List, Dict +from typing import Dict, List from src.python.evaluation.inspectors.common.statistics import PenaltyIssue @@ -10,4 +10,4 @@ class UserStatistics: top_issues: Dict[str, int] def get_traceback_dynamics(self) -> List[int]: - return list(map(lambda i_l: len(i_l), self.traceback)) \ No newline at end of file + return list(map(lambda i_l: len(i_l), self.traceback)) diff --git a/whitelist.txt b/whitelist.txt index b505f492..18fd4cb7 100644 --- a/whitelist.txt +++ b/whitelist.txt @@ -183,3 +183,9 @@ Xpath Ctor Atclause puppycrawl +CONST +isna +dropna +sublist +dyn +setdefault From e1220cd273debc2fbabb47a543956731bca7f2a6 Mon Sep 17 00:00:00 2001 From: "Anastasiia.Birillo" Date: Mon, 26 Jul 2021 09:50:48 +0300 Subject: [PATCH 3/6] Rename statistics --- src/python/evaluation/inspectors/common/statistics.py | 2 +- .../evaluation/inspectors/print_inspectors_statistics.py | 4 ++-- .../evaluation/{statistics => issues_statistics}/README.md | 0 .../{statistics => issues_statistics}/__init__.py | 0 .../{statistics => issues_statistics}/common/__init__.py | 0 .../common/raw_issue_encoder_decoder.py | 0 .../{statistics => issues_statistics}/get_raw_issues.py | 2 +- .../comparison_with_other_tools/tutor_statistics.py | 6 +++--- .../paper_evaluation/user_dynamics/dynamics_gathering.py | 2 +- .../evaluation/qodana/convert_to_hyperstyle_inspections.py | 2 +- test/python/evaluation/statistics/__init__.py | 2 +- test/python/evaluation/statistics/test_get_raw_issues.py | 2 +- .../statistics/test_raw_issue_encoding_decoding.py | 2 +- 13 files changed, 12 insertions(+), 12 deletions(-) rename src/python/evaluation/{statistics => issues_statistics}/README.md (100%) rename src/python/evaluation/{statistics => issues_statistics}/__init__.py (100%) rename src/python/evaluation/{statistics => issues_statistics}/common/__init__.py (100%) rename src/python/evaluation/{statistics => issues_statistics}/common/raw_issue_encoder_decoder.py (100%) rename src/python/evaluation/{statistics => issues_statistics}/get_raw_issues.py (98%) diff --git a/src/python/evaluation/inspectors/common/statistics.py b/src/python/evaluation/inspectors/common/statistics.py index 401a29a6..7645ccd5 100644 --- a/src/python/evaluation/inspectors/common/statistics.py +++ b/src/python/evaluation/inspectors/common/statistics.py @@ -63,7 +63,7 @@ def get_categorized_statistics(self) -> Dict[IssueType, Dict[ShortIssue, int]]: categorized_stat[issue.type][issue] = freq return categorized_stat - # Get statistics for each IssueType: count unique issues, count fragments with these issues + # Get issues_statistics for each IssueType: count unique issues, count fragments with these issues def get_short_categorized_statistics(self) -> Dict[IssueType, Tuple[int, int]]: categorized_statistics: Dict[IssueType, Dict[ShortIssue, int]] = self.get_categorized_statistics() 
short_categorized_statistics = defaultdict(tuple) diff --git a/src/python/evaluation/inspectors/print_inspectors_statistics.py b/src/python/evaluation/inspectors/print_inspectors_statistics.py index e3146cd6..cc37c0f7 100644 --- a/src/python/evaluation/inspectors/print_inspectors_statistics.py +++ b/src/python/evaluation/inspectors/print_inspectors_statistics.py @@ -18,7 +18,7 @@ def configure_arguments(parser: argparse.ArgumentParser) -> None: help=RunToolArgument.DIFFS_FILE_PATH.value.description) parser.add_argument('--categorize', - help='If True, statistics will be categorized by several categories.', + help='If True, issues_statistics will be categorized by several categories.', action='store_true') parser.add_argument('-n', '--top-n', @@ -27,7 +27,7 @@ def configure_arguments(parser: argparse.ArgumentParser) -> None: default=10) parser.add_argument('--full-stat', - help='If True, full statistics will be printed.', + help='If True, full issues_statistics will be printed.', action='store_true') diff --git a/src/python/evaluation/statistics/README.md b/src/python/evaluation/issues_statistics/README.md similarity index 100% rename from src/python/evaluation/statistics/README.md rename to src/python/evaluation/issues_statistics/README.md diff --git a/src/python/evaluation/statistics/__init__.py b/src/python/evaluation/issues_statistics/__init__.py similarity index 100% rename from src/python/evaluation/statistics/__init__.py rename to src/python/evaluation/issues_statistics/__init__.py diff --git a/src/python/evaluation/statistics/common/__init__.py b/src/python/evaluation/issues_statistics/common/__init__.py similarity index 100% rename from src/python/evaluation/statistics/common/__init__.py rename to src/python/evaluation/issues_statistics/common/__init__.py diff --git a/src/python/evaluation/statistics/common/raw_issue_encoder_decoder.py b/src/python/evaluation/issues_statistics/common/raw_issue_encoder_decoder.py similarity index 100% rename from src/python/evaluation/statistics/common/raw_issue_encoder_decoder.py rename to src/python/evaluation/issues_statistics/common/raw_issue_encoder_decoder.py diff --git a/src/python/evaluation/statistics/get_raw_issues.py b/src/python/evaluation/issues_statistics/get_raw_issues.py similarity index 98% rename from src/python/evaluation/statistics/get_raw_issues.py rename to src/python/evaluation/issues_statistics/get_raw_issues.py index 19791c79..b077f34f 100644 --- a/src/python/evaluation/statistics/get_raw_issues.py +++ b/src/python/evaluation/issues_statistics/get_raw_issues.py @@ -15,7 +15,7 @@ from src.python.evaluation.common.pandas_util import get_solutions_df_by_file_path, write_df_to_file from src.python.evaluation.common.util import ColumnName from src.python.evaluation.evaluation_run_tool import get_language_version -from src.python.evaluation.statistics.common.raw_issue_encoder_decoder import RawIssueEncoder +from src.python.evaluation.issues_statistics.common.raw_issue_encoder_decoder import RawIssueEncoder from src.python.review.common.file_system import ( create_file, Extension, diff --git a/src/python/evaluation/paper_evaluation/comparison_with_other_tools/tutor_statistics.py b/src/python/evaluation/paper_evaluation/comparison_with_other_tools/tutor_statistics.py index 4e7bf2e6..c378c918 100644 --- a/src/python/evaluation/paper_evaluation/comparison_with_other_tools/tutor_statistics.py +++ b/src/python/evaluation/paper_evaluation/comparison_with_other_tools/tutor_statistics.py @@ -47,7 +47,7 @@ def __init__(self, solutions_df: 
pd.DataFrame, to_drop_duplicates: bool = False) def print_tasks_stat(self) -> None: print(f'Unique users count: {self.unique_users}') print(f'Code snippets count: {sum(self.task_to_freq.values())}') - print('Tasks statistics:') + print('Tasks issues_statistics:') for task, freq in self.task_to_freq.items(): print(f'Task {task.value}: {freq} items; {self.task_to_error_freq[task]} with tutor errors') print(self.__separator) @@ -55,7 +55,7 @@ def print_tasks_stat(self) -> None: def print_error_stat(self) -> None: print(f'{self.fragments_with_error} code fragments has errors during running by Tutor') print(f'{len(self.error_to_freq.keys())} unique errors was found in Tutor') - print('Error statistics:') + print('Error issues_statistics:') for error, freq in self.error_to_freq.items(): print(f'{error}: {freq} items') print(self.__separator) @@ -107,7 +107,7 @@ def __init_solution_df_row(self, row: pd.DataFrame) -> None: self.__add_issues(self.hyperstyle_uniq_issue_to_freq, list(set(hyperstyle_issues) - set(common_issues))) def __print_freq_issues_stat(self, freq_stat: Dict[str, int], prefix: str) -> None: - print(f'{prefix} issues statistics:') + print(f'{prefix} issues issues_statistics:') for issue, freq in freq_stat.items(): print(f'{issue} was found {freq} times') print(self.__separator) diff --git a/src/python/evaluation/paper_evaluation/user_dynamics/dynamics_gathering.py b/src/python/evaluation/paper_evaluation/user_dynamics/dynamics_gathering.py index 7d9bbec5..05a94f17 100644 --- a/src/python/evaluation/paper_evaluation/user_dynamics/dynamics_gathering.py +++ b/src/python/evaluation/paper_evaluation/user_dynamics/dynamics_gathering.py @@ -39,7 +39,7 @@ def __get_top_freq_issues(issues: List[List[PenaltyIssue]], n: int) -> Dict[str, return dict(Counter(all_issues).most_common(n)) -# Get statistics only for users that have >= freq_boundary solutions in solutions_df +# Get issues_statistics only for users that have >= freq_boundary solutions in solutions_df # Statistics for each student has: # - - list of list of issues, but without INFO issues # - - for each key of issue from has frequency. 
diff --git a/src/python/evaluation/qodana/convert_to_hyperstyle_inspections.py b/src/python/evaluation/qodana/convert_to_hyperstyle_inspections.py index 5d7530a3..f52d94d6 100644 --- a/src/python/evaluation/qodana/convert_to_hyperstyle_inspections.py +++ b/src/python/evaluation/qodana/convert_to_hyperstyle_inspections.py @@ -75,7 +75,7 @@ def __qodana_to_hyperstyle_output(qodana_output: str, issues_to_keep: Set[str]) # Resort all fields in the qodana dataframe according to the hyperstyle dataframe # Add column with hyperstyle output (convert qodana output to hyperstyle output) -# Add grade column with grades from hyperstyle dataframe (to gather statistics by diffs_between_df.py script) +# Add grade column with grades from hyperstyle dataframe (to gather issues_statistics by diffs_between_df.py script) def __prepare_qodana_df(qodana_df: pd.DataFrame, hyperstyle_df: pd.DataFrame, issues_to_keep: Set[str]) -> pd.DataFrame: qodana_df = __preprocess_df(qodana_df, hyperstyle_df[ColumnName.ID.value]) diff --git a/test/python/evaluation/statistics/__init__.py b/test/python/evaluation/statistics/__init__.py index 08bac33a..831620c4 100644 --- a/test/python/evaluation/statistics/__init__.py +++ b/test/python/evaluation/statistics/__init__.py @@ -1,6 +1,6 @@ from test.python.evaluation import CURRENT_TEST_DATA_FOLDER -STATISTICS_TEST_DATA_FOLDER = CURRENT_TEST_DATA_FOLDER / 'statistics' +STATISTICS_TEST_DATA_FOLDER = CURRENT_TEST_DATA_FOLDER / 'issues_statistics' GET_RAW_ISSUES_DATA_FOLDER = STATISTICS_TEST_DATA_FOLDER / 'get_raw_issues' diff --git a/test/python/evaluation/statistics/test_get_raw_issues.py b/test/python/evaluation/statistics/test_get_raw_issues.py index c11882c2..899c1c04 100644 --- a/test/python/evaluation/statistics/test_get_raw_issues.py +++ b/test/python/evaluation/statistics/test_get_raw_issues.py @@ -6,7 +6,7 @@ import pandas as pd import pytest from src.python.evaluation.common.pandas_util import get_solutions_df_by_file_path -from src.python.evaluation.statistics.get_raw_issues import _filter_issues, _get_output_path, inspect_solutions +from src.python.evaluation.issues_statistics.get_raw_issues import _filter_issues, _get_output_path, inspect_solutions from src.python.review.inspectors.inspector_type import InspectorType from src.python.review.inspectors.issue import BaseIssue, CodeIssue, IssueType, LineLenIssue, MaintainabilityLackIssue diff --git a/test/python/evaluation/statistics/test_raw_issue_encoding_decoding.py b/test/python/evaluation/statistics/test_raw_issue_encoding_decoding.py index 82c6c901..43c20e08 100644 --- a/test/python/evaluation/statistics/test_raw_issue_encoding_decoding.py +++ b/test/python/evaluation/statistics/test_raw_issue_encoding_decoding.py @@ -3,7 +3,7 @@ from pathlib import Path import pytest -from src.python.evaluation.statistics.common.raw_issue_encoder_decoder import RawIssueDecoder, RawIssueEncoder +from src.python.evaluation.issues_statistics.common.raw_issue_encoder_decoder import RawIssueDecoder, RawIssueEncoder from src.python.review.inspectors.inspector_type import InspectorType from src.python.review.inspectors.issue import ( BaseIssue, From 09da795fc98a320ef7ee95dd6ea5df0880a912ed Mon Sep 17 00:00:00 2001 From: Ilya Vlasov Date: Tue, 27 Jul 2021 11:49:26 +0300 Subject: [PATCH 4/6] statistics -> issues_statistics --- .../evaluation/{statistics => issues_statistics}/__init__.py | 4 ++-- .../{statistics => issues_statistics}/test_get_raw_issues.py | 2 +- .../test_raw_issue_encoding_decoding.py | 0 
.../target_files/target_fragment_per_language.csv | 0 .../get_raw_issues/target_files/target_incorrect_code.csv | 0 .../get_raw_issues/target_files/target_incorrect_language.csv | 0 .../get_raw_issues/test_files/test_fragment_per_language.csv | 0 .../get_raw_issues/test_files/test_incorrect_code.csv | 0 .../get_raw_issues/test_files/test_incorrect_language.csv | 0 9 files changed, 3 insertions(+), 3 deletions(-) rename test/python/evaluation/{statistics => issues_statistics}/__init__.py (57%) rename test/python/evaluation/{statistics => issues_statistics}/test_get_raw_issues.py (98%) rename test/python/evaluation/{statistics => issues_statistics}/test_raw_issue_encoding_decoding.py (100%) rename test/resources/evaluation/{statistics => issues_statistics}/get_raw_issues/target_files/target_fragment_per_language.csv (100%) rename test/resources/evaluation/{statistics => issues_statistics}/get_raw_issues/target_files/target_incorrect_code.csv (100%) rename test/resources/evaluation/{statistics => issues_statistics}/get_raw_issues/target_files/target_incorrect_language.csv (100%) rename test/resources/evaluation/{statistics => issues_statistics}/get_raw_issues/test_files/test_fragment_per_language.csv (100%) rename test/resources/evaluation/{statistics => issues_statistics}/get_raw_issues/test_files/test_incorrect_code.csv (100%) rename test/resources/evaluation/{statistics => issues_statistics}/get_raw_issues/test_files/test_incorrect_language.csv (100%) diff --git a/test/python/evaluation/statistics/__init__.py b/test/python/evaluation/issues_statistics/__init__.py similarity index 57% rename from test/python/evaluation/statistics/__init__.py rename to test/python/evaluation/issues_statistics/__init__.py index 831620c4..9a178e36 100644 --- a/test/python/evaluation/statistics/__init__.py +++ b/test/python/evaluation/issues_statistics/__init__.py @@ -1,8 +1,8 @@ from test.python.evaluation import CURRENT_TEST_DATA_FOLDER -STATISTICS_TEST_DATA_FOLDER = CURRENT_TEST_DATA_FOLDER / 'issues_statistics' +ISSUES_STATISTICS_TEST_DATA_FOLDER = CURRENT_TEST_DATA_FOLDER / 'issues_statistics' -GET_RAW_ISSUES_DATA_FOLDER = STATISTICS_TEST_DATA_FOLDER / 'get_raw_issues' +GET_RAW_ISSUES_DATA_FOLDER = ISSUES_STATISTICS_TEST_DATA_FOLDER / 'get_raw_issues' GET_RAW_ISSUES_TEST_FILES_FOLDER = GET_RAW_ISSUES_DATA_FOLDER / 'test_files' diff --git a/test/python/evaluation/statistics/test_get_raw_issues.py b/test/python/evaluation/issues_statistics/test_get_raw_issues.py similarity index 98% rename from test/python/evaluation/statistics/test_get_raw_issues.py rename to test/python/evaluation/issues_statistics/test_get_raw_issues.py index 899c1c04..643ada1c 100644 --- a/test/python/evaluation/statistics/test_get_raw_issues.py +++ b/test/python/evaluation/issues_statistics/test_get_raw_issues.py @@ -1,6 +1,6 @@ from pathlib import Path from test.python.common_util import equal_df -from test.python.evaluation.statistics import GET_RAW_ISSUES_TARGET_FILES_FOLDER, GET_RAW_ISSUES_TEST_FILES_FOLDER +from test.python.evaluation.issues_statistics import GET_RAW_ISSUES_TARGET_FILES_FOLDER, GET_RAW_ISSUES_TEST_FILES_FOLDER from typing import List, Optional import pandas as pd diff --git a/test/python/evaluation/statistics/test_raw_issue_encoding_decoding.py b/test/python/evaluation/issues_statistics/test_raw_issue_encoding_decoding.py similarity index 100% rename from test/python/evaluation/statistics/test_raw_issue_encoding_decoding.py rename to test/python/evaluation/issues_statistics/test_raw_issue_encoding_decoding.py diff 
--git a/test/resources/evaluation/statistics/get_raw_issues/target_files/target_fragment_per_language.csv b/test/resources/evaluation/issues_statistics/get_raw_issues/target_files/target_fragment_per_language.csv similarity index 100% rename from test/resources/evaluation/statistics/get_raw_issues/target_files/target_fragment_per_language.csv rename to test/resources/evaluation/issues_statistics/get_raw_issues/target_files/target_fragment_per_language.csv diff --git a/test/resources/evaluation/statistics/get_raw_issues/target_files/target_incorrect_code.csv b/test/resources/evaluation/issues_statistics/get_raw_issues/target_files/target_incorrect_code.csv similarity index 100% rename from test/resources/evaluation/statistics/get_raw_issues/target_files/target_incorrect_code.csv rename to test/resources/evaluation/issues_statistics/get_raw_issues/target_files/target_incorrect_code.csv diff --git a/test/resources/evaluation/statistics/get_raw_issues/target_files/target_incorrect_language.csv b/test/resources/evaluation/issues_statistics/get_raw_issues/target_files/target_incorrect_language.csv similarity index 100% rename from test/resources/evaluation/statistics/get_raw_issues/target_files/target_incorrect_language.csv rename to test/resources/evaluation/issues_statistics/get_raw_issues/target_files/target_incorrect_language.csv diff --git a/test/resources/evaluation/statistics/get_raw_issues/test_files/test_fragment_per_language.csv b/test/resources/evaluation/issues_statistics/get_raw_issues/test_files/test_fragment_per_language.csv similarity index 100% rename from test/resources/evaluation/statistics/get_raw_issues/test_files/test_fragment_per_language.csv rename to test/resources/evaluation/issues_statistics/get_raw_issues/test_files/test_fragment_per_language.csv diff --git a/test/resources/evaluation/statistics/get_raw_issues/test_files/test_incorrect_code.csv b/test/resources/evaluation/issues_statistics/get_raw_issues/test_files/test_incorrect_code.csv similarity index 100% rename from test/resources/evaluation/statistics/get_raw_issues/test_files/test_incorrect_code.csv rename to test/resources/evaluation/issues_statistics/get_raw_issues/test_files/test_incorrect_code.csv diff --git a/test/resources/evaluation/statistics/get_raw_issues/test_files/test_incorrect_language.csv b/test/resources/evaluation/issues_statistics/get_raw_issues/test_files/test_incorrect_language.csv similarity index 100% rename from test/resources/evaluation/statistics/get_raw_issues/test_files/test_incorrect_language.csv rename to test/resources/evaluation/issues_statistics/get_raw_issues/test_files/test_incorrect_language.csv From 79b77320d8e98bc32f1db785d90290121c0138c9 Mon Sep 17 00:00:00 2001 From: Ilya Vlasov Date: Tue, 27 Jul 2021 11:53:29 +0300 Subject: [PATCH 5/6] Fixed flake8 --- .../evaluation/issues_statistics/test_get_raw_issues.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/python/evaluation/issues_statistics/test_get_raw_issues.py b/test/python/evaluation/issues_statistics/test_get_raw_issues.py index 643ada1c..cafa3b63 100644 --- a/test/python/evaluation/issues_statistics/test_get_raw_issues.py +++ b/test/python/evaluation/issues_statistics/test_get_raw_issues.py @@ -1,6 +1,8 @@ from pathlib import Path from test.python.common_util import equal_df -from test.python.evaluation.issues_statistics import GET_RAW_ISSUES_TARGET_FILES_FOLDER, GET_RAW_ISSUES_TEST_FILES_FOLDER +from test.python.evaluation.issues_statistics import ( + GET_RAW_ISSUES_TARGET_FILES_FOLDER, 
GET_RAW_ISSUES_TEST_FILES_FOLDER, +) from typing import List, Optional import pandas as pd From 9ca7f31dff66ee5f2ca4356a5d2450654a53a49d Mon Sep 17 00:00:00 2001 From: Ilya Vlasov Date: Tue, 27 Jul 2021 12:02:43 +0300 Subject: [PATCH 6/6] Undo some renaming --- src/python/evaluation/inspectors/common/statistics.py | 2 +- .../evaluation/inspectors/print_inspectors_statistics.py | 4 ++-- .../comparison_with_other_tools/tutor_statistics.py | 6 +++--- .../paper_evaluation/user_dynamics/dynamics_gathering.py | 2 +- .../evaluation/qodana/convert_to_hyperstyle_inspections.py | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/python/evaluation/inspectors/common/statistics.py b/src/python/evaluation/inspectors/common/statistics.py index 7645ccd5..401a29a6 100644 --- a/src/python/evaluation/inspectors/common/statistics.py +++ b/src/python/evaluation/inspectors/common/statistics.py @@ -63,7 +63,7 @@ def get_categorized_statistics(self) -> Dict[IssueType, Dict[ShortIssue, int]]: categorized_stat[issue.type][issue] = freq return categorized_stat - # Get issues_statistics for each IssueType: count unique issues, count fragments with these issues + # Get statistics for each IssueType: count unique issues, count fragments with these issues def get_short_categorized_statistics(self) -> Dict[IssueType, Tuple[int, int]]: categorized_statistics: Dict[IssueType, Dict[ShortIssue, int]] = self.get_categorized_statistics() short_categorized_statistics = defaultdict(tuple) diff --git a/src/python/evaluation/inspectors/print_inspectors_statistics.py b/src/python/evaluation/inspectors/print_inspectors_statistics.py index cc37c0f7..e3146cd6 100644 --- a/src/python/evaluation/inspectors/print_inspectors_statistics.py +++ b/src/python/evaluation/inspectors/print_inspectors_statistics.py @@ -18,7 +18,7 @@ def configure_arguments(parser: argparse.ArgumentParser) -> None: help=RunToolArgument.DIFFS_FILE_PATH.value.description) parser.add_argument('--categorize', - help='If True, issues_statistics will be categorized by several categories.', + help='If True, statistics will be categorized by several categories.', action='store_true') parser.add_argument('-n', '--top-n', @@ -27,7 +27,7 @@ def configure_arguments(parser: argparse.ArgumentParser) -> None: default=10) parser.add_argument('--full-stat', - help='If True, full issues_statistics will be printed.', + help='If True, full statistics will be printed.', action='store_true') diff --git a/src/python/evaluation/paper_evaluation/comparison_with_other_tools/tutor_statistics.py b/src/python/evaluation/paper_evaluation/comparison_with_other_tools/tutor_statistics.py index c378c918..4e7bf2e6 100644 --- a/src/python/evaluation/paper_evaluation/comparison_with_other_tools/tutor_statistics.py +++ b/src/python/evaluation/paper_evaluation/comparison_with_other_tools/tutor_statistics.py @@ -47,7 +47,7 @@ def __init__(self, solutions_df: pd.DataFrame, to_drop_duplicates: bool = False) def print_tasks_stat(self) -> None: print(f'Unique users count: {self.unique_users}') print(f'Code snippets count: {sum(self.task_to_freq.values())}') - print('Tasks issues_statistics:') + print('Tasks statistics:') for task, freq in self.task_to_freq.items(): print(f'Task {task.value}: {freq} items; {self.task_to_error_freq[task]} with tutor errors') print(self.__separator) @@ -55,7 +55,7 @@ def print_tasks_stat(self) -> None: def print_error_stat(self) -> None: print(f'{self.fragments_with_error} code fragments has errors during running by Tutor') 
print(f'{len(self.error_to_freq.keys())} unique errors was found in Tutor') - print('Error issues_statistics:') + print('Error statistics:') for error, freq in self.error_to_freq.items(): print(f'{error}: {freq} items') print(self.__separator) @@ -107,7 +107,7 @@ def __init_solution_df_row(self, row: pd.DataFrame) -> None: self.__add_issues(self.hyperstyle_uniq_issue_to_freq, list(set(hyperstyle_issues) - set(common_issues))) def __print_freq_issues_stat(self, freq_stat: Dict[str, int], prefix: str) -> None: - print(f'{prefix} issues issues_statistics:') + print(f'{prefix} issues statistics:') for issue, freq in freq_stat.items(): print(f'{issue} was found {freq} times') print(self.__separator) diff --git a/src/python/evaluation/paper_evaluation/user_dynamics/dynamics_gathering.py b/src/python/evaluation/paper_evaluation/user_dynamics/dynamics_gathering.py index 05a94f17..7d9bbec5 100644 --- a/src/python/evaluation/paper_evaluation/user_dynamics/dynamics_gathering.py +++ b/src/python/evaluation/paper_evaluation/user_dynamics/dynamics_gathering.py @@ -39,7 +39,7 @@ def __get_top_freq_issues(issues: List[List[PenaltyIssue]], n: int) -> Dict[str, return dict(Counter(all_issues).most_common(n)) -# Get issues_statistics only for users that have >= freq_boundary solutions in solutions_df +# Get statistics only for users that have >= freq_boundary solutions in solutions_df # Statistics for each student has: # - - list of list of issues, but without INFO issues # - - for each key of issue from has frequency. diff --git a/src/python/evaluation/qodana/convert_to_hyperstyle_inspections.py b/src/python/evaluation/qodana/convert_to_hyperstyle_inspections.py index f52d94d6..5d7530a3 100644 --- a/src/python/evaluation/qodana/convert_to_hyperstyle_inspections.py +++ b/src/python/evaluation/qodana/convert_to_hyperstyle_inspections.py @@ -75,7 +75,7 @@ def __qodana_to_hyperstyle_output(qodana_output: str, issues_to_keep: Set[str]) # Resort all fields in the qodana dataframe according to the hyperstyle dataframe # Add column with hyperstyle output (convert qodana output to hyperstyle output) -# Add grade column with grades from hyperstyle dataframe (to gather issues_statistics by diffs_between_df.py script) +# Add grade column with grades from hyperstyle dataframe (to gather statistics by diffs_between_df.py script) def __prepare_qodana_df(qodana_df: pd.DataFrame, hyperstyle_df: pd.DataFrame, issues_to_keep: Set[str]) -> pd.DataFrame: qodana_df = __preprocess_df(qodana_df, hyperstyle_df[ColumnName.ID.value])