From cfc79c4e690d7c542c4d33a672f935ffe2417ff4 Mon Sep 17 00:00:00 2001
From: "Anastasiia.Birillo" <nbirillo@mail.ru>
Date: Fri, 21 May 2021 16:50:33 +0300
Subject: [PATCH 1/6] Get unique inspections

---
 .../qodana/get_unique_inspectors.py           | 60 +++++++++++++++++++
 src/python/evaluation/qodana/util/models.py   |  2 +
 2 files changed, 62 insertions(+)
 create mode 100644 src/python/evaluation/qodana/get_unique_inspectors.py

diff --git a/src/python/evaluation/qodana/get_unique_inspectors.py b/src/python/evaluation/qodana/get_unique_inspectors.py
new file mode 100644
index 00000000..fb2f5906
--- /dev/null
+++ b/src/python/evaluation/qodana/get_unique_inspectors.py
@@ -0,0 +1,60 @@
+import argparse
+import json
+from pathlib import Path
+from typing import Set
+
+import pandas as pd
+from src.python.common.tool_arguments import RunToolArgument
+from src.python.evaluation.common.csv_util import write_dataframe_to_csv
+from src.python.evaluation.common.pandas_util import get_solutions_df_by_file_path
+from src.python.evaluation.qodana.util.models import QodanaJsonField, QodanaColumnName
+from src.python.review.common.file_system import get_parent_folder, Extension
+
+
+def configure_arguments(parser: argparse.ArgumentParser) -> None:
+    parser.add_argument(RunToolArgument.SOLUTIONS_FILE_PATH.value.long_name,
+                        type=lambda value: Path(value).absolute(),
+                        help=f'Csv file with solutions. This file must be graded by Qodana.')
+
+
+def __get_inspections_ids(json_issues: str) -> Set[str]:
+    issues_list = json.loads(json_issues)[QodanaJsonField.ISSUES.value]
+    return set(map(lambda i: i.problem_id, issues_list))
+
+
+def __push_inspections_ids(unique_inspections: Set[str], new_inspections: Set[str]) -> None:
+    unique_inspections.union(new_inspections)
+
+
+def __get_unique_inspections(solutions_df: pd.DataFrame) -> Set[str]:
+    unique_inspections: Set[str] = set()
+    solutions_df.apply(lambda row: __push_inspections_ids(unique_inspections,
+                                                          __get_inspections_ids(
+                                                              row[QodanaColumnName.INSPECTIONS.value]
+                                                          )), axis=1)
+    return unique_inspections
+
+
+def __create_unique_inspections_df(unique_inspections: Set[str]) -> pd.DataFrame:
+    id_to_inspection = {}
+    for index, inspection in enumerate(unique_inspections):
+        id_to_inspection[index + 1] = inspection
+    return pd.DataFrame(id_to_inspection.items(),
+                        columns=[QodanaColumnName.ID.value, QodanaColumnName.INSPECTION_ID.value])
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser()
+    configure_arguments(parser)
+    args = parser.parse_args()
+
+    solutions_file_path = args.solutions_file_path
+    solutions_df = get_solutions_df_by_file_path(solutions_file_path)
+
+    inspections_df = __create_unique_inspections_df(__get_unique_inspections(solutions_df))
+    output_path = get_parent_folder(Path(solutions_file_path))
+    write_dataframe_to_csv(output_path / f'inspections{Extension.CSV.value}',  inspections_df)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/src/python/evaluation/qodana/util/models.py b/src/python/evaluation/qodana/util/models.py
index f5b3a589..de86da7c 100644
--- a/src/python/evaluation/qodana/util/models.py
+++ b/src/python/evaluation/qodana/util/models.py
@@ -42,6 +42,8 @@ def from_json(cls, str_json: str) -> 'QodanaIssue':
 @unique
 class QodanaColumnName(Enum):
     INSPECTIONS = 'inspections'
+    ID = 'id'
+    INSPECTION_ID = 'inspection_id'
 
 
 @unique

From a89df95f41f3f8583c03971f98bd38eae70af92c Mon Sep 17 00:00:00 2001
From: "Anastasiia.Birillo" <nbirillo@mail.ru>
Date: Fri, 21 May 2021 17:10:08 +0300
Subject: [PATCH 2/6] Fix small bug and add readme

---
 src/python/evaluation/qodana/README.md        | 23 +++++++++++++++++++
 .../qodana/get_unique_inspectors.py           | 23 +++++++------------
 2 files changed, 31 insertions(+), 15 deletions(-)

diff --git a/src/python/evaluation/qodana/README.md b/src/python/evaluation/qodana/README.md
index a18748a4..217b0fdd 100644
--- a/src/python/evaluation/qodana/README.md
+++ b/src/python/evaluation/qodana/README.md
@@ -33,6 +33,7 @@ This module allows preparing datasets that were graded by [dataset_marking.py](d
 Data processing consists of several stages:
 - union several `csv` files that were graded by [dataset_marking.py](dataset_marking.py) script 
   and filter inspections list if it is necessary;
+- get all unique inspections from the dataset;
 - convert `csv` file into a special format.
 
 ## Filter inspections
@@ -59,3 +60,25 @@ Argument | Description
 |**&#8209;i**, **&#8209;&#8209;inspections**| Set of inspections ids to exclude from the dataset separated by comma. By default all inspections remain. |
 
 The resulting file will be stored in the `dataset_folder`.
+
+___
+
+## Get all unique inspections
+
+This stage allows you to get all unique inspections from a `csv` file graded by Qodana.
+Please, note that your input file must be graded by [dataset_marking.py](dataset_marking.py) script 
+and has `inspections` column.
+
+Output file is a new `csv` file with two columns: `id` and `inspection_id`. 
+`id` is unique number for each inspection, minimal value is 1.
+`inspection_id` is unique Qoadana id for each inspection.
+
+#### Usage
+
+Run the [get_unique_inspectors.py](get_unique_inspectors.py) with the arguments from command line.
+
+Required arguments:
+
+`solutions_file_path` — path to csv-file with code samples graded by [dataset_marking.py](dataset_marking.py) script.
+
+The resulting file will be stored in the same folder as the input file.
diff --git a/src/python/evaluation/qodana/get_unique_inspectors.py b/src/python/evaluation/qodana/get_unique_inspectors.py
index fb2f5906..887df3c5 100644
--- a/src/python/evaluation/qodana/get_unique_inspectors.py
+++ b/src/python/evaluation/qodana/get_unique_inspectors.py
@@ -7,32 +7,25 @@
 from src.python.common.tool_arguments import RunToolArgument
 from src.python.evaluation.common.csv_util import write_dataframe_to_csv
 from src.python.evaluation.common.pandas_util import get_solutions_df_by_file_path
-from src.python.evaluation.qodana.util.models import QodanaJsonField, QodanaColumnName
-from src.python.review.common.file_system import get_parent_folder, Extension
+from src.python.evaluation.qodana.util.models import QodanaColumnName, QodanaIssue, QodanaJsonField
+from src.python.review.common.file_system import Extension, get_parent_folder
 
 
 def configure_arguments(parser: argparse.ArgumentParser) -> None:
     parser.add_argument(RunToolArgument.SOLUTIONS_FILE_PATH.value.long_name,
                         type=lambda value: Path(value).absolute(),
-                        help=f'Csv file with solutions. This file must be graded by Qodana.')
+                        help='Csv file with solutions. This file must be graded by Qodana.')
 
 
 def __get_inspections_ids(json_issues: str) -> Set[str]:
-    issues_list = json.loads(json_issues)[QodanaJsonField.ISSUES.value]
+    issues_list = list(map(lambda i: QodanaIssue.from_json(i), json.loads(json_issues)[QodanaJsonField.ISSUES.value]))
     return set(map(lambda i: i.problem_id, issues_list))
 
 
-def __push_inspections_ids(unique_inspections: Set[str], new_inspections: Set[str]) -> None:
-    unique_inspections.union(new_inspections)
-
-
 def __get_unique_inspections(solutions_df: pd.DataFrame) -> Set[str]:
-    unique_inspections: Set[str] = set()
-    solutions_df.apply(lambda row: __push_inspections_ids(unique_inspections,
-                                                          __get_inspections_ids(
-                                                              row[QodanaColumnName.INSPECTIONS.value]
-                                                          )), axis=1)
-    return unique_inspections
+    inspections = solutions_df.apply(lambda row: __get_inspections_ids(row[QodanaColumnName.INSPECTIONS.value]),
+                                     axis=1)
+    return set.union(*inspections.values)
 
 
 def __create_unique_inspections_df(unique_inspections: Set[str]) -> pd.DataFrame:
@@ -53,7 +46,7 @@ def main() -> None:
 
     inspections_df = __create_unique_inspections_df(__get_unique_inspections(solutions_df))
     output_path = get_parent_folder(Path(solutions_file_path))
-    write_dataframe_to_csv(output_path / f'inspections{Extension.CSV.value}',  inspections_df)
+    write_dataframe_to_csv(output_path / f'inspections{Extension.CSV.value}', inspections_df)
 
 
 if __name__ == '__main__':

From 63cae5aa0c1c159c177a090bcd7496f99e0a8f2d Mon Sep 17 00:00:00 2001
From: "Anastasiia.Birillo" <nbirillo@mail.ru>
Date: Fri, 21 May 2021 18:32:30 +0300
Subject: [PATCH 3/6] Add a script for preprocessing data for a qodana model

---
 src/python/common/tool_arguments.py           |  3 +
 src/python/evaluation/qodana/README.md        | 75 +++++++++++++++++++
 .../evaluation/qodana/filter_inspections.py   |  8 +-
 .../qodana/fragment_to_inspections_list.py    | 60 +++++++++++++++
 .../qodana/get_unique_inspectors.py           |  9 +--
 src/python/evaluation/qodana/util/models.py   |  5 ++
 6 files changed, 150 insertions(+), 10 deletions(-)
 create mode 100644 src/python/evaluation/qodana/fragment_to_inspections_list.py

diff --git a/src/python/common/tool_arguments.py b/src/python/common/tool_arguments.py
index d3048051..a85c0e0a 100644
--- a/src/python/common/tool_arguments.py
+++ b/src/python/common/tool_arguments.py
@@ -89,3 +89,6 @@ class RunToolArgument(Enum):
 
     DIFFS_FILE_PATH = ArgumentsInfo(None, 'diffs_file_path',
                                     'Path to a file with serialized diffs that were founded by diffs_between_df.py')
+
+    QODANA_SOLUTIONS_FILE_PATH = ArgumentsInfo(None, 'solutions_file_path',
+                                               'Csv file with solutions. This file must be graded by Qodana.')
diff --git a/src/python/evaluation/qodana/README.md b/src/python/evaluation/qodana/README.md
index 217b0fdd..7c3713ce 100644
--- a/src/python/evaluation/qodana/README.md
+++ b/src/python/evaluation/qodana/README.md
@@ -82,3 +82,78 @@ Required arguments:
 `solutions_file_path` — path to csv-file with code samples graded by [dataset_marking.py](dataset_marking.py) script.
 
 The resulting file will be stored in the same folder as the input file.
+
+An example of the output file:
+
+```json
+id   |  inspection_id    
+-----|-------------------
+1    |  SystemOutErr   
+2    |  ConstantExpression
+```
+
+___
+
+#### Convert `csv` file into a special format
+
+This block describes the formats into which a csv-file with code samples
+graded by the [dataset_marking.py](dataset_marking.py) script can be converted.
+
+We have two different formats:
+- fragment to inspections list;
+- fragment to inspections list with positions.
+
+
+#### Fragment to inspections list
+
+This data representation matches code fragments to a list with ids of inspections.
+
+Please, note that your input file must be graded by [dataset_marking.py](dataset_marking.py) script 
+and has `inspections` column.
+
+The output file is a new `csv` file in which the `inspections` column contains a comma-separated list of inspection ids.
+If the list of inspections for a fragment is empty, `0` is written instead.
+
+#### Usage
+
+Run the [fragment_to_inspections_list.py](fragment_to_inspections_list.py) with the arguments from command line.
+
+Required arguments:
+
+- `solutions_file_path` — path to csv-file with code samples graded by [dataset_marking.py](dataset_marking.py) script,
+- `inspections_path` — path to csv-file with inspections list from the input file. You can get this file by [get_unique_inspectors.py](get_unique_inspectors.py) script.
+
+The resulting file will be stored in the same folder as the input file.
+
+An example of the input file:
+
+```json
+id   |  code             |  lang         |  inspections
+-----|-------------------|---------------|-----------------
+2    |  "// some code"   |  java11       |  "{""issues"": []}"
+3    |  "// some code"   |  java11       |  "{""issues"": [""{\"... \""problem_id\"": \""SystemOutErr\""}""]}"
+0    |  "// some code"   |  java11       |  "{""issues"": [""{\"...\""problem_id\"": \""ConstantExpression\""}"",""{\"...\""problem_id\"": \""ConstantExpression\""}""]}"
+1    |  "// some code"   |  java11       |  "{""issues"": []}"
+
+```
+
+with the inspections file: 
+
+```json
+id   |  inspection_id    
+-----|-------------------
+1    |  SystemOutErr   
+2    |  ConstantExpression
+```
+
+An example of the output file:
+
+```json
+id   |  code             |  lang         |  inspections
+-----|-------------------|---------------|-----------------
+2    |  "// some code"   |  java11       |  0
+3    |  "// some code"   |  java11       |  1
+0    |  "// some code"   |  java11       |  2,2
+1    |  "// some code"   |  java11       |  0
+
+```
\ No newline at end of file
diff --git a/src/python/evaluation/qodana/filter_inspections.py b/src/python/evaluation/qodana/filter_inspections.py
index 6f758965..9321a7eb 100644
--- a/src/python/evaluation/qodana/filter_inspections.py
+++ b/src/python/evaluation/qodana/filter_inspections.py
@@ -1,5 +1,4 @@
 import argparse
-import json
 from pathlib import Path
 from typing import List
 
@@ -7,7 +6,7 @@
 from src.python.evaluation.common.csv_util import write_dataframe_to_csv
 from src.python.evaluation.common.pandas_util import get_solutions_df_by_file_path
 from src.python.evaluation.common.util import parse_set_arg
-from src.python.evaluation.qodana.util.models import QodanaColumnName, QodanaIssue, QodanaJsonField
+from src.python.evaluation.qodana.util.models import QodanaColumnName, QodanaIssue
 from src.python.evaluation.qodana.util.util import to_json
 from src.python.review.common.file_system import Extension, extension_file_condition, get_all_file_system_items
 
@@ -35,9 +34,8 @@ def __get_qodana_dataset(root: Path) -> pd.DataFrame:
 
 
 def __filter_inspections(json_issues: str, inspections_to_keep: List[str]) -> str:
-    issues_list = json.loads(json_issues)[QodanaJsonField.ISSUES.value]
-    filtered_issues = list(filter(lambda i: i.problem_id not in inspections_to_keep,
-                                  map(lambda i: QodanaIssue.from_json(i), issues_list)))
+    issues_list = QodanaIssue.parse_list_issues_from_json(json_issues)
+    filtered_issues = list(filter(lambda i: i.problem_id not in inspections_to_keep, issues_list))
     return to_json(filtered_issues)
 
 
diff --git a/src/python/evaluation/qodana/fragment_to_inspections_list.py b/src/python/evaluation/qodana/fragment_to_inspections_list.py
new file mode 100644
index 00000000..26513522
--- /dev/null
+++ b/src/python/evaluation/qodana/fragment_to_inspections_list.py
@@ -0,0 +1,60 @@
+import argparse
+from pathlib import Path
+from typing import Dict
+
+import pandas as pd
+from src.python.common.tool_arguments import RunToolArgument
+from src.python.evaluation.common.csv_util import write_dataframe_to_csv
+from src.python.evaluation.common.pandas_util import get_solutions_df_by_file_path
+from src.python.evaluation.qodana.util.models import QodanaColumnName, QodanaIssue
+from src.python.review.common.file_system import Extension, get_parent_folder
+
+INSPECTIONS = QodanaColumnName.INSPECTIONS.value
+
+
+def configure_arguments(parser: argparse.ArgumentParser) -> None:
+    parser.add_argument(RunToolArgument.QODANA_SOLUTIONS_FILE_PATH.value.long_name,
+                        type=lambda value: Path(value).absolute(),
+                        help=RunToolArgument.QODANA_SOLUTIONS_FILE_PATH.value.description)
+
+    parser.add_argument('inspections_path',
+                        type=lambda value: Path(value).absolute(),
+                        help='Path to a CSV file with inspections list')
+
+
+def __get_inspections_dict(inspections_path: str) -> Dict[str, int]:
+    inspections_df = pd.read_csv(inspections_path)
+    inspections_dict = inspections_df.set_index(QodanaColumnName.INSPECTION_ID.value).T.to_dict('list')
+    for qodana_id, id_list in inspections_dict.items():
+        inspections_dict[qodana_id] = id_list[0]
+    return inspections_dict
+
+
+def __replace_inspections_on_its_ids(json_issues: str, inspections_dict: Dict[str, int]) -> str:
+    issues_list = QodanaIssue.parse_list_issues_from_json(json_issues)
+    if len(issues_list) == 0:
+        inspections = '0'
+    else:
+        issues_list.sort(key=lambda x: x.problem_id)
+        inspections = ','.join(str(inspections_dict[i.problem_id]) for i in issues_list)
+    return inspections
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser()
+    configure_arguments(parser)
+    args = parser.parse_args()
+
+    solutions_file_path = args.solutions_file_path
+    solutions_df = get_solutions_df_by_file_path(solutions_file_path)
+    inspections_dict = __get_inspections_dict(args.inspections_path)
+
+    solutions_df[INSPECTIONS] = solutions_df.apply(
+        lambda row: __replace_inspections_on_its_ids(row[INSPECTIONS], inspections_dict), axis=1)
+
+    output_path = get_parent_folder(Path(solutions_file_path))
+    write_dataframe_to_csv(output_path / f'numbered_ids{Extension.CSV.value}', solutions_df)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/src/python/evaluation/qodana/get_unique_inspectors.py b/src/python/evaluation/qodana/get_unique_inspectors.py
index 887df3c5..1be04110 100644
--- a/src/python/evaluation/qodana/get_unique_inspectors.py
+++ b/src/python/evaluation/qodana/get_unique_inspectors.py
@@ -1,5 +1,4 @@
 import argparse
-import json
 from pathlib import Path
 from typing import Set
 
@@ -7,18 +6,18 @@
 from src.python.common.tool_arguments import RunToolArgument
 from src.python.evaluation.common.csv_util import write_dataframe_to_csv
 from src.python.evaluation.common.pandas_util import get_solutions_df_by_file_path
-from src.python.evaluation.qodana.util.models import QodanaColumnName, QodanaIssue, QodanaJsonField
+from src.python.evaluation.qodana.util.models import QodanaColumnName, QodanaIssue
 from src.python.review.common.file_system import Extension, get_parent_folder
 
 
 def configure_arguments(parser: argparse.ArgumentParser) -> None:
-    parser.add_argument(RunToolArgument.SOLUTIONS_FILE_PATH.value.long_name,
+    parser.add_argument(RunToolArgument.QODANA_SOLUTIONS_FILE_PATH.value.long_name,
                         type=lambda value: Path(value).absolute(),
-                        help='Csv file with solutions. This file must be graded by Qodana.')
+                        help=RunToolArgument.QODANA_SOLUTIONS_FILE_PATH.value.description)
 
 
 def __get_inspections_ids(json_issues: str) -> Set[str]:
-    issues_list = list(map(lambda i: QodanaIssue.from_json(i), json.loads(json_issues)[QodanaJsonField.ISSUES.value]))
+    issues_list = QodanaIssue.parse_list_issues_from_json(json_issues)
     return set(map(lambda i: i.problem_id, issues_list))
 
 
diff --git a/src/python/evaluation/qodana/util/models.py b/src/python/evaluation/qodana/util/models.py
index de86da7c..769b216f 100644
--- a/src/python/evaluation/qodana/util/models.py
+++ b/src/python/evaluation/qodana/util/models.py
@@ -1,6 +1,7 @@
 import json
 from dataclasses import dataclass
 from enum import Enum, unique
+from typing import List
 
 
 @dataclass(frozen=True)
@@ -38,6 +39,10 @@ def from_json(cls, str_json: str) -> 'QodanaIssue':
             problem_id=issue[QodanaJsonField.PROBLEM_ID.value],
         )
 
+    @classmethod
+    def parse_list_issues_from_json(cls, str_json: str) -> List['QodanaIssue']:
+        return list(map(lambda i: QodanaIssue.from_json(i), json.loads(str_json)[QodanaJsonField.ISSUES.value]))
+
 
 @unique
 class QodanaColumnName(Enum):

From 0b384b369c91952e78fdafb839c0fc9dbd5a80b0 Mon Sep 17 00:00:00 2001
From: "Anastasiia.Birillo" <nbirillo@mail.ru>
Date: Mon, 24 May 2021 17:46:49 +0300
Subject: [PATCH 4/6] Count inspections

---
 src/python/evaluation/qodana/README.md        |  9 ++-
 .../qodana/fragment_to_inspections_list.py    |  3 +-
 .../qodana/get_unique_inspectors.py           | 66 +++++++++++++++----
 src/python/evaluation/qodana/util/models.py   |  2 +
 whitelist.txt                                 |  1 +
 5 files changed, 67 insertions(+), 14 deletions(-)

diff --git a/src/python/evaluation/qodana/README.md b/src/python/evaluation/qodana/README.md
index 7c3713ce..09d58af0 100644
--- a/src/python/evaluation/qodana/README.md
+++ b/src/python/evaluation/qodana/README.md
@@ -69,9 +69,11 @@ This stage allow you to get all unique inspections from a `csv` file graded by Q
 Please, note that your input file must be graded by [dataset_marking.py](dataset_marking.py) script 
 and has `inspections` column.
 
-Output file is a new `csv` file with two columns: `id` and `inspection_id`. 
+Output file is a new `csv` file with four columns: `id`, `inspection_id`, `count_all`, `count_unique`. 
 `id` is unique number for each inspection, minimal value is 1.
 `inspection_id` is unique Qoadana id for each inspection.
+`count_all` is the total number of occurrences of this inspection across all fragments (with duplicates).
+`count_unique` is the number of fragments that contain this inspection at least once (without duplicates); it is 0 unless `--uniq` is passed.
 
 #### Usage
 
@@ -81,6 +83,11 @@ Required arguments:
 
 `solutions_file_path` — path to csv-file with code samples graded by [dataset_marking.py](dataset_marking.py) script.
 
+Optional arguments:
+Argument | Description
+--- | ---
+|**&#8209;&#8209;uniq**| Count, for each inspection, the number of fragments in which it occurs (without duplicates). Disabled by default. |
+
 The resulting file will be stored in the same folder as the input file.
 
 An example of the output file:
diff --git a/src/python/evaluation/qodana/fragment_to_inspections_list.py b/src/python/evaluation/qodana/fragment_to_inspections_list.py
index 26513522..2a5218f7 100644
--- a/src/python/evaluation/qodana/fragment_to_inspections_list.py
+++ b/src/python/evaluation/qodana/fragment_to_inspections_list.py
@@ -23,7 +23,8 @@ def configure_arguments(parser: argparse.ArgumentParser) -> None:
 
 
 def __get_inspections_dict(inspections_path: str) -> Dict[str, int]:
-    inspections_df = pd.read_csv(inspections_path)
+    inspections_df = pd.read_csv(inspections_path,
+                                 usecols=[QodanaColumnName.ID.value, QodanaColumnName.INSPECTION_ID.value])
     inspections_dict = inspections_df.set_index(QodanaColumnName.INSPECTION_ID.value).T.to_dict('list')
     for qodana_id, id_list in inspections_dict.items():
         inspections_dict[qodana_id] = id_list[0]
diff --git a/src/python/evaluation/qodana/get_unique_inspectors.py b/src/python/evaluation/qodana/get_unique_inspectors.py
index 1be04110..35c32bdb 100644
--- a/src/python/evaluation/qodana/get_unique_inspectors.py
+++ b/src/python/evaluation/qodana/get_unique_inspectors.py
@@ -1,6 +1,8 @@
 import argparse
+import itertools
+from collections import defaultdict
 from pathlib import Path
-from typing import Set
+from typing import Dict, List, Optional
 
 import pandas as pd
 from src.python.common.tool_arguments import RunToolArgument
@@ -10,29 +12,65 @@
 from src.python.review.common.file_system import Extension, get_parent_folder
 
 
+INSPECTION_ID = QodanaColumnName.INSPECTION_ID.value
+INSPECTIONS = QodanaColumnName.INSPECTIONS.value
+COUNT_ALL = QodanaColumnName.COUNT_ALL.value
+COUNT_UNIQUE = QodanaColumnName.COUNT_UNIQUE.value
+ID = QodanaColumnName.ID.value
+
+
 def configure_arguments(parser: argparse.ArgumentParser) -> None:
     parser.add_argument(RunToolArgument.QODANA_SOLUTIONS_FILE_PATH.value.long_name,
                         type=lambda value: Path(value).absolute(),
                         help=RunToolArgument.QODANA_SOLUTIONS_FILE_PATH.value.description)
 
+    parser.add_argument('--uniq',
+                        help='If True, count fragments for eash inspection in which this inspection was.',
+                        action='store_true')
+
 
-def __get_inspections_ids(json_issues: str) -> Set[str]:
+def __get_inspections_ids(json_issues: str) -> List[str]:
     issues_list = QodanaIssue.parse_list_issues_from_json(json_issues)
-    return set(map(lambda i: i.problem_id, issues_list))
+    return list(map(lambda i: i.problem_id, issues_list))
+
+
+def __get_inspections_from_df(solutions_df: pd.DataFrame) -> List[str]:
+    inspections = solutions_df.apply(lambda row: __get_inspections_ids(row[INSPECTIONS]), axis=1)
+    return list(itertools.chain.from_iterable(inspections.values))
+
+
+def __count_uniq_inspections_in_fragment(json_issues: str, inspection_id_to_fragments: Dict[str, int]) -> None:
+    issues_list = set(__get_inspections_ids(json_issues))
+    for issue in issues_list:
+        inspection_id_to_fragments[issue] += 1
+
+
+def __get_uniq_inspections_in_all_fragments(solutions_df: pd.DataFrame) -> Dict[str, int]:
+    inspection_id_to_fragments: Dict[str, int] = defaultdict(int)
+    solutions_df.apply(lambda row: __count_uniq_inspections_in_fragment(row[INSPECTIONS], inspection_id_to_fragments),
+                       axis=1)
+
+    return inspection_id_to_fragments
 
 
-def __get_unique_inspections(solutions_df: pd.DataFrame) -> Set[str]:
-    inspections = solutions_df.apply(lambda row: __get_inspections_ids(row[QodanaColumnName.INSPECTIONS.value]),
-                                     axis=1)
-    return set.union(*inspections.values)
+def __get_all_inspections_by_inspection_id(inspection_id: str, all_inspections: List[str]) -> List[str]:
+    return list(filter(lambda i: i == inspection_id, all_inspections))
 
 
-def __create_unique_inspections_df(unique_inspections: Set[str]) -> pd.DataFrame:
+def __create_unique_inspections_df(inspections: List[str],
+                                   inspection_id_to_fragments: Optional[Dict[str, int]]) -> pd.DataFrame:
     id_to_inspection = {}
-    for index, inspection in enumerate(unique_inspections):
+    for index, inspection in enumerate(set(inspections)):
         id_to_inspection[index + 1] = inspection
-    return pd.DataFrame(id_to_inspection.items(),
-                        columns=[QodanaColumnName.ID.value, QodanaColumnName.INSPECTION_ID.value])
+    inspections_df = pd.DataFrame(id_to_inspection.items(), columns=[ID, INSPECTION_ID])
+    inspections_df[COUNT_ALL] = inspections_df.apply(lambda row: len(__get_all_inspections_by_inspection_id(
+        row[INSPECTION_ID], inspections)), axis=1)
+    if inspection_id_to_fragments is None:
+        inspections_df[COUNT_UNIQUE] = 0
+    else:
+        inspections_df[COUNT_UNIQUE] = inspections_df.apply(lambda row: inspection_id_to_fragments.get(
+            row[INSPECTION_ID], 0), axis=1)
+    return inspections_df
 
 
 def main() -> None:
@@ -42,8 +80,12 @@ def main() -> None:
 
     solutions_file_path = args.solutions_file_path
     solutions_df = get_solutions_df_by_file_path(solutions_file_path)
+    if args.uniq:
+        inspection_id_to_fragments = __get_uniq_inspections_in_all_fragments(solutions_df)
+    else:
+        inspection_id_to_fragments = None
+    inspections_df = __create_unique_inspections_df(__get_inspections_from_df(solutions_df), inspection_id_to_fragments)
 
-    inspections_df = __create_unique_inspections_df(__get_unique_inspections(solutions_df))
     output_path = get_parent_folder(Path(solutions_file_path))
     write_dataframe_to_csv(output_path / f'inspections{Extension.CSV.value}', inspections_df)
 
diff --git a/src/python/evaluation/qodana/util/models.py b/src/python/evaluation/qodana/util/models.py
index 769b216f..08ce4c9f 100644
--- a/src/python/evaluation/qodana/util/models.py
+++ b/src/python/evaluation/qodana/util/models.py
@@ -49,6 +49,8 @@ class QodanaColumnName(Enum):
     INSPECTIONS = 'inspections'
     ID = 'id'
     INSPECTION_ID = 'inspection_id'
+    COUNT_ALL = 'count_all'
+    COUNT_UNIQUE = 'count_unique'
 
 
 @unique
diff --git a/whitelist.txt b/whitelist.txt
index e7c8e657..e4dad09b 100644
--- a/whitelist.txt
+++ b/whitelist.txt
@@ -115,3 +115,4 @@ groupby
 getuid
 Popen
 datasets
+usecols

From 93f2a827917e0fc1c8936ebfa8554704942dfd2f Mon Sep 17 00:00:00 2001
From: "Anastasiia.Birillo" <nbirillo@mail.ru>
Date: Mon, 24 May 2021 17:50:36 +0300
Subject: [PATCH 5/6] Update readme

---
 src/python/evaluation/qodana/README.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/python/evaluation/qodana/README.md b/src/python/evaluation/qodana/README.md
index 09d58af0..8d1db192 100644
--- a/src/python/evaluation/qodana/README.md
+++ b/src/python/evaluation/qodana/README.md
@@ -93,10 +93,10 @@ The resulting file will be stored in the same folder as the input file.
 An example of the output file:
 
 ```json
-id   |  inspection_id    
------|-------------------
-1    |  SystemOutErr   
-2    |  ConstantExpression
+id   |  inspection_id      |  count_all   |  count_unique
+-----|---------------------|--------------|--------------
+1    |  SystemOutErr       |    5         |     2
+2    |  ConstantExpression |    1         |     1
 ```
 
 ___

From 03bba29f9074a9829a040d935274b35ccda81c41 Mon Sep 17 00:00:00 2001
From: Nastya Birillo <anastasia.i.birillo@gmail.com>
Date: Mon, 24 May 2021 19:59:52 +0300
Subject: [PATCH 6/6] Add a script for preprocessing data for the second qodana
 model (#36)

Add a script for preprocessing data for the second qodana model (inspections line by line)
---
 src/python/common/tool_arguments.py           |  4 +
 src/python/evaluation/README.md               |  6 +-
 src/python/evaluation/inspectors/README.md    |  4 +-
 .../inspectors/print_inspectors_statistics.py |  4 +-
 src/python/evaluation/qodana/README.md        | 74 ++++++++++++++++++-
 .../qodana/fragment_to_inspections_list.py    | 42 ++---------
 ...agment_to_inspections_list_line_by_line.py | 62 ++++++++++++++++
 src/python/evaluation/qodana/util/util.py     | 44 ++++++++++-
 whitelist.txt                                 |  1 +
 9 files changed, 195 insertions(+), 46 deletions(-)
 create mode 100644 src/python/evaluation/qodana/fragment_to_inspections_list_line_by_line.py

diff --git a/src/python/common/tool_arguments.py b/src/python/common/tool_arguments.py
index a85c0e0a..65af5a53 100644
--- a/src/python/common/tool_arguments.py
+++ b/src/python/common/tool_arguments.py
@@ -92,3 +92,7 @@ class RunToolArgument(Enum):
 
     QODANA_SOLUTIONS_FILE_PATH = ArgumentsInfo(None, 'solutions_file_path',
                                                'Csv file with solutions. This file must be graded by Qodana.')
+
+    QODANA_INSPECTIONS_PATH = ArgumentsInfo(None, 'inspections_path', 'Path to a CSV file with inspections list.')
+
+    QODANA_DUPLICATES = ArgumentsInfo(None, '--remove-duplicates', 'Remove duplicates around inspections')
diff --git a/src/python/evaluation/README.md b/src/python/evaluation/README.md
index 5aa4bdf7..af2dbbd8 100644
--- a/src/python/evaluation/README.md
+++ b/src/python/evaluation/README.md
@@ -29,7 +29,7 @@ Optional arguments:
 Argument | Description
 --- | ---
 |**&#8209;f**, **&#8209;&#8209;format**| The output format. Available values: `json`, `text`. The default value is `json` . Use this argument when `traceback` is enabled, otherwise it will not be used.|
-|**&#8209;tp**, **&#8209;&#8209;tool_path**| Path to run-tool. Default is `src/python/review/run_tool.py` .|
+|**&#8209;tp**, **&#8209;&#8209;tool&#8209;path**| Path to run-tool. Default is `src/python/review/run_tool.py` .|
 |**&#8209;&#8209;traceback**| To include a column with errors traceback into an output file. Default is `False`.|
-|**&#8209;ofp**, **&#8209;&#8209;output_folder_path**| An explicit folder path to store file with results. Default is a parent directory of a folder with xlsx-file or csv-file sent for inspection. |
-|**&#8209;ofn**, **&#8209;&#8209;output_file_name**| A name of an output file where evaluation results will be stored. Default is `results.xlsx` or `results.csv`.|
+|**&#8209;ofp**, **&#8209;&#8209;output&#8209;folder&#8209;path**| An explicit folder path to store file with results. Default is a parent directory of a folder with xlsx-file or csv-file sent for inspection. |
+|**&#8209;ofn**, **&#8209;&#8209;output&#8209;file&#8209;name**| A name of an output file where evaluation results will be stored. Default is `results.xlsx` or `results.csv`.|
diff --git a/src/python/evaluation/inspectors/README.md b/src/python/evaluation/inspectors/README.md
index a0de1314..5c54fe93 100644
--- a/src/python/evaluation/inspectors/README.md
+++ b/src/python/evaluation/inspectors/README.md
@@ -161,8 +161,8 @@ Optional arguments:
 Argument | Description
 --- | ---
 |**&#8209;&#8209;categorize**| If True, statistics will be categorized by several categories. By default is disabled.|
-|**&#8209;n**, **&#8209;&#8209;top_n**| The top N items will be printed. Default value is 10.|
-|**&#8209;&#8209;full_stat**| If True, full statistics (with all issues) will be printed. By default is disabled.|
+|**&#8209;n**, **&#8209;&#8209;top&#8209;n**| The top N items will be printed. Default value is 10.|
+|**&#8209;&#8209;full&#8209;stat**| If True, full statistics (with all issues) will be printed. By default is disabled.|
 
 The statistics will be printed into console.
 
diff --git a/src/python/evaluation/inspectors/print_inspectors_statistics.py b/src/python/evaluation/inspectors/print_inspectors_statistics.py
index 8b132a31..0a5605dd 100644
--- a/src/python/evaluation/inspectors/print_inspectors_statistics.py
+++ b/src/python/evaluation/inspectors/print_inspectors_statistics.py
@@ -19,12 +19,12 @@ def configure_arguments(parser: argparse.ArgumentParser) -> None:
                         help='If True, statistics will be categorized by several categories.',
                         action='store_true')
 
-    parser.add_argument('-n', '--top_n',
+    parser.add_argument('-n', '--top-n',
                         help='The top N items will be printed',
                         type=int,
                         default=10)
 
-    parser.add_argument('--full_stat',
+    parser.add_argument('--full-stat',
                         help='If True, full statistics will be printed.',
                         action='store_true')
 
diff --git a/src/python/evaluation/qodana/README.md b/src/python/evaluation/qodana/README.md
index 8d1db192..4e78972c 100644
--- a/src/python/evaluation/qodana/README.md
+++ b/src/python/evaluation/qodana/README.md
@@ -130,6 +130,11 @@ Required arguments:
 - `solutions_file_path` — path to csv-file with code samples graded by [dataset_marking.py](dataset_marking.py) script,
 - `inspections_path` — path to csv-file with inspections list from the input file. You can get this file by [get_unique_inspectors.py](get_unique_inspectors.py) script.
 
+Optional arguments:
+Argument | Description
+--- | ---
+|**&#8209;&#8209;remove&#8209;duplicates**| Remove duplicate inspections in each row. Default value is `False`. |
+
 The resulting file will be stored in the same folder as the input file.
 
 An example of the input file:
@@ -141,7 +146,6 @@ id   |  code             |  lang         |  inspections
 3    |  "// some code"   |  java11       |  "{""issues"": [""{\"... \""problem_id\"": \""SystemOutErr\""}""]}"
 0    |  "// some code"   |  java11       |  "{""issues"": [""{\"...\""problem_id\"": \""ConstantExpression\""}"",""{\"...\""problem_id\"": \""ConstantExpression\""}""]}"
 1    |  "// some code"   |  java11       |  "{""issues"": []}"
-
 ```
 
 with the inspections file: 
@@ -163,4 +167,70 @@ id   |  code             |  lang         |  inspections
 0    |  "// some code"   |  java11       |  2,2
 1    |  "// some code"   |  java11       |  0
 
-```
\ No newline at end of file
+```
+
+---
+
+#### Fragment to inspections list with positions
+
+This data representation matches each line in code fragments to a list with ids of inspections in this line.
+
+Please, note that your input file must be graded by [dataset_marking.py](dataset_marking.py) script 
+and have an `inspections` column.
+
+Output file is a new `csv` file with a new `inspections` column containing a list of inspection ids. 
+If the list of inspections for a line is empty, then 0 is written. 
+Note that each line in code fragments in the new file is stored in a separate row. 
+All indents as well as blank lines are kept.
+
+#### Usage
+
+Run the [fragment_to_inspections_list_line_by_line.py](fragment_to_inspections_list_line_by_line.py) with the arguments from command line.
+
+Required arguments:
+
+- `solutions_file_path` — path to csv-file with code samples graded by [dataset_marking.py](dataset_marking.py) script,
+- `inspections_path` — path to csv-file with inspections list from the input file. You can get this file by [get_unique_inspectors.py](get_unique_inspectors.py) script.
+
+Optional arguments:
+Argument | Description
+--- | ---
+|**&#8209;&#8209;remove&#8209;duplicates**| Remove duplicate inspections in each row. Default value is `False`. |
+
+The resulting file will be stored in the same folder as the input file.
+
+An example of the input file:
+
+```json
+id   |  code             |  lang         |  inspections
+-----|-------------------|---------------|-----------------
+2    |  "// some code"   |  java11       |  "{""issues"": []}"
+3    |  "// some code"   |  java11       |  "{""issues"": [""{\"... \""problem_id\"": \""SystemOutErr\""}""]}"
+0    |  "// some code"   |  java11       |  "{""issues"": [""{\"...\""problem_id\"": \""ConstantExpression\""}"",""{\"...\""problem_id\"": \""ConstantExpression\""}""]}"
+1    |  "// some code"   |  java11       |  "{""issues"": []}"
+```
+
+with the inspections file: 
+
+```json
+id   |  inspection_id    
+-----|-------------------
+1    |  SystemOutErr   
+2    |  ConstantExpression
+```
+
+An example of the output file:
+
+```json
+id   |  code                                  |  lang         |  inspections
+-----|----------------------------------------|---------------|-----------------
+2    |  "// first line from code with id 2"   |  java11       |  0
+2    |  "// second line from code with id 2"  |  java11       |  0
+3    |  "// first line from code with id 3"   |  java11       |  1
+3    |  "// second line from code with id 3"  |  java11       |  0
+0    |  "// first line from code with id 0"   |  java11       |  0
+0    |  "// second line from code with id 0"  |  java11       |  2,2
+1    |  "// first line from code with id 1"   |  java11       |  0
+1    |  "// second line from code with id 1"  |  java11       |  0
+
+```
diff --git a/src/python/evaluation/qodana/fragment_to_inspections_list.py b/src/python/evaluation/qodana/fragment_to_inspections_list.py
index 2a5218f7..42fe3ec6 100644
--- a/src/python/evaluation/qodana/fragment_to_inspections_list.py
+++ b/src/python/evaluation/qodana/fragment_to_inspections_list.py
@@ -1,57 +1,29 @@
 import argparse
 from pathlib import Path
-from typing import Dict
 
-import pandas as pd
-from src.python.common.tool_arguments import RunToolArgument
 from src.python.evaluation.common.csv_util import write_dataframe_to_csv
 from src.python.evaluation.common.pandas_util import get_solutions_df_by_file_path
 from src.python.evaluation.qodana.util.models import QodanaColumnName, QodanaIssue
+from src.python.evaluation.qodana.util.util import (
+    configure_model_converter_arguments, get_inspections_dict, replace_inspections_on_its_ids,
+)
 from src.python.review.common.file_system import Extension, get_parent_folder
 
 INSPECTIONS = QodanaColumnName.INSPECTIONS.value
 
 
-def configure_arguments(parser: argparse.ArgumentParser) -> None:
-    parser.add_argument(RunToolArgument.QODANA_SOLUTIONS_FILE_PATH.value.long_name,
-                        type=lambda value: Path(value).absolute(),
-                        help=RunToolArgument.QODANA_SOLUTIONS_FILE_PATH.value.description)
-
-    parser.add_argument('inspections_path',
-                        type=lambda value: Path(value).absolute(),
-                        help='Path to a CSV file with inspections list')
-
-
-def __get_inspections_dict(inspections_path: str) -> Dict[str, int]:
-    inspections_df = pd.read_csv(inspections_path,
-                                 usecols=[QodanaColumnName.ID.value, QodanaColumnName.INSPECTION_ID.value])
-    inspections_dict = inspections_df.set_index(QodanaColumnName.INSPECTION_ID.value).T.to_dict('list')
-    for qodana_id, id_list in inspections_dict.items():
-        inspections_dict[qodana_id] = id_list[0]
-    return inspections_dict
-
-
-def __replace_inspections_on_its_ids(json_issues: str, inspections_dict: Dict[str, int]) -> str:
-    issues_list = QodanaIssue.parse_list_issues_from_json(json_issues)
-    if len(issues_list) == 0:
-        inspections = '0'
-    else:
-        issues_list.sort(key=lambda x: x.problem_id)
-        inspections = ','.join(str(inspections_dict[i.problem_id]) for i in issues_list)
-    return inspections
-
-
 def main() -> None:
     parser = argparse.ArgumentParser()
-    configure_arguments(parser)
+    configure_model_converter_arguments(parser)
     args = parser.parse_args()
 
     solutions_file_path = args.solutions_file_path
     solutions_df = get_solutions_df_by_file_path(solutions_file_path)
-    inspections_dict = __get_inspections_dict(args.inspections_path)
+    inspections_dict = get_inspections_dict(args.inspections_path)
 
     solutions_df[INSPECTIONS] = solutions_df.apply(
-        lambda row: __replace_inspections_on_its_ids(row[INSPECTIONS], inspections_dict), axis=1)
+        lambda row: replace_inspections_on_its_ids(QodanaIssue.parse_list_issues_from_json(row[INSPECTIONS]),
+                                                   inspections_dict, args.remove_duplicates), axis=1)
 
     output_path = get_parent_folder(Path(solutions_file_path))
     write_dataframe_to_csv(output_path / f'numbered_ids{Extension.CSV.value}', solutions_df)
diff --git a/src/python/evaluation/qodana/fragment_to_inspections_list_line_by_line.py b/src/python/evaluation/qodana/fragment_to_inspections_list_line_by_line.py
new file mode 100644
index 00000000..c70d9ba1
--- /dev/null
+++ b/src/python/evaluation/qodana/fragment_to_inspections_list_line_by_line.py
@@ -0,0 +1,62 @@
+import argparse
+import os
+from itertools import groupby
+from pathlib import Path
+from typing import Dict, List
+
+import pandas as pd
+from src.python.evaluation.common.csv_util import write_dataframe_to_csv
+from src.python.evaluation.common.pandas_util import get_solutions_df_by_file_path
+from src.python.evaluation.common.util import ColumnName
+from src.python.evaluation.qodana.util.models import QodanaColumnName, QodanaIssue
+from src.python.evaluation.qodana.util.util import (
+    configure_model_converter_arguments, get_inspections_dict, replace_inspections_on_its_ids,
+)
+from src.python.review.common.file_system import Extension, get_parent_folder
+
+
+INSPECTIONS = QodanaColumnName.INSPECTIONS.value
+CODE = ColumnName.CODE.value
+
+
+# Make a new dataframe where code fragment is separated line by line and inspections are grouped line by line
+def __replace_inspections_to_its_ids_in_row(row: pd.Series, inspections_dict: Dict[str, int],
+                                            to_remove_duplicates: bool) -> pd.DataFrame:
+    row_df = pd.DataFrame(row).transpose()
+    fragment_lines = row_df.iloc[0][CODE].split(os.linesep)
+    fragment_df = row_df.loc[row_df.index.repeat(len(fragment_lines))].reset_index(drop=True)
+
+    issues_list = QodanaIssue.parse_list_issues_from_json(row_df.iloc[0][INSPECTIONS])
+    line_number_to_issues = {k: list(v) for k, v in groupby(issues_list, key=lambda i: i.line)}
+    for index, fragment_line in enumerate(fragment_lines):
+        issues = line_number_to_issues.get(index + 1, [])
+        fragment_df.iloc[index][CODE] = fragment_line
+        fragment_df.iloc[index][INSPECTIONS] = replace_inspections_on_its_ids(issues, inspections_dict,
+                                                                              to_remove_duplicates)
+    return fragment_df
+
+
+def __append_df(df: pd.DataFrame, df_list: List[pd.DataFrame]) -> None:
+    df_list.append(df)
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser()
+    configure_model_converter_arguments(parser)
+    args = parser.parse_args()
+
+    solutions_file_path = args.solutions_file_path
+    solutions_df = get_solutions_df_by_file_path(solutions_file_path)
+    inspections_dict = get_inspections_dict(args.inspections_path)
+
+    fragment_df_list = []
+    solutions_df.apply(
+        lambda row: __append_df(__replace_inspections_to_its_ids_in_row(row, inspections_dict, args.remove_duplicates),
+                                fragment_df_list), axis=1)
+
+    output_path = get_parent_folder(Path(solutions_file_path))
+    write_dataframe_to_csv(output_path / f'numbered_ids_line_by_line{Extension.CSV.value}', pd.concat(fragment_df_list))
+
+
+if __name__ == '__main__':
+    main()
diff --git a/src/python/evaluation/qodana/util/util.py b/src/python/evaluation/qodana/util/util.py
index 0c4b8712..3766b09d 100644
--- a/src/python/evaluation/qodana/util/util.py
+++ b/src/python/evaluation/qodana/util/util.py
@@ -1,7 +1,11 @@
+import argparse
 import json
-from typing import List
+from pathlib import Path
+from typing import Dict, List
 
-from src.python.evaluation.qodana.util.models import QodanaIssue, QodanaJsonField
+import pandas as pd
+from src.python.common.tool_arguments import RunToolArgument
+from src.python.evaluation.qodana.util.models import QodanaColumnName, QodanaIssue, QodanaJsonField
 
 
 def to_json(issues: List[QodanaIssue]) -> str:
@@ -9,3 +13,39 @@ def to_json(issues: List[QodanaIssue]) -> str:
         QodanaJsonField.ISSUES.value: list(map(lambda i: i.to_json(), issues)),
     }
     return json.dumps(issues_json)
+
+
+# Get a dictionary: Qodana inspection_id -> numeric id from csv file with two columns: id, inspection_id
+def get_inspections_dict(inspections_path: str) -> Dict[str, int]:
+    inspections_df = pd.read_csv(inspections_path)
+    inspections_dict = inspections_df.set_index(QodanaColumnName.INSPECTION_ID.value).T.to_dict('list')
+    for qodana_id, id_list in inspections_dict.items():
+        inspections_dict[qodana_id] = id_list[0]
+    return inspections_dict
+
+
+def replace_inspections_on_its_ids(issues_list: List[QodanaIssue], inspections_dict: Dict[str, int],
+                                   to_remove_duplicates: bool) -> str:
+    if len(issues_list) == 0:
+        inspections = '0'
+    else:
+        problem_id_list = list(map(lambda i: inspections_dict[i.problem_id], issues_list))
+        if to_remove_duplicates:
+            problem_id_list = list(set(problem_id_list))
+        problem_id_list.sort()
+        inspections = ','.join(str(p) for p in problem_id_list)
+    return inspections
+
+
+def configure_model_converter_arguments(parser: argparse.ArgumentParser) -> None:
+    parser.add_argument(RunToolArgument.QODANA_SOLUTIONS_FILE_PATH.value.long_name,
+                        type=lambda value: Path(value).absolute(),
+                        help=RunToolArgument.QODANA_SOLUTIONS_FILE_PATH.value.description)
+
+    parser.add_argument(RunToolArgument.QODANA_INSPECTIONS_PATH.value.long_name,
+                        type=lambda value: Path(value).absolute(),
+                        help=RunToolArgument.QODANA_INSPECTIONS_PATH.value.description)
+
+    parser.add_argument(RunToolArgument.QODANA_DUPLICATES.value.long_name,
+                        help=RunToolArgument.QODANA_DUPLICATES.value.description,
+                        action='store_true')
diff --git a/whitelist.txt b/whitelist.txt
index e4dad09b..6269ca26 100644
--- a/whitelist.txt
+++ b/whitelist.txt
@@ -116,3 +116,4 @@ getuid
 Popen
 datasets
 usecols
+linesep