From 045deffec47cf123a8fc7fb54a7ac7e5e5c1f20f Mon Sep 17 00:00:00 2001
From: "Anastasiia.Birillo"
Date: Mon, 17 May 2021 13:18:39 +0300
Subject: [PATCH 1/6] Update version to 1.2.0

---
 VERSION.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/VERSION.md b/VERSION.md
index 3eefcb9d..26aaba0e 100644
--- a/VERSION.md
+++ b/VERSION.md
@@ -1 +1 @@
-1.0.0
+1.2.0

From 7cd79e0473b7653ac35c033180faabf3fba5c4b4 Mon Sep 17 00:00:00 2001
From: "Anastasiia.Birillo"
Date: Thu, 20 May 2021 20:29:19 +0300
Subject: [PATCH 2/6] parse qodana output

---
 requirements-evaluation.txt | 1 +
 .../evaluation/qodana/dataset_marking.py | 30 +++++++++++++++----
 src/python/evaluation/qodana/util/__init__.py | 0
 .../evaluation/qodana/util/qoadana_issue.py | 11 +++++++
 4 files changed, 36 insertions(+), 6 deletions(-)
 create mode 100644 src/python/evaluation/qodana/util/__init__.py
 create mode 100644 src/python/evaluation/qodana/util/qoadana_issue.py

diff --git a/requirements-evaluation.txt b/requirements-evaluation.txt
index e498430c..a325d3c0 100644
--- a/requirements-evaluation.txt
+++ b/requirements-evaluation.txt
@@ -3,3 +3,4 @@
 pandas==1.2.3
 numpy~=1.20.2
 python_on_whales~=0.17.1
+docker

diff --git a/src/python/evaluation/qodana/dataset_marking.py b/src/python/evaluation/qodana/dataset_marking.py
index 9abd5e44..cc6878b6 100644
--- a/src/python/evaluation/qodana/dataset_marking.py
+++ b/src/python/evaluation/qodana/dataset_marking.py
@@ -10,7 +10,9 @@
 from dataclasses import dataclass
 from math import ceil
 from pathlib import Path
-from typing import Any, Dict, Optional, Set
+from typing import Any, Dict, Optional, Set, List
+
+from src.python.evaluation.qodana.util.qoadana_issue import QodanaIssue
 
 sys.path.append("../../../..")
 
@@ -155,6 +157,21 @@ def _mark_language(self, df: DataFrame, language: LanguageVersion) -> DataFrame:
         result = pd.concat(chunks)
         return result
 
+    @classmethod
+    def _get_fragment_id_from_fragment_file_path(cls, fragment_file_path: str) -> int:
+        pass
+
+    @classmethod
+    def _parse_inspections_files(cls, inspections_files: List[Path]):
+        for file in inspections_files:
+            issues = json.loads(str(file))['problems']
+            for issue in issues:
+                qodana_issue = QodanaIssue(line=issue['line'], offset=issue['offset'], length=issue['length'],
+                                           highlighted_element=issue['highlighted_element'],
+                                           description=issue['description'])
+                pass
+        pass
+
     def _mark_chunk(self, chunk: DataFrame, language: LanguageVersion):
         with new_temp_dir() as temp_dir:
             project_dir = temp_dir / "project"
@@ -174,7 +191,10 @@ def _mark_chunk(self, chunk: DataFrame, language: LanguageVersion):
             self._run_qodana(project_dir, results_dir)
 
             logger.info("Getting unique inspections")
-            inspections = self._get_inspections(results_dir)
+            inspections = self._get_inspections_files(results_dir)
+
+            # Todo: open all jsons and parse inspections
+
             existing_inspections = set(self.inspection_to_id.keys())
             new_inspections = inspections.difference(existing_inspections)
 
@@ -233,13 +253,11 @@ def _run_qodana(project_dir: Path, results_dir: Path):
         )
 
     @staticmethod
-    def _get_inspections(results_dir: Path) -> Set[str]:
+    def _get_inspections_files(results_dir: Path) -> Set[Path]:
         files = os.listdir(results_dir)
         file_name_regex = re.compile(r"(\w*).json")
 
-        inspection_files = filter(lambda file: file_name_regex.match(file), files)
-
-        return {file_name_regex.match(file).group(1) for file in inspection_files}
+        return set(map(lambda f: results_dir / f, filter(lambda file: file_name_regex.match(file), files)))
 
     def _parse(self, results_dir: Path, inspections: Set[str]):
         package_regex = re.compile(r"solution(\d*)")

diff --git a/src/python/evaluation/qodana/util/__init__.py b/src/python/evaluation/qodana/util/__init__.py
new file mode 100644
index 00000000..e69de29b

diff --git a/src/python/evaluation/qodana/util/qoadana_issue.py b/src/python/evaluation/qodana/util/qoadana_issue.py
new file mode 100644
index 00000000..ff4db5d7
--- /dev/null
+++ b/src/python/evaluation/qodana/util/qoadana_issue.py
@@ -0,0 +1,11 @@
+from dataclasses import dataclass
+
+
+@dataclass(frozen=True)
+class QodanaIssue:
+    fragment_id: int
+    line: int
+    offset: int
+    length: int
+    highlighted_element: str
+    description: str

From 75cac7b4e64570c74c950c9a060492511e7ffd14 Mon Sep 17 00:00:00 2001
From: "Anastasiia.Birillo"
Date: Thu, 20 May 2021 21:12:42 +0300
Subject: [PATCH 3/6] Change qodana scipt output

---
 .../evaluation/qodana/dataset_marking.py | 93 ++++++++++---------
 .../util/{qoadana_issue.py => models.py} | 7 ++
 src/python/review/common/file_system.py | 4 +-
 3 files changed, 59 insertions(+), 45 deletions(-)
 rename src/python/evaluation/qodana/util/{qoadana_issue.py => models.py} (61%)

diff --git a/src/python/evaluation/qodana/dataset_marking.py b/src/python/evaluation/qodana/dataset_marking.py
index cc6878b6..7697fc23 100644
--- a/src/python/evaluation/qodana/dataset_marking.py
+++ b/src/python/evaluation/qodana/dataset_marking.py
@@ -10,9 +10,7 @@
 from dataclasses import dataclass
 from math import ceil
 from pathlib import Path
-from typing import Any, Dict, Optional, Set, List
-
-from src.python.evaluation.qodana.util.qoadana_issue import QodanaIssue
+from typing import Any, Dict, List, Optional, Set
 
 sys.path.append("../../../..")
 
@@ -21,8 +19,11 @@
 from pandas import DataFrame
 from python_on_whales import docker
 from src.python.evaluation.common.util import ColumnName
+from src.python.evaluation.qodana.util.models import QodanaColumnName, QodanaIssue
 from src.python.review.application_config import LanguageVersion
-from src.python.review.common.file_system import new_temp_dir
+from src.python.review.common.file_system import (
+    get_content_from_file, get_name_from_path, get_parent_folder, new_temp_dir,
+)
 from src.python.review.run_tool import positive_int
 
 logger = logging.getLogger(__name__)
@@ -105,10 +106,8 @@ def __init__(self, args: Namespace):
 
         self.inspections_output_path = args.inspections_output_path
 
-        self.inspection_to_id = {}
-
     def mark(self):
-        df = pd.read_csv(self.dataset_path, index_col=ColumnName.ID.value, nrows=self.limit)
+        df = pd.read_csv(self.dataset_path, nrows=self.limit)
 
         group_by_lang = df.groupby(ColumnName.LANG.value)
         unique_languages = df[ColumnName.LANG.value].unique()
@@ -136,12 +135,12 @@ def mark(self):
         df = pd.concat(groups)
         logger.info("Writing the dataset to a file.")
         df.to_csv(self.dataset_output_path)
-
-        id_to_inspection = {value: index for index, value in self.inspection_to_id.items()}
-
-        id_to_inspection_df = pd.DataFrame.from_dict(id_to_inspection, orient="index", columns=["inspection"])
-        id_to_inspection_df.index.name = "id"
-        id_to_inspection_df.to_csv(self.inspections_output_path)
+        #
+        # id_to_inspection = {value: index for index, value in self.inspection_to_id.items()}
+        #
+        # id_to_inspection_df = pd.DataFrame.from_dict(id_to_inspection, orient="index", columns=["inspection"])
+        # id_to_inspection_df.index.name = "id"
+        # id_to_inspection_df.to_csv(self.inspections_output_path)
 
     def _mark_language(self, df: DataFrame, language: LanguageVersion) -> DataFrame:
         number_of_chunks = 1
@@ -149,30 +148,43 @@ def _mark_language(self, df: DataFrame, language: LanguageVersion) -> DataFrame:
             number_of_chunks = ceil(df.shape[0] / self.chunk_size)
 
         chunks = np.array_split(df, number_of_chunks)
+        labeled_chunks = []
         for index, chunk in enumerate(chunks):
             logger.info(f"Processing chunk: {index + 1} / {number_of_chunks}")
-            self._mark_chunk(chunk, language)
+            chunk = self._mark_chunk(chunk, language)
+            labeled_chunks.append(chunk)
 
         logger.info(f"{language} processing finished.")
-        result = pd.concat(chunks)
+        result = pd.concat(labeled_chunks)
         return result
 
+    @classmethod
+    def _extract_fragment_id(cls, folder_name: str) -> int:
+        numbers = re.findall(r'\d+', folder_name)
+        if len(numbers) != 1:
+            raise ValueError(f'Can npt extract fragment id from {folder_name}')
+        return numbers[0]
+
     @classmethod
     def _get_fragment_id_from_fragment_file_path(cls, fragment_file_path: str) -> int:
-        pass
+        folder_name = get_name_from_path(get_parent_folder(fragment_file_path), with_extension=False)
+        return cls._extract_fragment_id(folder_name)
 
     @classmethod
-    def _parse_inspections_files(cls, inspections_files: List[Path]):
+    def _parse_inspections_files(cls, inspections_files: Set[Path]) -> Dict[int, List[QodanaIssue]]:
+        id_to_issues: Dict[int, List[QodanaIssue]] = defaultdict(list)
         for file in inspections_files:
-            issues = json.loads(str(file))['problems']
+            issues = json.loads(get_content_from_file(file))['problems']
             for issue in issues:
+                fragment_id = cls._get_fragment_id_from_fragment_file_path(issue['file'])
                 qodana_issue = QodanaIssue(line=issue['line'], offset=issue['offset'], length=issue['length'],
                                            highlighted_element=issue['highlighted_element'],
-                                           description=issue['description'])
-                pass
-        pass
+                                           description=issue['description'], fragment_id=fragment_id,
+                                           problem_id=issue['problem_class']['id'])
+                id_to_issues[fragment_id].append(qodana_issue)
+        return id_to_issues
 
-    def _mark_chunk(self, chunk: DataFrame, language: LanguageVersion):
+    def _mark_chunk(self, chunk: DataFrame, language: LanguageVersion) -> pd.DataFrame:
         with new_temp_dir() as temp_dir:
             project_dir = temp_dir / "project"
             results_dir = temp_dir / "results"
@@ -190,22 +202,15 @@ def _mark_chunk(self, chunk: DataFrame, language: LanguageVersion):
             logger.info("Running qodana")
             self._run_qodana(project_dir, results_dir)
 
-            logger.info("Getting unique inspections")
-            inspections = self._get_inspections_files(results_dir)
-
-            # Todo: open all jsons and parse inspections
+            logger.info("Getting inspections")
+            inspections_files = self._get_inspections_files(results_dir)
+            inspections = self._parse_inspections_files(inspections_files)
 
-            existing_inspections = set(self.inspection_to_id.keys())
-            new_inspections = inspections.difference(existing_inspections)
+            logger.info("Write inspections")
+            chunk[QodanaColumnName.INSPECTIONS.value] = chunk.apply(
+                lambda row: inspections.get(row[ColumnName.ID.value], []), axis=1)
 
-            for inspection in new_inspections:
-                self.inspection_to_id[inspection] = len(self.inspection_to_id)
-
-            logger.info("Parsing the output of qodana")
-            solution_id_to_inspection_ids = self._parse(results_dir, inspections)
-            chunk["inspection_ids"] = ""
-            for solution_id, inspection_ids in solution_id_to_inspection_ids.items():
-                chunk.loc[solution_id, "inspection_ids"] = ",".join(map(str, inspection_ids))
+            return chunk
 
     @staticmethod
     def _copy_template(project_dir: Path, language: LanguageVersion):
@@ -244,13 +249,15 @@ def _create_main_files(project_dir: Path, chunk: DataFrame, language: LanguageVe
     @staticmethod
     def _run_qodana(project_dir: Path, results_dir: Path):
         results_dir.mkdir()
-
-        docker.run(
-            "jetbrains/qodana",
-            remove=True,
-            volumes=[(project_dir, "/data/project/"), (results_dir, "/data/results/")],
-            user=os.getuid(),
-        )
+        try:
+            docker.run(
+                "jetbrains/qodana",
+                remove=True,
+                volumes=[(project_dir, "/data/project/"), (results_dir, "/data/results/")],
+                user=os.getuid(),
+            )
+        except Exception as e:
+            logger.exception(f'Error during qodana running: {e}')
 
     @staticmethod
     def _get_inspections_files(results_dir: Path) -> Set[Path]:

diff --git a/src/python/evaluation/qodana/util/qoadana_issue.py b/src/python/evaluation/qodana/util/models.py
similarity index 61%
rename from src/python/evaluation/qodana/util/qoadana_issue.py
rename to src/python/evaluation/qodana/util/models.py
index ff4db5d7..0cc7de2f 100644
--- a/src/python/evaluation/qodana/util/qoadana_issue.py
+++ b/src/python/evaluation/qodana/util/models.py
@@ -1,4 +1,5 @@
 from dataclasses import dataclass
+from enum import Enum, unique
 
 
 @dataclass(frozen=True)
@@ -9,3 +10,9 @@ class QodanaIssue:
     length: int
     highlighted_element: str
     description: str
+    problem_id: str
+
+
+@unique
+class QodanaColumnName(Enum):
+    INSPECTIONS = 'inspections'

diff --git a/src/python/review/common/file_system.py b/src/python/review/common/file_system.py
index eb5bc768..a06daed7 100644
--- a/src/python/review/common/file_system.py
+++ b/src/python/review/common/file_system.py
@@ -85,7 +85,7 @@ def deserialize_data_from_file(path: Path) -> Any:
 
 # For getting name of the last folder or file
 # For example, returns 'folder' for both 'path/data/folder' and 'path/data/folder/'
-def get_name_from_path(path: str, with_extension: bool = True) -> str:
+def get_name_from_path(path: Union[Path, str], with_extension: bool = True) -> str:
     head, tail = os.path.split(path)
     # Tail can be empty if '/' is at the end of the path
     file_name = tail or os.path.basename(head)
@@ -173,7 +173,7 @@ def add_slash(path: str) -> str:
     return path
 
 
-def get_parent_folder(path: Path, to_add_slash: bool = False) -> Path:
+def get_parent_folder(path: Union[Path, str], to_add_slash: bool = False) -> Path:
     path = remove_slash(str(path))
     parent_folder = '/'.join(path.split('/')[:-1])
     if to_add_slash:

From c428b7859d2e4323f7dfef4734053b291638d9a7 Mon Sep 17 00:00:00 2001
From: "Anastasiia.Birillo"
Date: Fri, 21 May 2021 12:26:12 +0300
Subject: [PATCH 4/6] Fix a bug with qodana

---
 .../evaluation/qodana/dataset_marking.py | 83 +++++++++----------
 src/python/evaluation/qodana/util/models.py | 39 +++++++++
 src/python/review/common/file_system.py | 6 ++
 src/python/review/common/subprocess_runner.py | 5 ++
 whitelist.txt | 1 +
 5 files changed, 92 insertions(+), 42 deletions(-)

diff --git a/src/python/evaluation/qodana/dataset_marking.py b/src/python/evaluation/qodana/dataset_marking.py
index 7697fc23..574e7d07 100644
--- a/src/python/evaluation/qodana/dataset_marking.py
+++ b/src/python/evaluation/qodana/dataset_marking.py
@@ -17,13 +17,14 @@
 import numpy as np
 import pandas as pd
 from pandas import DataFrame
-from python_on_whales import docker
+from src.python.evaluation.common.csv_util import write_dataframe_to_csv
 from src.python.evaluation.common.util import ColumnName
-from src.python.evaluation.qodana.util.models import QodanaColumnName, QodanaIssue
+from src.python.evaluation.qodana.util.models import QodanaColumnName, QodanaIssue, QodanaJsonField
 from src.python.review.application_config import LanguageVersion
 from src.python.review.common.file_system import (
-    get_content_from_file, get_name_from_path, get_parent_folder, new_temp_dir,
+    create_directory, get_content_from_file, get_name_from_path, get_parent_folder, remove_directory,
 )
+from src.python.review.common.subprocess_runner import run_and_wait
 from src.python.review.run_tool import positive_int
 
 logger = logging.getLogger(__name__)
@@ -134,13 +135,7 @@ def mark(self):
 
         df = pd.concat(groups)
         logger.info("Writing the dataset to a file.")
-        df.to_csv(self.dataset_output_path)
-        #
-        # id_to_inspection = {value: index for index, value in self.inspection_to_id.items()}
-        #
-        # id_to_inspection_df = pd.DataFrame.from_dict(id_to_inspection, orient="index", columns=["inspection"])
-        # id_to_inspection_df.index.name = "id"
-        # id_to_inspection_df.to_csv(self.inspections_output_path)
+        write_dataframe_to_csv(self.dataset_output_path, df)
 
     def _mark_language(self, df: DataFrame, language: LanguageVersion) -> DataFrame:
         number_of_chunks = 1
@@ -149,10 +144,10 @@ def _mark_language(self, df: DataFrame, language: LanguageVersion) -> DataFrame:
 
         chunks = np.array_split(df, number_of_chunks)
         labeled_chunks = []
+        # Todo: run this in parallel
         for index, chunk in enumerate(chunks):
             logger.info(f"Processing chunk: {index + 1} / {number_of_chunks}")
-            chunk = self._mark_chunk(chunk, language)
-            labeled_chunks.append(chunk)
+            labeled_chunks.append(self._mark_chunk(chunk, language, index))
 
         logger.info(f"{language} processing finished.")
         result = pd.concat(labeled_chunks)
@@ -176,7 +171,7 @@ def _parse_inspections_files(cls, inspections_files: Set[Path]) -> Dict[int, Lis
         for file in inspections_files:
             issues = json.loads(get_content_from_file(file))['problems']
             for issue in issues:
-                fragment_id = cls._get_fragment_id_from_fragment_file_path(issue['file'])
+                fragment_id = int(cls._get_fragment_id_from_fragment_file_path(issue['file']))
                 qodana_issue = QodanaIssue(line=issue['line'], offset=issue['offset'], length=issue['length'],
                                            highlighted_element=issue['highlighted_element'],
                                            description=issue['description'], fragment_id=fragment_id,
@@ -184,33 +179,43 @@ def _parse_inspections_files(cls, inspections_files: Set[Path]) -> Dict[int, Lis
                 id_to_issues[fragment_id].append(qodana_issue)
         return id_to_issues
 
+    @classmethod
+    def _to_json(cls, issues: List[QodanaIssue]) -> str:
+        issues_json = {
+            QodanaJsonField.ISSUES.value: list(map(lambda i: i.to_json(), issues)),
+        }
+        return json.dumps(issues_json)
+
-    def _mark_chunk(self, chunk: DataFrame, language: LanguageVersion) -> pd.DataFrame:
-        with new_temp_dir() as temp_dir:
-            project_dir = temp_dir / "project"
-            results_dir = temp_dir / "results"
+    def _mark_chunk(self, chunk: DataFrame, language: LanguageVersion, chunk_id: int) -> pd.DataFrame:
+        tmp_file_path = self.dataset_path.parent.absolute() / f'qodana_project_{chunk_id}'
+        create_directory(tmp_file_path)
+
+        project_dir = tmp_file_path / "project"
+        results_dir = tmp_file_path / "results"
 
-            logger.info("Copying the template")
-            self._copy_template(project_dir, language)
+        logger.info("Copying the template")
+        self._copy_template(project_dir, language)
 
-            if self.config:
-                logger.info("Copying the config")
-                self._copy_config(project_dir)
+        if self.config:
+            logger.info("Copying the config")
+            self._copy_config(project_dir)
 
-            logger.info("Creating main files")
-            self._create_main_files(project_dir, chunk, language)
+        logger.info("Creating main files")
+        self._create_main_files(project_dir, chunk, language)
 
-            logger.info("Running qodana")
-            self._run_qodana(project_dir, results_dir)
+        logger.info("Running qodana")
+        self._run_qodana(project_dir, results_dir)
 
-            logger.info("Getting inspections")
-            inspections_files = self._get_inspections_files(results_dir)
-            inspections = self._parse_inspections_files(inspections_files)
+        logger.info("Getting inspections")
+        inspections_files = self._get_inspections_files(results_dir)
+        inspections = self._parse_inspections_files(inspections_files)
 
-            logger.info("Write inspections")
-            chunk[QodanaColumnName.INSPECTIONS.value] = chunk.apply(
-                lambda row: inspections.get(row[ColumnName.ID.value], []), axis=1)
+        logger.info("Write inspections")
+        chunk[QodanaColumnName.INSPECTIONS.value] = chunk.apply(
+            lambda row: self._to_json(inspections.get(row[ColumnName.ID.value], [])), axis=1)
 
-            return chunk
+        remove_directory(tmp_file_path)
+        return chunk
 
     @staticmethod
     def _copy_template(project_dir: Path, language: LanguageVersion):
@@ -249,15 +254,9 @@ def _create_main_files(project_dir: Path, chunk: DataFrame, language: LanguageVe
     @staticmethod
     def _run_qodana(project_dir: Path, results_dir: Path):
         results_dir.mkdir()
-        try:
-            docker.run(
-                "jetbrains/qodana",
-                remove=True,
-                volumes=[(project_dir, "/data/project/"), (results_dir, "/data/results/")],
-                user=os.getuid(),
-            )
-        except Exception as e:
-            logger.exception(f'Error during qodana running: {e}')
+        command = ['docker', 'run', '--rm', '-v', f'{project_dir}/:/data/project/', '-v',
+                   f'{results_dir}/:/data/results/', 'jetbrains/qodana']
+        run_and_wait(command)
 
     @staticmethod
     def _get_inspections_files(results_dir: Path) -> Set[Path]:

diff --git a/src/python/evaluation/qodana/util/models.py b/src/python/evaluation/qodana/util/models.py
index 0cc7de2f..f5b3a589 100644
--- a/src/python/evaluation/qodana/util/models.py
+++ b/src/python/evaluation/qodana/util/models.py
@@ -1,3 +1,4 @@
+import json
 from dataclasses import dataclass
 from enum import Enum, unique
 
@@ -12,7 +13,45 @@ class QodanaIssue:
     description: str
     problem_id: str
 
+    def to_json(self) -> str:
+        issue = {
+            QodanaJsonField.FRAGMENT_ID.value: self.fragment_id,
+            QodanaJsonField.LINE.value: self.line,
+            QodanaJsonField.OFFSET.value: self.offset,
+            QodanaJsonField.LENGTH.value: self.length,
+            QodanaJsonField.HIGHLIGHTED_ELEMENT.value: self.highlighted_element,
+            QodanaJsonField.DESCRIPTION.value: self.description,
+            QodanaJsonField.PROBLEM_ID.value: self.problem_id,
+        }
+        return json.dumps(issue)
+
+    @classmethod
+    def from_json(cls, str_json: str) -> 'QodanaIssue':
+        issue = json.loads(str_json)
+        return QodanaIssue(
+            fragment_id=issue[QodanaJsonField.FRAGMENT_ID.value],
+            line=issue[QodanaJsonField.LINE.value],
+            offset=issue[QodanaJsonField.OFFSET.value],
+            length=issue[QodanaJsonField.LENGTH.value],
+            highlighted_element=issue[QodanaJsonField.HIGHLIGHTED_ELEMENT.value],
+            description=issue[QodanaJsonField.DESCRIPTION.value],
+            problem_id=issue[QodanaJsonField.PROBLEM_ID.value],
+        )
+
 
 @unique
 class QodanaColumnName(Enum):
     INSPECTIONS = 'inspections'
+
+
+@unique
+class QodanaJsonField(Enum):
+    FRAGMENT_ID = 'fragment_id'
+    LINE = 'line'
+    OFFSET = 'offset'
+    LENGTH = 'length'
+    HIGHLIGHTED_ELEMENT = 'highlighted_element'
+    DESCRIPTION = 'description'
+    PROBLEM_ID = 'problem_id'
+
+    ISSUES = 'issues'

diff --git a/src/python/review/common/file_system.py b/src/python/review/common/file_system.py
index a06daed7..3e2e8bce 100644
--- a/src/python/review/common/file_system.py
+++ b/src/python/review/common/file_system.py
@@ -2,6 +2,7 @@
 import os
 import pickle
 import re
+import shutil
 import tempfile
 from contextlib import contextmanager
 from enum import Enum, unique
@@ -167,6 +168,11 @@ def remove_slash(path: str) -> str:
     return path.rstrip('/')
 
 
+def remove_directory(directory: Union[str, Path]) -> None:
+    if os.path.isdir(directory):
+        shutil.rmtree(directory, ignore_errors=True)
+
+
 def add_slash(path: str) -> str:
     if not path.endswith('/'):
         path += '/'

diff --git a/src/python/review/common/subprocess_runner.py b/src/python/review/common/subprocess_runner.py
index a25cbdcd..2a89ad42 100644
--- a/src/python/review/common/subprocess_runner.py
+++ b/src/python/review/common/subprocess_runner.py
@@ -21,3 +21,8 @@ def run_in_subprocess(command: List[str]) -> str:
         logger.debug('%s\'s stderr:\n%s' % (command[0], stderr))
 
     return stdout
+
+
+def run_and_wait(command: List[str]) -> None:
+    process = subprocess.Popen(command)
+    process.wait()

diff --git a/whitelist.txt b/whitelist.txt
index bbf66332..3e331750 100644
--- a/whitelist.txt
+++ b/whitelist.txt
@@ -113,3 +113,4 @@ iterrows
 nrows
 groupby
 getuid
+Popen

From 8489c9d0564f0a780521345f18c2f016bfcb8791 Mon Sep 17 00:00:00 2001
From: "Anastasiia.Birillo"
Date: Fri, 21 May 2021 13:46:55 +0300
Subject: [PATCH 5/6] Fix a bug with path to the gradle project

---
 src/python/evaluation/qodana/dataset_marking.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/python/evaluation/qodana/dataset_marking.py b/src/python/evaluation/qodana/dataset_marking.py
index 574e7d07..00f17667 100644
--- a/src/python/evaluation/qodana/dataset_marking.py
+++ b/src/python/evaluation/qodana/dataset_marking.py
@@ -22,7 +22,7 @@
 from src.python.evaluation.qodana.util.models import QodanaColumnName, QodanaIssue, QodanaJsonField
 from src.python.review.application_config import LanguageVersion
 from src.python.review.common.file_system import (
-    create_directory, get_content_from_file, get_name_from_path, get_parent_folder, remove_directory,
+    create_directory, get_content_from_file, get_name_from_path, get_parent_folder, remove_directory, remove_slash,
 )
 from src.python.review.common.subprocess_runner import run_and_wait
 from src.python.review.run_tool import positive_int
@@ -225,7 +225,8 @@ def _copy_template(project_dir: Path, language: LanguageVersion):
             or language == LanguageVersion.JAVA_8
             or language == LanguageVersion.JAVA_7
         ):
-            shutil.copytree(Path("./project_templates/java"), project_dir, dirs_exist_ok=True)
+            shutil.copytree(Path(f"{remove_slash(os.path.dirname(os.path.abspath(__file__)))}/project_templates/java"),
+                            project_dir, dirs_exist_ok=True)
         else:
             raise NotImplementedError
 

From 371f985f8d6315b8568f48d66727a18f6447d3f8 Mon Sep 17 00:00:00 2001
From: Ilya Vlasov
Date: Sat, 22 May 2021 11:31:37 +0300
Subject: [PATCH 6/6] Fixed PR issues

---
 .../evaluation/qodana/dataset_marking.py | 20 +++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/python/evaluation/qodana/dataset_marking.py b/src/python/evaluation/qodana/dataset_marking.py
index 00f17667..40f679bd 100644
--- a/src/python/evaluation/qodana/dataset_marking.py
+++ b/src/python/evaluation/qodana/dataset_marking.py
@@ -157,7 +157,7 @@ def _mark_language(self, df: DataFrame, language: LanguageVersion) -> DataFrame:
     def _extract_fragment_id(cls, folder_name: str) -> int:
         numbers = re.findall(r'\d+', folder_name)
         if len(numbers) != 1:
-            raise ValueError(f'Can npt extract fragment id from {folder_name}')
+            raise ValueError(f'Can not extract fragment id from {folder_name}')
         return numbers[0]
 
     @classmethod
@@ -187,11 +187,11 @@ def _to_json(cls, issues: List[QodanaIssue]) -> str:
         return json.dumps(issues_json)
 
     def _mark_chunk(self, chunk: DataFrame, language: LanguageVersion, chunk_id: int) -> pd.DataFrame:
-        tmp_file_path = self.dataset_path.parent.absolute() / f'qodana_project_{chunk_id}'
-        create_directory(tmp_file_path)
+        tmp_dir_path = self.dataset_path.parent.absolute() / f'qodana_project_{chunk_id}'
+        create_directory(tmp_dir_path)
 
-        project_dir = tmp_file_path / "project"
-        results_dir = tmp_file_path / "results"
+        project_dir = tmp_dir_path / "project"
+        results_dir = tmp_dir_path / "results"
 
         logger.info("Copying the template")
         self._copy_template(project_dir, language)
@@ -214,7 +214,7 @@ def _mark_chunk(self, chunk: DataFrame, language: LanguageVersion, chunk_id: int
         chunk[QodanaColumnName.INSPECTIONS.value] = chunk.apply(
             lambda row: self._to_json(inspections.get(row[ColumnName.ID.value], [])), axis=1)
 
-        remove_directory(tmp_file_path)
+        remove_directory(tmp_dir_path)
         return chunk
 
     @staticmethod
@@ -242,12 +242,12 @@ def _create_main_files(project_dir: Path, chunk: DataFrame, language: LanguageVe
             or language == LanguageVersion.JAVA_7
         ):
             working_dir = project_dir / "src" / "main" / "java"
-            for index, row in chunk.iterrows():
-                solution_dir = working_dir / f"solution{index}"
+            for _, row in chunk.iterrows():
+                solution_dir = working_dir / f"solution{row[ColumnName.ID.value]}"
                 solution_dir.mkdir(parents=True)
                 file_path = solution_dir / "Main.java"
                 with open(file_path, "w") as file:
-                    file.write(f"package solution{index};\n\n")
+                    file.write(f"package solution{row[ColumnName.ID.value]};\n\n")
                     file.write(row[ColumnName.CODE.value])
         else:
             raise NotImplementedError
@@ -255,7 +255,7 @@ def _create_main_files(project_dir: Path, chunk: DataFrame, language: LanguageVe
     @staticmethod
     def _run_qodana(project_dir: Path, results_dir: Path):
         results_dir.mkdir()
-        command = ['docker', 'run', '--rm', '-v', f'{project_dir}/:/data/project/', '-v',
+        command = ['docker', 'run', '-u', str(os.getuid()), '--rm', '-v', f'{project_dir}/:/data/project/', '-v',
                    f'{results_dir}/:/data/results/', 'jetbrains/qodana']
         run_and_wait(command)
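
Note (not part of the patch series above): after these patches, dataset_marking.py stores each fragment's Qodana issues in the `inspections` column as a JSON string produced by DatasetMarker._to_json, i.e. an object of the form {"issues": ["<issue json>", ...]} where every element is the output of QodanaIssue.to_json. The following is a minimal sketch of how a consumer could read that column back; the output file name labeled_dataset.csv is a hypothetical placeholder, while the column and field names come from src/python/evaluation/qodana/util/models.py and src/python/evaluation/common/util.py.

import json
from typing import List

import pandas as pd

from src.python.evaluation.common.util import ColumnName
from src.python.evaluation.qodana.util.models import QodanaColumnName, QodanaIssue, QodanaJsonField


def parse_inspections_cell(cell: str) -> List[QodanaIssue]:
    # Each cell holds {"issues": ["<issue json>", ...]} as written by DatasetMarker._to_json;
    # every element is itself a JSON string that QodanaIssue.from_json can decode.
    return [QodanaIssue.from_json(issue_json) for issue_json in json.loads(cell)[QodanaJsonField.ISSUES.value]]


df = pd.read_csv('labeled_dataset.csv')  # hypothetical path of the script's labeled output
df[QodanaColumnName.INSPECTIONS.value] = df[QodanaColumnName.INSPECTIONS.value].map(parse_inspections_cell)
print(df[[ColumnName.ID.value, QodanaColumnName.INSPECTIONS.value]].head())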