Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion VERSION.md
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.0.0
1.2.0
3 changes: 2 additions & 1 deletion requirements-evaluation.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
openpyxl==3.0.7
pandas==1.2.3
pandas==1.2.3
pandarallel
13 changes: 13 additions & 0 deletions src/python/common/tool_arguments.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from enum import Enum, unique
from typing import List, Optional

from src.python.evaluation.common.util import ColumnName
from src.python.review.application_config import LanguageVersion
from src.python.review.inspectors.inspector_type import InspectorType

Expand Down Expand Up @@ -76,3 +77,15 @@ class RunToolArgument(Enum):
HISTORY = ArgumentsInfo(None, '--history',
'Json string, which contains lists of issues in the previous submissions '
'for other tasks for one user.')

SOLUTIONS_FILE_PATH = ArgumentsInfo(None, 'solutions_file_path',
'Local XLSX-file or CSV-file path. '
'Your file must include column-names: '
f'"{ColumnName.CODE.value}" and '
f'"{ColumnName.LANG.value}". Acceptable values for '
f'"{ColumnName.LANG.value}" column are: '
f'{LanguageVersion.PYTHON_3.value}, {LanguageVersion.JAVA_8.value}, '
f'{LanguageVersion.JAVA_11.value}, {LanguageVersion.KOTLIN.value}.')

DIFFS_FILE_PATH = ArgumentsInfo(None, 'diffs_file_path',
'Path to a file with serialized diffs that were founded by diffs_between_df.py')
24 changes: 14 additions & 10 deletions src/python/evaluation/README.md
Original file line number Diff line number Diff line change
@@ -1,31 +1,35 @@
# Hyperstyle evaluation

This tool allows running the `Hyperstyle` tool on an xlsx table to get code quality for all code fragments. Please, note that your input file should consist of at least 2 obligatory columns to run xlsx-tool on its code fragments:
This tool allows running the `Hyperstyle` tool on a `xlsx` or `csv` table to get code quality for all code fragments.
Please, note that your input file should consist of at least 2 obligatory columns to run the tool on its code fragments:

- `code`
- `lang`

Possible values for column `lang` are: `python3`, `kotlin`, `java8`, `java11`.

Output file is a new `xlsx` file with 3 columns:
- `code`
- `lang`
Output file is a new `xlsx` or `csv` file with the all columns from the input file and two additional ones:
- `grade`
Grade assessment is conducted by [`run_tool.py`](https://github.com/hyperskill/hyperstyle/blob/main/README.md) with default arguments. Available values for column `grade` are: BAD, MODERATE, GOOD, EXCELLENT. It is also possible to add a fourth column, `traceback`, to get full inspectors feedback on each code fragment. More details on enabling the traceback column are in the **Optional Arguments** table.
- `traceback` (optional)

Grade assessment is conducted by [`run_tool.py`](https://github.com/hyperskill/hyperstyle/blob/main/README.md) with default arguments.
Available values for column `grade` are: BAD, MODERATE, GOOD, EXCELLENT.
`traceback` column stores full inspectors feedback on each code fragment.
More details on enabling the `traceback` column can be found in the **Optional Arguments** table.

## Usage

Run the [xlsx_run_tool.py](xlsx_run_tool.py) with the arguments from command line.
Run the [evaluation_run_tool.py](evaluation_run_tool.py) with the arguments from command line.

Required arguments:

`xlsx_file_path` — path to xlsx-file with code samples to inspect.
`solutions_file_path` — path to xlsx-file or csv-file with code samples to inspect.

Optional arguments:
Argument | Description
--- | ---
|**‑f**, **‑‑format**| The output format. Available values: `json`, `text`. The default value is `json` . Use this argument when `traceback` is enabled, otherwise it will not be used.|
|**‑tp**, **‑‑tool_path**| Path to run-tool. Default is `src/python/review/run_tool.py` .|
|**‑tr**, **‑‑traceback**| To include a column with errors traceback into an output file. Default is `False`.|
|**‑ofp**, **‑‑output_folder_path**| An explicit folder path to store file with results. Default is a parent directory of a folder with xlsx-file sent for inspection. |
|**‑ofn**, **‑‑output_file_name**| A name of an output file where evaluation results will be stored. Default is `results.xlsx`.|
|**‑‑traceback**| To include a column with errors traceback into an output file. Default is `False`.|
|**‑ofp**, **‑‑output_folder_path**| An explicit folder path to store file with results. Default is a parent directory of a folder with xlsx-file or csv-file sent for inspection. |
|**‑ofn**, **‑‑output_file_name**| A name of an output file where evaluation results will be stored. Default is `results.xlsx` or `results.csv`.|
13 changes: 13 additions & 0 deletions src/python/evaluation/common/csv_util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from pathlib import Path
from typing import Union

import pandas as pd
from src.python.review.common.file_system import Encoding


def write_dataframe_to_csv(csv_file_path: Union[str, Path], df: pd.DataFrame) -> None:
    """Persist ``df`` as a CSV file at ``csv_file_path`` without the index column.

    ISO encoding is tried first; several code fragments cannot be represented
    in it, so UTF-8 is used as a fallback.
    """
    try:
        df.to_csv(csv_file_path, index=False, encoding=Encoding.ISO_ENCODING.value)
    except UnicodeEncodeError:
        df.to_csv(csv_file_path, index=False, encoding=Encoding.UTF_ENCODING.value)
103 changes: 103 additions & 0 deletions src/python/evaluation/common/pandas_util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
import json
import logging
from pathlib import Path
from typing import Any, List, Set, Union

import numpy as np
import pandas as pd
from src.python.evaluation.common.csv_util import write_dataframe_to_csv
from src.python.evaluation.common.util import ColumnName, EvaluationArgument
from src.python.evaluation.common.xlsx_util import create_workbook, remove_sheet, write_dataframe_to_xlsx_sheet
from src.python.review.application_config import LanguageVersion
from src.python.review.common.file_system import Extension, get_restricted_extension
from src.python.review.inspectors.issue import BaseIssue
from src.python.review.reviewers.utils.print_review import convert_json_to_issues

logger = logging.getLogger(__name__)


def filter_df_by_language(df: pd.DataFrame, languages: Set[LanguageVersion],
                          column: str = ColumnName.LANG.value) -> pd.DataFrame:
    """Keep only the rows whose ``column`` value belongs to ``languages``."""
    allowed_values = {language.value for language in languages}
    return df.loc[df[column].isin(allowed_values)]


def filter_df_by_condition(df: pd.DataFrame, column: str, value: Any) -> pd.DataFrame:
    """Return the rows of ``df`` in which ``column`` equals ``value``."""
    equals_mask = df[column] == value
    return df.loc[equals_mask]


def drop_duplicates(df: pd.DataFrame, column: str = ColumnName.CODE.value) -> pd.DataFrame:
    """Drop rows duplicated in ``column``, keeping only the last occurrence."""
    return df.drop_duplicates(subset=column, keep='last')


# Locate every (row, column) position at which two dataframes disagree.
# The result is a stacked boolean series indexed by (row, column), e.g.:
# row | column   |
# -------------------------
# 3   | column_1 | True
#     | column_2 | True
# -------------------------
# 4   | column_1 | True
#     | column_2 | True
# meaning the two frames differ in column_1 and column_2 of rows 3 and 4.
def get_inconsistent_positions(first: pd.DataFrame, second: pd.DataFrame) -> pd.DataFrame:
    mismatch_mask = (first != second).stack()
    positions = mismatch_mask[mismatch_mask]
    positions.index.names = [ColumnName.ROW.value, ColumnName.COLUMN.value]
    return positions


# Build a dataframe listing every differing cell together with its
# old and new value. Example:
#             | old       | new
# ---------------------------------
# row column  |           |
# 3   grade   | EXCELLENT | MODERATE
# 4   grade   | EXCELLENT | BAD
def get_diffs(first: pd.DataFrame, second: pd.DataFrame) -> pd.DataFrame:
    inconsistent = get_inconsistent_positions(first, second)

    differing_cells = np.where(first != second)
    old_values = first.values[differing_cells]
    new_values = second.values[differing_cells]
    return pd.DataFrame(
        {
            ColumnName.OLD.value: old_values,
            ColumnName.NEW.value: new_values,
        },
        index=inconsistent.index,
    )


def get_solutions_df(ext: Extension, file_path: Union[str, Path]) -> pd.DataFrame:
    """Read the solutions table from an XLSX or CSV file into a dataframe.

    :param ext: restricted extension of ``file_path`` (XLSX or CSV).
    :param file_path: path to the table with code solutions.
    :raises FileNotFoundError: if no file exists at ``file_path``.
    """
    try:
        if ext == Extension.XLSX:
            lang_code_dataframe = pd.read_excel(file_path)
        else:
            # Any non-XLSX restricted extension is treated as CSV.
            lang_code_dataframe = pd.read_csv(file_path)
    except FileNotFoundError as e:
        # Fixed grammar of the error message ("does not exists" -> "does not exist").
        logger.error('XLSX-file or CSV-file with the specified name does not exist.')
        raise e

    return lang_code_dataframe


def get_solutions_df_by_file_path(path: Path) -> pd.DataFrame:
    """Load the solutions dataframe, validating that ``path`` is XLSX or CSV."""
    extension = get_restricted_extension(path, [Extension.XLSX, Extension.CSV])
    return get_solutions_df(extension, path)


def write_df_to_file(df: pd.DataFrame, output_file_path: Path, extension: Extension) -> None:
    """Write ``df`` to ``output_file_path`` in the format given by ``extension``.

    :raises ValueError: if ``extension`` is neither CSV nor XLSX.
    """
    if extension == Extension.CSV:
        write_dataframe_to_csv(output_file_path, df)
    elif extension == Extension.XLSX:
        create_workbook(output_file_path)
        write_dataframe_to_xlsx_sheet(output_file_path, df, 'inspection_results')
        # remove empty sheet that was initially created with the workbook
        remove_sheet(output_file_path, 'Sheet')
    else:
        # Previously an unsupported extension fell through silently and no file
        # was written; fail loudly instead so the data is never lost unnoticed.
        raise ValueError(f'Unsupported extension for the output file: {extension}')


def get_issues_from_json(str_json: str) -> List[BaseIssue]:
    """Deserialize the ``issues`` list from a JSON traceback string."""
    issues_json = json.loads(str_json)['issues']
    return convert_json_to_issues(issues_json)


def get_issues_by_row(df: pd.DataFrame, row: int) -> List[BaseIssue]:
    """Parse the issues stored in the traceback column of the ``row``-th row."""
    traceback_json = df.iloc[row][EvaluationArgument.TRACEBACK.value]
    return get_issues_from_json(traceback_json)
47 changes: 27 additions & 20 deletions src/python/evaluation/common/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,29 +6,36 @@

@unique
class ColumnName(Enum):
CODE = "code"
LANG = "lang"
LANGUAGE = "language"
GRADE = "grade"
CODE = 'code'
LANG = 'lang'
LANGUAGE = 'language'
GRADE = 'grade'
ID = 'id'
COLUMN = 'column'
ROW = 'row'
OLD = 'old'
NEW = 'new'
IS_PUBLIC = 'is_public'


@unique
class EvaluationArgument(Enum):
TRACEBACK = "traceback"
RESULT_FILE_NAME = "results"
RESULT_FILE_NAME_EXT = f"{RESULT_FILE_NAME}{Extension.XLSX.value}"
TRACEBACK = 'traceback'
RESULT_FILE_NAME = 'evaluation_results'
RESULT_FILE_NAME_XLSX = f'{RESULT_FILE_NAME}{Extension.XLSX.value}'
RESULT_FILE_NAME_CSV = f'{RESULT_FILE_NAME}{Extension.CSV.value}'


script_structure_rule = ("Please, make sure your XLSX-file matches following script standards: \n"
"1. Your XLSX-file should have 2 obligatory columns named:"
f"'{ColumnName.CODE.value}' & '{ColumnName.LANG.value}'. \n"
f"'{ColumnName.CODE.value}' column -- relates to the code-sample. \n"
f"'{ColumnName.LANG.value}' column -- relates to the language of a "
"particular code-sample. \n"
"2. Your code samples should belong to the one of the supported languages. \n"
"Supported languages are: Java, Kotlin, Python. \n"
f"3. Check that '{ColumnName.LANG.value}' column cells are filled with "
"acceptable language-names: \n"
f"Acceptable language-names are: {LanguageVersion.PYTHON_3.value}, "
f"{LanguageVersion.JAVA_8.value} ,"
f"{LanguageVersion.JAVA_11.value} and {LanguageVersion.KOTLIN.value}.")
script_structure_rule = ('Please, make sure your XLSX-file matches following script standards: \n'
'1. Your XLSX-file or CSV-file should have 2 obligatory columns named:'
f'"{ColumnName.CODE.value}" & "{ColumnName.LANG.value}". \n'
f'"{ColumnName.CODE.value}" column -- relates to the code-sample. \n'
f'"{ColumnName.LANG.value}" column -- relates to the language of a '
'particular code-sample. \n'
'2. Your code samples should belong to the one of the supported languages. \n'
'Supported languages are: Java, Kotlin, Python. \n'
f'3. Check that "{ColumnName.LANG.value}" column cells are filled with '
'acceptable language-names: \n'
f'Acceptable language-names are: {LanguageVersion.PYTHON_3.value}, '
f'{LanguageVersion.JAVA_8.value} ,'
f'{LanguageVersion.JAVA_11.value} and {LanguageVersion.KOTLIN.value}.')
8 changes: 3 additions & 5 deletions src/python/evaluation/common/xlsx_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

import pandas as pd
from openpyxl import load_workbook, Workbook
from src.python.evaluation.evaluation_config import EvaluationConfig

logger = logging.getLogger(__name__)

Expand All @@ -24,11 +23,10 @@ def remove_sheet(workbook_path: Union[str, Path], sheet_name: str, to_raise_erro
logger.info(message)


def create_and_get_workbook_path(config: EvaluationConfig) -> Path:
def create_workbook(output_file_path: Path) -> Workbook:
workbook = Workbook()
workbook_path = config.get_output_file_path()
workbook.save(workbook_path)
return workbook_path
workbook.save(output_file_path)
return workbook


def write_dataframe_to_xlsx_sheet(xlsx_file_path: Union[str, Path], df: pd.DataFrame, sheet_name: str,
Expand Down
26 changes: 18 additions & 8 deletions src/python/evaluation/evaluation_config.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
import logging.config
from argparse import Namespace
from pathlib import Path
from typing import List, Union
from typing import List, Optional, Union

from src.python.common.tool_arguments import RunToolArgument
from src.python.evaluation.common.util import EvaluationArgument
from src.python.review.application_config import LanguageVersion
from src.python.review.common.file_system import create_directory
from src.python.review.common.file_system import (
create_directory,
Extension,
get_parent_folder,
get_restricted_extension,
)

logger = logging.getLogger(__name__)

Expand All @@ -15,10 +20,17 @@ class EvaluationConfig:
def __init__(self, args: Namespace):
self.tool_path: Union[str, Path] = args.tool_path
self.output_format: str = args.format
self.xlsx_file_path: Union[str, Path] = args.xlsx_file_path
self.solutions_file_path: Union[str, Path] = args.solutions_file_path
self.traceback: bool = args.traceback
self.output_folder_path: Union[str, Path] = args.output_folder_path
self.output_file_name: str = args.output_file_name
self.extension: Extension = get_restricted_extension(self.solutions_file_path, [Extension.XLSX, Extension.CSV])
self.__init_output_file_name(args.output_file_name)

def __init_output_file_name(self, output_file_name: Optional[str]):
if output_file_name is None:
self.output_file_name = f'{EvaluationArgument.RESULT_FILE_NAME.value}{self.extension.value}'
else:
self.output_file_name = output_file_name

def build_command(self, inspected_file_path: Union[str, Path], lang: str) -> List[str]:
command = [LanguageVersion.PYTHON_3.value,
Expand All @@ -33,11 +45,9 @@ def build_command(self, inspected_file_path: Union[str, Path], lang: str) -> Lis
def get_output_file_path(self) -> Path:
if self.output_folder_path is None:
try:
self.output_folder_path = (
Path(self.xlsx_file_path).parent.parent / EvaluationArgument.RESULT_FILE_NAME.value
)
self.output_folder_path = get_parent_folder(Path(self.solutions_file_path))
create_directory(self.output_folder_path)
except FileNotFoundError as e:
logger.error('XLSX-file with the specified name does not exists.')
logger.error('XLSX-file or CSV-file with the specified name does not exists.')
raise e
return Path(self.output_folder_path) / self.output_file_name
Loading