Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
56 commits
Select commit Hold shift + click to select a range
53dbe88
Merge branch 'main-upd' into develop
nbirillo May 17, 2021
80ea546
Added project template for Java
GirZ0n May 18, 2021
db30732
Added DatasetMarker
GirZ0n May 18, 2021
8c85e36
Code refactoring
GirZ0n May 18, 2021
b1fa21b
Added some words
GirZ0n May 18, 2021
5929f46
Small code refactoring
GirZ0n May 19, 2021
1a34b50
Added new requirements
GirZ0n May 19, 2021
ecbcdf4
Added ID to ColumnName
GirZ0n May 19, 2021
92f9a8a
Added README.md
GirZ0n May 20, 2021
3ef6a42
Added default value for --chunk-size
GirZ0n May 20, 2021
3f028bd
Merge remote-tracking branch 'origin/qodana' into qodana
GirZ0n May 20, 2021
7cd79e0
parse qodana output
nbirillo May 20, 2021
5c4b85a
Merge remote-tracking branch 'origin/develop' into develop
nbirillo May 20, 2021
ec6b477
Merge branch 'develop' into qodana
nbirillo May 20, 2021
a8b80c0
Update README.md
GirZ0n May 20, 2021
75cac7b
Change qodana scipt output
nbirillo May 20, 2021
dd9d502
Merge remote-tracking branch 'origin/develop' into qodana
GirZ0n May 20, 2021
6915254
Merge remote-tracking branch 'origin/qodana' into qodana
nbirillo May 20, 2021
f0c098b
Merge branch 'qodana' into fix/qodana-output
nbirillo May 20, 2021
c428b78
Fix a bug with qodana
nbirillo May 21, 2021
8489c9d
Fix a bug with path to the gradle project
nbirillo May 21, 2021
371f985
Fixed PR issues
GirZ0n May 22, 2021
0dab1b7
Fix/qodana output (#33)
nbirillo May 22, 2021
f9b418d
Added is_java function
GirZ0n May 23, 2021
96c0518
1) Added copy_directory and copy_file functions;
GirZ0n May 23, 2021
38a936a
Removed python_on_whales dependency
GirZ0n May 23, 2021
235e60f
Fixed some PR issues
GirZ0n May 23, 2021
5faa46c
Merge branch 'fix/qodana-output' into qodana
GirZ0n May 23, 2021
6c73c44
Fixed tests
GirZ0n May 23, 2021
b73b9b9
Added script
GirZ0n May 23, 2021
f753b2b
Added new column names and updated script_structure_rule
GirZ0n May 23, 2021
cbbf8b8
Added 'inplace'
GirZ0n May 23, 2021
dee07dd
Added support for --with-history flag
GirZ0n May 23, 2021
bd95471
Added support for --with-history flag
GirZ0n May 23, 2021
ccd9301
Small fix
GirZ0n Jun 11, 2021
5f4d7b9
Added from_language_version function
GirZ0n Jun 11, 2021
adcdc32
Added support for multiple languages
GirZ0n Jun 11, 2021
9301f3a
Small fix
GirZ0n Jun 11, 2021
93e8a23
Added description for generate_history.py
GirZ0n Jun 11, 2021
71a0127
Merge branch 'develop' into history-script
GirZ0n Jun 11, 2021
aad39bc
Small code refactoring
GirZ0n Jun 11, 2021
b9e3ef6
typo fix
GirZ0n Jun 11, 2021
28641e7
Update README.md
GirZ0n Jun 11, 2021
8f85f4f
Fixed tests
GirZ0n Jun 11, 2021
53068aa
Small fix
GirZ0n Jun 11, 2021
10303eb
Merge remote-tracking branch 'origin/history-script' into history-script
GirZ0n Jun 11, 2021
b7fb4d0
Small fix
GirZ0n Jun 11, 2021
82fc856
Fixed flake8 issues
GirZ0n Jun 11, 2021
eeb8815
Removed duplicate
GirZ0n Jun 12, 2021
498d7fc
Fixed tests
GirZ0n Jun 12, 2021
871b543
Added support for xlsx
GirZ0n Jun 12, 2021
20aec69
Merge branch 'develop' into history-script
GirZ0n Jun 15, 2021
db7663a
Fixed PR issues
GirZ0n Jun 16, 2021
1f3f84e
Merge branch 'develop' into history-script
GirZ0n Jun 16, 2021
57d19f2
Merge branch 'develop' into history-script
GirZ0n Jun 18, 2021
0080881
Removed old unnecessary files
GirZ0n Jun 18, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/python/evaluation/common/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ class ColumnName(Enum):
PENALTY = 'penalty'
USER = 'user'
HISTORY = 'history'
TIME = 'time'
TRACEBACK = 'traceback'


Expand Down
6 changes: 5 additions & 1 deletion src/python/evaluation/evaluation_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ def __init__(self, args: Namespace):
self.format: str = args.format
self.solutions_file_path: Union[str, Path] = args.solutions_file_path
self.traceback: bool = args.traceback
self.with_history: bool = args.with_history
self.output_folder_path: Union[str, Path] = args.output_folder_path
self.extension: Extension = get_restricted_extension(self.solutions_file_path, [Extension.XLSX, Extension.CSV])
self.__init_output_file_name(args.output_file_name)
Expand All @@ -32,12 +33,15 @@ def __init_output_file_name(self, output_file_name: Optional[str]):
else:
self.output_file_name = output_file_name

def build_command(self, inspected_file_path: Union[str, Path], lang: str) -> List[str]:
def build_command(self, inspected_file_path: Union[str, Path], lang: str, history: Optional[str]) -> List[str]:
    """
    Assemble the command line that runs the tool on a single file.

    :param inspected_file_path: path to the file with the code to inspect.
    :param lang: language version value of the inspected code.
    :param history: history passed to the tool; used only if the config was created
        with history enabled and the value is not None.
    :return: the command as a list of arguments.
    """
    command = [
        LanguageVersion.PYTHON_3.value,
        self.tool_path,
        inspected_file_path,
        RunToolArgument.FORMAT.value.short_name,
        self.format,
    ]

    history_requested = self.with_history and history is not None
    if history_requested:
        command += [RunToolArgument.HISTORY.value.long_name, history]

    # The language version is passed explicitly only for these Java versions
    java_versions = (LanguageVersion.JAVA_8.value, LanguageVersion.JAVA_11.value)
    if lang in java_versions:
        command += [RunToolArgument.LANG_VERSION.value.long_name, lang]

    return command
Expand Down
15 changes: 12 additions & 3 deletions src/python/evaluation/evaluation_run_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import time
import traceback
from pathlib import Path
from typing import Optional

sys.path.append('')
sys.path.append('../../..')
Expand Down Expand Up @@ -63,6 +64,12 @@ def configure_arguments(parser: argparse.ArgumentParser) -> None:
f'Use this argument when {EvaluationArgument.TRACEBACK.value} argument'
'is enabled argument will not be used otherwise.')

parser.add_argument('--with-history',
help=f'If True, then history will be taken into account when calculating the grade. '
f'In that case, for each fragment, the "{ColumnName.HISTORY.value}" column '
'must contain the history of previous errors.',
action='store_true')


def get_language(lang_key: str) -> LanguageVersion:
try:
Expand All @@ -73,14 +80,14 @@ def get_language(lang_key: str) -> LanguageVersion:
raise KeyError(e)


def __inspect_row(lang: str, code: str, fragment_id: int, config: EvaluationConfig) -> str:
def __inspect_row(lang: str, code: str, fragment_id: int, history: Optional[str], config: EvaluationConfig) -> str:
print(f'current id: {fragment_id}')
# Tool does not work correctly with tmp files from <tempfile> module on macOS
# thus we create a real file in the file system
extension = get_language(lang).extension_by_language().value
tmp_file_path = config.solutions_file_path.parent.absolute() / f'inspected_code_{fragment_id}{extension}'
temp_file = next(create_file(tmp_file_path, code))
command = config.build_command(temp_file, lang)
command = config.build_command(temp_file, lang, history)
results = run_in_subprocess(command)
os.remove(temp_file)
return results
Expand All @@ -103,7 +110,9 @@ def inspect_solutions_df(config: EvaluationConfig, lang_code_dataframe: pd.DataF
lang_code_dataframe[ColumnName.TRACEBACK.value] = lang_code_dataframe.parallel_apply(
lambda row: __inspect_row(row[ColumnName.LANG.value],
row[ColumnName.CODE.value],
row[ColumnName.ID.value], config), axis=1)
row[ColumnName.ID.value],
row.get(ColumnName.HISTORY.value),
config), axis=1)

lang_code_dataframe[ColumnName.GRADE.value] = lang_code_dataframe.parallel_apply(
lambda row: __get_grade_from_traceback(row[ColumnName.TRACEBACK.value]), axis=1)
Expand Down
34 changes: 34 additions & 0 deletions src/python/evaluation/inspectors/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ This module contains _preprocessing_ stage and _analysing_ stage.
the `csv` or `xlsx` file with student solutions and drop duplicates of code fragments (optional);
- [distribute_grades.py](distribute_grades.py) allows distributing calculated grades and traceback
for unique solutions into all solutions.
- [generate_history.py](generate_history.py) allows you to generate history based on issues from previous solutions.

`Analysing` stage includes:
- [diffs_between_df.py](diffs_between_df.py) allows finding a difference between
Expand Down Expand Up @@ -80,6 +81,39 @@ Required arguments:

The resulting file will be stored in the same folder as the input file with all samples.

----

### Generate history

[generate_history.py](generate_history.py) allows you to generate history based on issues from previous solutions.

Please note that your solutions file must contain at least 4 mandatory columns:

- `user`,
- `lang`,
- `time`,
- `traceback`.

You can get such a file with [evaluation_run_tool.py](../evaluation_run_tool.py).

The output file is a new `xlsx` or `csv` file (the same format as the input file) with all columns from the input
plus a new `history` column. The `traceback` and `grade` columns are kept by default and can be dropped with the optional arguments described below.

#### Usage

Run the [generate_history.py](generate_history.py) with the arguments from command line.

Required argument:

- `solutions_file_path` — path to xlsx-file or csv-file with necessary columns,

Optional arguments:
Argument | Description
--- | ---
|**&#8209;o**, **&#8209;&#8209;output&#8209;path**| The path where the dataset with history will be saved. If not specified, the dataset will be saved next to the original one. |
|**&#8209;&#8209;to&#8209;drop&#8209;traceback**| The `traceback` column will be removed from the final dataset. Default is false. |
|**&#8209;&#8209;to&#8209;drop&#8209;grade**| The `grade` column will be removed from the final dataset. Default is false.|

___

## Analysing
Expand Down
131 changes: 131 additions & 0 deletions src/python/evaluation/inspectors/generate_history.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
import argparse
import json
from collections import Counter
from pathlib import Path

import pandas as pd
from pandarallel import pandarallel
from src.python.common.tool_arguments import RunToolArgument
from src.python.evaluation.common.pandas_util import (
get_issues_from_json,
get_solutions_df_by_file_path,
write_df_to_file,
)
from src.python.evaluation.common.util import ColumnName, EvaluationArgument
from src.python.evaluation.evaluation_run_tool import get_language
from src.python.review.common.file_system import (
Extension,
get_name_from_path,
get_parent_folder,
get_restricted_extension,
)
from src.python.review.common.language import Language

# Names of the dataset columns this script reads or writes.
TRACEBACK = EvaluationArgument.TRACEBACK.value
GRADE = ColumnName.GRADE.value
HISTORY = ColumnName.HISTORY.value
USER = ColumnName.USER.value
LANG = ColumnName.LANG.value
TIME = ColumnName.TIME.value
# Temporary helper column holding comma-separated issue classes; dropped before the dataset is written.
EXTRACTED_ISSUES = 'extracted_issues'


def configure_arguments(parser: argparse.ArgumentParser) -> None:
    """
    Register the command line arguments of the history generation script.

    :param parser: parser to which the arguments are added in place.
    """
    parser.add_argument(
        RunToolArgument.SOLUTIONS_FILE_PATH.value.long_name,
        # Resolve the path once so that later path arithmetic does not depend on the working directory
        type=lambda value: Path(value).absolute(),
        help='Path to csv or xlsx file. Your dataset must include column-names: '
             f'"{USER}", "{LANG}", "{TIME}", "{TRACEBACK}".',
    )

    parser.add_argument(
        '-o', '--output-path',
        type=lambda value: Path(value).absolute(),
        help='The path where the dataset with history will be saved. '
             'If not specified, the dataset will be saved next to the original one.',
    )

    parser.add_argument(
        '--to-drop-traceback',
        help=f'The "{TRACEBACK}" column will be removed from the final dataset.',
        action='store_true',
    )

    parser.add_argument(
        '--to-drop-grade',
        help=f'The "{GRADE}" column will be removed from the final dataset.',
        action='store_true',
    )


def _update_counter(extracted_issues: str, counter: Counter) -> None:
issue_classes = []
if extracted_issues:
issue_classes = extracted_issues.split(',')

counter.update(issue_classes)


def _add_history(row, solutions_df: pd.DataFrame) -> str:
    """
    Build the history of a solution from the issues of the earlier solutions.

    Earlier solutions are the rows of ``solutions_df`` with the same user and language
    and a strictly smaller time than ``row``.

    :param row: row of the dataset for which the history is generated.
    :param solutions_df: the whole dataset, including the extracted issues column.
    :return: JSON string of the form ``{"<language>": [{"origin_class": ..., "number": ...}, ...]}``,
        or ``{}`` if there are no previous issues or the language version is unknown.
    """
    # If we were unable to identify the language version, we return an empty history.
    # This is checked first so that the dataset is not filtered for nothing.
    try:
        lang_version = get_language(row[LANG])
    except KeyError:
        return json.dumps({})

    previous_solutions = solutions_df[
        (solutions_df[USER] == row[USER]) & (solutions_df[LANG] == row[LANG]) & (solutions_df[TIME] < row[TIME])
    ]

    # Iterate over the column directly instead of mutating the counter from inside DataFrame.apply:
    # apply is meant for computing results, not for side effects.
    counter = Counter()
    for extracted_issues in previous_solutions[EXTRACTED_ISSUES]:
        _update_counter(extracted_issues, counter)

    if not counter:
        return json.dumps({})

    lang = Language.from_language_version(lang_version)
    history = {lang.value.lower(): [{'origin_class': key, 'number': value} for key, value in counter.items()]}
    return json.dumps(history)


def _extract_issues(traceback: str) -> str:
    """Return the origin classes of all issues found in ``traceback`` as one comma-separated string."""
    return ','.join(issue.origin_class for issue in get_issues_from_json(traceback))


def main():
    """
    Entry point of the script: read the solutions, generate the history column and save the result.

    The result is written in the same format (csv or xlsx) as the input file. If no output path
    is given, it is saved next to the input file as ``<input name>_with_history<input extension>``.
    """
    parser = argparse.ArgumentParser()
    configure_arguments(parser)
    args = parser.parse_args()

    pandarallel.initialize()

    solutions_file_path = args.solutions_file_path
    solutions_df = get_solutions_df_by_file_path(solutions_file_path)
    solutions_df[EXTRACTED_ISSUES] = solutions_df.parallel_apply(lambda row: _extract_issues(row[TRACEBACK]), axis=1)
    solutions_df[HISTORY] = solutions_df.parallel_apply(_add_history, axis=1, args=(solutions_df,))

    # The helper column is always removed; the others only on request
    columns_to_drop = [EXTRACTED_ISSUES]

    if args.to_drop_grade:
        columns_to_drop.append(GRADE)

    if args.to_drop_traceback:
        columns_to_drop.append(TRACEBACK)

    # errors='ignore': requested columns that are absent from the dataset are skipped instead of raising
    solutions_df.drop(columns=columns_to_drop, inplace=True, errors='ignore')

    output_ext = get_restricted_extension(solutions_file_path, [Extension.XLSX, Extension.CSV])

    output_path = args.output_path
    if output_path is None:
        output_dir = get_parent_folder(solutions_file_path)
        dataset_name = get_name_from_path(solutions_file_path, with_extension=False)
        # The default name must carry the extension of the format that is actually written
        output_path = output_dir / f'{dataset_name}_with_history{output_ext.value}'

    write_df_to_file(solutions_df, output_path, output_ext)


# Run the command line interface only when the module is executed as a script
if __name__ == '__main__':
    main()
14 changes: 14 additions & 0 deletions src/python/review/common/language.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from pathlib import Path
from typing import List

from src.python.review.application_config import LanguageVersion
from src.python.review.common.file_system import Extension, get_extension_from_file


Expand All @@ -13,6 +14,19 @@ class Language(Enum):
JS = 'JAVASCRIPT'
UNKNOWN = 'UNKNOWN'

@staticmethod
def from_language_version(language_version: LanguageVersion) -> 'Language':
    """
    Map a language version onto the language it belongs to.

    Versions that are not listed here are mapped to Language.UNKNOWN.
    """
    java_versions = (
        LanguageVersion.JAVA_7,
        LanguageVersion.JAVA_8,
        LanguageVersion.JAVA_9,
        LanguageVersion.JAVA_11,
    )

    language_by_version = {version: Language.JAVA for version in java_versions}
    language_by_version[LanguageVersion.PYTHON_3] = Language.PYTHON
    language_by_version[LanguageVersion.KOTLIN] = Language.KOTLIN

    return language_by_version.get(language_version, Language.UNKNOWN)

@classmethod
def values(cls) -> List[str]:
return [member.value for member in Language]
Expand Down
2 changes: 1 addition & 1 deletion src/python/review/quality/penalty.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def get_previous_issues_by_language(lang_to_history: Optional[str], language: La
return []

language_to_history = json.loads(lang_to_history)
history = language_to_history[language.value.lower()]
history = language_to_history.get(language.value.lower(), [])

previous_issues = []
for issue_data in history:
Expand Down
5 changes: 3 additions & 2 deletions test/python/common/file_system/test_subprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from pathlib import Path
from test.python.common import FILE_SYSTEM_DATA_FOLDER
from test.python.evaluation.testing_config import get_testing_arguments
from typing import Optional

import pytest
from src.python.evaluation.evaluation_config import EvaluationConfig
Expand All @@ -15,8 +16,8 @@
]


def inspect_code(config: EvaluationConfig, file: str, language: LanguageVersion) -> str:
command = config.build_command(file, language.value)
def inspect_code(config: EvaluationConfig, file: str, language: LanguageVersion, history: Optional[str] = None) -> str:
    """Run the tool on ``file`` with the command built by ``config`` and return the subprocess output."""
    return run_in_subprocess(config.build_command(file, language.value, history))


Expand Down
8 changes: 6 additions & 2 deletions test/python/evaluation/testing_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,20 @@
from src.python.review.reviewers.perform_review import OutputFormat


def get_testing_arguments(to_add_traceback=None, to_add_tool_path=None) -> Namespace:
def get_testing_arguments(to_add_traceback=None, to_add_tool_path=None, to_add_history=None) -> Namespace:
testing_arguments = Namespace(format=OutputFormat.JSON.value,
output_file_name=EvaluationArgument.RESULT_FILE_NAME_XLSX.value,
output_folder_path=None)
output_folder_path=None,
with_history=False)
if to_add_traceback:
testing_arguments.traceback = True

if to_add_tool_path:
testing_arguments.tool_path = MAIN_FOLDER.parent / 'review/run_tool.py'

if to_add_history:
testing_arguments.with_history = True

testing_arguments.solutions_file_path = None

return testing_arguments
2 changes: 1 addition & 1 deletion whitelist.txt
Original file line number Diff line number Diff line change
Expand Up @@ -159,4 +159,4 @@ Measurer
ndarray
Runtime
matcher
pathlib
pathlib