diff --git a/src/python/evaluation/paper_evaluation/comparison_with_other_tools/tutor_statistics.py b/src/python/evaluation/paper_evaluation/comparison_with_other_tools/tutor_statistics.py
index 4e7bf2e6..083d6311 100644
--- a/src/python/evaluation/paper_evaluation/comparison_with_other_tools/tutor_statistics.py
+++ b/src/python/evaluation/paper_evaluation/comparison_with_other_tools/tutor_statistics.py
@@ -38,7 +38,7 @@ def __init__(self, solutions_df: pd.DataFrame, to_drop_duplicates: bool = False)
                 task_df[ComparisonColumnName.TUTOR_ERROR.value].dropna().values))
             for cell_errors in errors_list:
                 for error in cell_errors:
-                    self.error_to_freq[error] += 1
+                    self.error_to_freq[error.strip()] += 1
                     self.task_to_error_freq[task] += 1
                     self.fragments_with_error += 1
         self.task_to_freq = sort_freq_dict(self.task_to_freq)
@@ -91,7 +91,7 @@ def __init__(self, solutions_df: pd.DataFrame, to_drop_duplicates: bool = False)
     def __parse_issues(issues_str: str) -> List[str]:
         if pd.isna(issues_str) or issues_str == ERROR_CONST:
             return []
-        return issues_str.split(';')
+        return [issue.strip() for issue in issues_str.split(';')]
 
     @staticmethod
     def __add_issues(issues_dict: Dict[str, int], issues: List[str]) -> None:
diff --git a/src/python/evaluation/paper_evaluation/survey_handler/README.md b/src/python/evaluation/paper_evaluation/survey_handler/README.md
new file mode 100644
index 00000000..4afe932e
--- /dev/null
+++ b/src/python/evaluation/paper_evaluation/survey_handler/README.md
@@ -0,0 +1,73 @@
+# Survey handlers
+
+These scripts handle the survey results for the SIGCSE paper.
+We ran two surveys (one for Python and one for Java) in which participants chose the fragment
+with better formatting.
+In each question of the surveys the two fragments appear in a random order:
+the left fragment may be either the well-formatted one or the badly formatted one.
+To handle this, we created a JSON config that labels the fragments and another one with the results.
+These scripts process these config files.
+
+## Usage
+
+Run [survey_statistics_gathering.py](survey_statistics_gathering.py) with the required arguments from the command line.
+
+Required arguments:
+
+`questions_json_path` — path to the JSON with labelled questions;
+`results_json_path` — path to the JSON with survey results.
+
+An example of a `questions_json` file:
+```json
+{
+  "questions": [
+    {
+      "number": 1,
+      "left_fragment": "before_formatting",
+      "right_fragment": "after_formatting"
+    },
+    {
+      "number": 2,
+      "left_fragment": "after_formatting",
+      "right_fragment": "before_formatting"
+    }
+  ]
+}
+```
+
+An example of a `results_json` file:
+
+```json
+{
+  "questions": [
+    {
+      "number": 1,
+      "left_fragment": 0,
+      "right_fragment": 11,
+      "both": 0
+    },
+    {
+      "number": 2,
+      "left_fragment": 10,
+      "right_fragment": 0,
+      "both": 1
+    }
+  ]
+}
+```
+
+An example of the statistics output:
+```text
+total participants=11
+------before----after----any----
+1.      0       11       0
+2.      1       10       0
+3.      0       11       0
+4.      0       11       0
+5.      0       11       0
+6.      1       10       0
+7.      0       11       0
+8.      1       8        2
+9.      0       11       0
+10.     0       8        3
+```
diff --git a/src/python/evaluation/paper_evaluation/survey_handler/__init__.py b/src/python/evaluation/paper_evaluation/survey_handler/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src/python/evaluation/paper_evaluation/survey_handler/survey_statistics.py b/src/python/evaluation/paper_evaluation/survey_handler/survey_statistics.py
new file mode 100644
index 00000000..8cfc898b
--- /dev/null
+++ b/src/python/evaluation/paper_evaluation/survey_handler/survey_statistics.py
@@ -0,0 +1,61 @@
+from dataclasses import dataclass
+from enum import Enum, unique
+from typing import Any, Dict, List
+
+
+@dataclass
+class Question:
+    with_formatting_count: int = 0
+    without_formatting_count: int = 0
+    any_formatting_count: int = 0
+
+    def get_total(self) -> int:
+        return self.with_formatting_count + self.without_formatting_count + self.any_formatting_count
+
+
+@unique
+class SurveyJsonField(Enum):
+    NUMBER = 'number'
+    LEFT_FRAGMENT = 'left_fragment'
+    RIGHT_FRAGMENT = 'right_fragment'
+
+    BEFORE_FORMATTING = 'before_formatting'
+    BOTH = 'both'
+
+    QUESTIONS = 'questions'
+
+
+@dataclass
+class SurveyStatistics:
+    questions: List[Question]
+
+    def __init__(self, questions_json: List[Dict[str, Any]], results_json: List[Dict[str, int]]):
+        self.questions = []
+        for result_json in results_json:
+            question_number = result_json[SurveyJsonField.NUMBER.value]
+            question = self.__find_json_question(questions_json, question_number)
+            if question[SurveyJsonField.LEFT_FRAGMENT.value] == SurveyJsonField.BEFORE_FORMATTING.value:
+                without_formatting_count = result_json[SurveyJsonField.LEFT_FRAGMENT.value]
+                with_formatting_count = result_json[SurveyJsonField.RIGHT_FRAGMENT.value]
+            else:
+                without_formatting_count = result_json[SurveyJsonField.RIGHT_FRAGMENT.value]
+                with_formatting_count = result_json[SurveyJsonField.LEFT_FRAGMENT.value]
+            any_formatting_count = result_json[SurveyJsonField.BOTH.value]
+            self.questions.append(Question(with_formatting_count, without_formatting_count, any_formatting_count))
+
+    @staticmethod
+    def __find_json_question(questions_json: List[Dict[str, Any]], question_number: int) -> Dict[str, Any]:
+        for question in questions_json:
+            if question[SurveyJsonField.NUMBER.value] == question_number:
+                return question
+        raise ValueError(f'Did not find question {question_number}')
+
+    def print_stat(self) -> None:
+        if len(self.questions) == 0:
+            print('No questions found')
+            return
+        print(f'total participants={self.questions[0].get_total()}')
+        print('------before----after----any----')
+        for index, question in enumerate(self.questions):
+            print(f'{index + 1}.\t\t{question.without_formatting_count}\t\t{question.with_formatting_count}\t\t '
+                  f'{question.any_formatting_count}')
diff --git a/src/python/evaluation/paper_evaluation/survey_handler/survey_statistics_gathering.py b/src/python/evaluation/paper_evaluation/survey_handler/survey_statistics_gathering.py
new file mode 100644
index 00000000..82b8b7d3
--- /dev/null
+++ b/src/python/evaluation/paper_evaluation/survey_handler/survey_statistics_gathering.py
@@ -0,0 +1,46 @@
+import argparse
+import json
+import sys
+from pathlib import Path
+
+from src.python.evaluation.evaluation_run_tool import logger
+from src.python.evaluation.paper_evaluation.survey_handler.survey_statistics import SurveyJsonField, SurveyStatistics
+from src.python.review.common.file_system import get_content_from_file
+
+
+def configure_arguments(parser: argparse.ArgumentParser) -> None:
+    parser.add_argument('questions_json_path',
+                        type=lambda value: Path(value).absolute(),
+                        help='Path to the JSON with labelled questions')
+
+    parser.add_argument('results_json_path',
+                        type=lambda value: Path(value).absolute(),
+                        help='Path to the JSON with survey results')
+
+
+def main() -> int:
+    parser = argparse.ArgumentParser()
+    configure_arguments(parser)
+
+    try:
+        args = parser.parse_args()
+        questions_json = json.loads(get_content_from_file(args.questions_json_path))
+        results_json = json.loads(get_content_from_file(args.results_json_path))
+        stat = SurveyStatistics(
+            questions_json[SurveyJsonField.QUESTIONS.value],
+            results_json[SurveyJsonField.QUESTIONS.value],
+        )
+        stat.print_stat()
+        return 0
+
+    except FileNotFoundError:
+        logger.error('JSON file was not found')
+        return 2
+
+    except Exception:
+        logger.exception('An unexpected error occurred')
+        return 2
+
+
+if __name__ == '__main__':
+    sys.exit(main())
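
As a usage sketch, the new `SurveyStatistics` class can also be driven directly with the two JSON structures described in the README. The inline data below mirrors the README example, and the import assumes the repository root is on `PYTHONPATH` (the same assumption the `src.python...` imports in `survey_statistics_gathering.py` make); it is illustrative only, not part of the patch.

```python
# Illustrative sketch: feed SurveyStatistics the "questions" lists from the two
# configs described in the README (data mirrors the README example).
from src.python.evaluation.paper_evaluation.survey_handler.survey_statistics import SurveyStatistics

# Labels which side of each survey question showed the unformatted fragment.
questions_json = [
    {'number': 1, 'left_fragment': 'before_formatting', 'right_fragment': 'after_formatting'},
    {'number': 2, 'left_fragment': 'after_formatting', 'right_fragment': 'before_formatting'},
]

# Vote counts for each side plus the "both" option.
results_json = [
    {'number': 1, 'left_fragment': 0, 'right_fragment': 11, 'both': 0},
    {'number': 2, 'left_fragment': 10, 'right_fragment': 0, 'both': 1},
]

# Maps left/right votes onto before/after formatting and prints the statistics table.
SurveyStatistics(questions_json, results_json).print_stat()
```

Note that `print_stat` derives `total participants` from the vote counts of the first question (`self.questions[0].get_total()`).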