hyperskill · nbirillo · Aug 2, 2021 · Jul 29, 2021 · Jul 29, 2021
diff --git a/src/python/evaluation/paper_evaluation/comparison_with_other_tools/tutor_statistics.py b/src/python/evaluation/paper_evaluation/comparison_with_other_tools/tutor_statistics.py
@@ -38,7 +38,7 @@ def __init__(self, solutions_df: pd.DataFrame, to_drop_duplicates: bool = False)
                                    task_df[ComparisonColumnName.TUTOR_ERROR.value].dropna().values))
             for cell_errors in errors_list:
                 for error in cell_errors:
-                    self.error_to_freq[error] += 1
+                    self.error_to_freq[error.strip()] += 1
                 self.task_to_error_freq[task] += 1
                 self.fragments_with_error += 1
         self.task_to_freq = sort_freq_dict(self.task_to_freq)
@@ -91,7 +91,7 @@ def __init__(self, solutions_df: pd.DataFrame, to_drop_duplicates: bool = False)
     def __parse_issues(issues_str: str) -> List[str]:
         if pd.isna(issues_str) or issues_str == ERROR_CONST:
             return []
-        return issues_str.split(';')
+        return list(map(lambda i: i.strip(), issues_str.split(';')))
 
     @staticmethod
     def __add_issues(issues_dict: Dict[str, int], issues: List[str]) -> None:

diff --git a/src/python/evaluation/paper_evaluation/survey_handler/README.md b/src/python/evaluation/paper_evaluation/survey_handler/README.md
@@ -0,0 +1,73 @@
+# Survey handlers
+
+These scripts process the survey results for the SIGCSE paper.
+We have two surveys (one for Python and one for Java) in which participants choose the fragment
+that has better formatting.
+In each question the two fragments are shown in random order,
+so the left fragment may be either the well-formatted one or the badly formatted one.
+To handle this, we created one JSON config that labels the fragments of each question and another one with the results.
+These scripts process these config files.
+
+## Usage
+
+Run [survey_statistics_gathering.py](survey_statistics_gathering.py) with the following command-line arguments.
+
+Required arguments:
+
+- `questions_json_path` — path to the JSON with labelled questions;
+- `results_json_path` — path to the JSON with survey results.
+
+An example of a `questions_json` file:
+```json
+{
+  "questions": [
+    {
+      "number": 1,
+      "left_fragment": "before_formatting",
+      "right_fragment": "after_formatting"
+    },
+    {
+      "number": 2,
+      "left_fragment": "after_formatting",
+      "right_fragment": "before_formatting"
+    }
+  ]
+}
+```
+
+An example of a `results_json` file:
+
+```json
+{
+  "questions": [
+    {
+      "number": 1,
+      "left_fragment": 0,
+      "right_fragment": 11,
+      "both": 0
+    },
+    {
+      "number": 2,
+      "left_fragment": 10,
+      "right_fragment": 0,
+      "both": 1
+    }
+  ]
+}
+```
+
+An example of the statistics:
+```text
+total participants=11
+------before----after----any----
+1.		0		11		  0
+2.		1		10		  0
+3.		0		11		  0
+4.		0		11		  0
+5.		0		11		  0
+6.		1		10		  0
+7.		0		11		  0
+8.		1		8		  2
+9.		0		11		  0
+10.		0		8		  3
+```
diff --git a/src/python/evaluation/paper_evaluation/survey_handler/__init__.py b/src/python/evaluation/paper_evaluation/survey_handler/__init__.py
diff --git a/src/python/evaluation/paper_evaluation/survey_handler/survey_statistics.py b/src/python/evaluation/paper_evaluation/survey_handler/survey_statistics.py
@@ -0,0 +1,61 @@
+from dataclasses import dataclass
+from enum import Enum, unique
+from typing import Any, Dict, List
+
+
+@dataclass
+class Question:
+    """
+    Vote counts collected for one survey question.
+
+    Every counter defaults to zero.
+    """
+    with_formatting_count: int = 0
+    without_formatting_count: int = 0
+    any_formatting_count: int = 0
+
+    def get_total(self) -> int:
+        """Return the sum of the three vote counters."""
+        chose_one_fragment = self.with_formatting_count + self.without_formatting_count
+        return chose_one_fragment + self.any_formatting_count
+
+
+@unique
+class SurveyJsonField(Enum):
+    """String keys and values that appear in the questions and results JSON files."""
+    # Keys of a single entry; read from both the questions and the results entries.
+    NUMBER = 'number'
+    LEFT_FRAGMENT = 'left_fragment'
+    RIGHT_FRAGMENT = 'right_fragment'
+
+    # BEFORE_FORMATTING is the label compared with a question's LEFT_FRAGMENT value;
+    # BOTH is read as a key of a results entry.
+    BEFORE_FORMATTING = 'before_formatting'
+    BOTH = 'both'
+
+    # Top-level key under which each JSON file stores its list of entries.
+    QUESTIONS = 'questions'
+
+
+@dataclass
+class SurveyStatistics:
+    """
+    Per-question vote counts of a survey, expressed as "before"/"after" formatting.
+
+    The results entries count votes for the left and the right fragment, while the
+    questions entries say whether the left fragment is the one before formatting.
+    The two are joined by the question number.
+    """
+    questions: List[Question]
+
+    def __init__(self, questions_json: List[Dict[str, Any]], results_json: List[Dict[str, int]]):
+        """
+        Build one `Question` per entry of `results_json`, keeping the order of `results_json`.
+
+        :param questions_json: entries with the fragment labels of every question
+        :param results_json: entries with the vote counts of every question
+        :raises ValueError: if a result refers to a number that is absent from `questions_json`
+        """
+        number_key = SurveyJsonField.NUMBER.value
+        left_key = SurveyJsonField.LEFT_FRAGMENT.value
+        right_key = SurveyJsonField.RIGHT_FRAGMENT.value
+
+        self.questions = []
+        for votes in results_json:
+            labels = self.__find_json_question(questions_json, votes[number_key])
+            # Any left label other than 'before_formatting' is treated as the formatted fragment
+            if labels[left_key] == SurveyJsonField.BEFORE_FORMATTING.value:
+                before_key, after_key = left_key, right_key
+            else:
+                before_key, after_key = right_key, left_key
+            before_votes = votes[before_key]
+            after_votes = votes[after_key]
+            both_votes = votes[SurveyJsonField.BOTH.value]
+            self.questions.append(Question(after_votes, before_votes, both_votes))
+
+    @staticmethod
+    def __find_json_question(questions_json: List[Dict[str, Any]], question_number: int) -> Dict[str, Any]:
+        """
+        Return the first entry of `questions_json` whose number equals `question_number`.
+
+        :raises ValueError: if there is no such entry
+        """
+        number_key = SurveyJsonField.NUMBER.value
+        matching = (entry for entry in questions_json if entry[number_key] == question_number)
+        found = next(matching, None)
+        if found is None:
+            raise ValueError(f'Did not find question {question_number}')
+        return found
+
+    def print_stat(self):
+        """
+        Print the votes table to stdout.
+
+        The number of participants is taken from the first question only.
+        Rows are numbered by their position in `self.questions`, not by the 'number' field.
+        """
+        if not self.questions:
+            print('No questions found')
+            return
+        first_question = self.questions[0]
+        print(f'total participants={first_question.get_total()}')
+        print('------before----after----any----')
+        for row_number, question in enumerate(self.questions, start=1):
+            print(f'{row_number}.\t\t{question.without_formatting_count}\t\t{question.with_formatting_count}\t\t  '
+                  f'{question.any_formatting_count}')
diff --git a/src/python/evaluation/paper_evaluation/survey_handler/survey_statistics_gathering.py b/src/python/evaluation/paper_evaluation/survey_handler/survey_statistics_gathering.py
@@ -0,0 +1,46 @@
+import argparse
+import json
+import sys
+from pathlib import Path
+
+from src.python.evaluation.evaluation_run_tool import logger
+from src.python.evaluation.paper_evaluation.survey_handler.survey_statistics import SurveyJsonField, SurveyStatistics
+from src.python.review.common.file_system import get_content_from_file
+
+
+def configure_arguments(parser: argparse.ArgumentParser) -> None:
+    """
+    Register the two required positional arguments on `parser`.
+
+    Both values are converted to absolute `Path` objects.
+    """
+    positional_arguments = (
+        ('questions_json_path', 'Path to the JSON with labelled questions'),
+        ('results_json_path', 'Path to the JSON with survey results'),
+    )
+    for argument_name, description in positional_arguments:
+        parser.add_argument(argument_name,
+                            type=lambda value: Path(value).absolute(),
+                            help=description)
+
+
+def main() -> int:
+    """
+    Read both JSON files given on the command line and print the survey statistics.
+
+    :return: 0 on success, 2 if a JSON file is missing or any other error occurs
+    """
+    parser = argparse.ArgumentParser()
+    configure_arguments(parser)
+
+    try:
+        args = parser.parse_args()
+        questions_json = json.loads(get_content_from_file(args.questions_json_path))
+        results_json = json.loads(get_content_from_file(args.results_json_path))
+        stat = SurveyStatistics(
+            questions_json[SurveyJsonField.QUESTIONS.value],
+            results_json[SurveyJsonField.QUESTIONS.value],
+        )
+        stat.print_stat()
+        return 0
+
+    except FileNotFoundError as error:
+        # Name the missing file; fall back to the exception text if no filename is attached
+        logger.error(f'JSON file was not found: {error.filename or error}')
+        return 2
+
+    except Exception:
+        # Top-level boundary: log the traceback and report failure through the exit code
+        logger.exception('An unexpected error.')
+        return 2
+
+
+# Use the value returned by main() as the process exit status.
+if __name__ == '__main__':
+    sys.exit(main())