Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def __init__(self, solutions_df: pd.DataFrame, to_drop_duplicates: bool = False)
task_df[ComparisonColumnName.TUTOR_ERROR.value].dropna().values))
for cell_errors in errors_list:
for error in cell_errors:
self.error_to_freq[error] += 1
self.error_to_freq[error.strip()] += 1
self.task_to_error_freq[task] += 1
self.fragments_with_error += 1
self.task_to_freq = sort_freq_dict(self.task_to_freq)
Expand Down Expand Up @@ -91,7 +91,7 @@ def __init__(self, solutions_df: pd.DataFrame, to_drop_duplicates: bool = False)
def __parse_issues(issues_str: str) -> List[str]:
if pd.isna(issues_str) or issues_str == ERROR_CONST:
return []
return issues_str.split(';')
return list(map(lambda i: i.strip(), issues_str.split(';')))

@staticmethod
def __add_issues(issues_dict: Dict[str, int], issues: List[str]) -> None:
Expand Down
73 changes: 73 additions & 0 deletions src/python/evaluation/paper_evaluation/survey_handler/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# Surveys handlers

These scripts process the survey results for the SIGCSE paper.
We have two surveys (one for Python and one for Java) in which participants choose the fragment
that has better formatting.
In each question, the two fragments are shown in a random order,
so the left fragment may be either the well-formatted one or the badly formatted one.
To handle this, we created one JSON config that labels the fragments of each question and another one with the survey results.
These scripts process these config files.

## Usage

Run [survey_statistics_gathering.py](survey_statistics_gathering.py) with the arguments from the command line.

Required arguments:

`questions_json_path` — path to the JSON with labelled questions;
`results_json_path` — path to the JSON with survey results.

An example of `questions_json` file:
```json
{
"questions": [
{
"number": 1,
"left_fragment": "before_formatting",
"right_fragment": "after_formatting"
},
{
"number": 2,
"left_fragment": "after_formatting",
"right_fragment": "before_formatting"
}
]
}
```

An example of `results_json` file:

```json
{
"questions": [
{
"number": 1,
"left_fragment": 0,
"right_fragment": 11,
"both": 0
},
{
"number": 2,
"left_fragment": 10,
"right_fragment": 0,
"both": 1
}
]
}
```

An example of the statistics:
```text
total participants=11
------before----after----any----
1. 0 11 0
2. 1 10 0
3. 0 11 0
4. 0 11 0
5. 0 11 0
6. 1 10 0
7. 0 11 0
8. 1 8 2
9. 0 11 0
10. 0 8 3
```
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
from dataclasses import dataclass
from enum import Enum, unique
from typing import Any, Dict, List


@dataclass
class Question:
    """Answer counters for a single survey question."""

    # Answers that chose the fragment after formatting.
    with_formatting_count: int = 0
    # Answers that chose the fragment before formatting.
    without_formatting_count: int = 0
    # Answers that chose both fragments.
    any_formatting_count: int = 0

    def get_total(self):
        """Return the sum of the three counters."""
        chose_one_fragment = self.with_formatting_count + self.without_formatting_count
        return chose_one_fragment + self.any_formatting_count


@unique
class SurveyJsonField(Enum):
    """String constants used as keys and labels in the survey JSON files."""

    # Keys of a single question entry.
    NUMBER = 'number'
    LEFT_FRAGMENT = 'left_fragment'
    RIGHT_FRAGMENT = 'right_fragment'

    # Label marking a fragment as the one before formatting, and the key of the "both" counter.
    BEFORE_FORMATTING = 'before_formatting'
    BOTH = 'both'

    # Top-level key holding the list of questions.
    QUESTIONS = 'questions'


@dataclass
class SurveyStatistics:
    """Per-question answer counts built from the labelled questions and the survey results."""

    questions: List[Question]

    def __init__(self, questions_json: List[Dict[str, Any]], results_json: List[Dict[str, int]]):
        """Build one ``Question`` per entry of ``results_json``.

        The matching entry of ``questions_json`` (same question number) tells
        which side held the fragment before formatting, so the left/right
        counters can be mapped onto the without/with formatting counters.

        Raises:
            ValueError: if a result refers to a question number that is absent
                from ``questions_json``.
        """
        number_key = SurveyJsonField.NUMBER.value
        left_key = SurveyJsonField.LEFT_FRAGMENT.value
        right_key = SurveyJsonField.RIGHT_FRAGMENT.value
        both_key = SurveyJsonField.BOTH.value
        before_label = SurveyJsonField.BEFORE_FORMATTING.value

        self.questions = []
        for answers in results_json:
            labels = self.__find_json_question(questions_json, answers[number_key])
            # Decide which side's counter belongs to the unformatted fragment.
            if labels[left_key] == before_label:
                unformatted_key, formatted_key = left_key, right_key
            else:
                unformatted_key, formatted_key = right_key, left_key
            self.questions.append(Question(
                without_formatting_count=answers[unformatted_key],
                with_formatting_count=answers[formatted_key],
                any_formatting_count=answers[both_key],
            ))

    @staticmethod
    def __find_json_question(questions_json: List[Dict[str, Any]], question_number: int) -> Dict[str, Any]:
        """Return the first labelled question with the given number or raise ``ValueError``."""
        number_key = SurveyJsonField.NUMBER.value
        candidates = (entry for entry in questions_json if entry[number_key] == question_number)
        found = next(candidates, None)
        if found is None:
            raise ValueError(f'Did not find question {question_number}')
        return found

    def print_stat(self):
        """Print the participants total (taken from the first question) and one row per question."""
        if not self.questions:
            print('No questions found')
            return
        first_question = self.questions[0]
        print(f'total participants={first_question.get_total()}')
        print('------before----after----any----')
        for position, question in enumerate(self.questions, start=1):
            row = (f'{position}.\t\t{question.without_formatting_count}'
                   f'\t\t{question.with_formatting_count}\t\t {question.any_formatting_count}')
            print(row)
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import argparse
import json
import sys
from pathlib import Path

from src.python.evaluation.evaluation_run_tool import logger
from src.python.evaluation.paper_evaluation.survey_handler.survey_statistics import SurveyJsonField, SurveyStatistics
from src.python.review.common.file_system import get_content_from_file


def configure_arguments(parser: argparse.ArgumentParser) -> None:
    """Register the two required positional path arguments on ``parser``.

    Each value is converted to an absolute ``Path`` while parsing.
    """
    positional_arguments = (
        ('questions_json_path', 'Path to the JSON with labelled questions'),
        ('results_json_path', 'Path to the JSON with survey results'),
    )
    for argument_name, help_text in positional_arguments:
        parser.add_argument(argument_name,
                            type=lambda value: Path(value).absolute(),
                            help=help_text)


def main() -> int:
    """Read both JSON files given on the command line and print the survey statistics.

    Returns:
        0 on success; 2 if a JSON file is missing or any other error occurs.
    """
    parser = argparse.ArgumentParser()
    configure_arguments(parser)

    try:
        args = parser.parse_args()
        questions_json = json.loads(get_content_from_file(args.questions_json_path))
        results_json = json.loads(get_content_from_file(args.results_json_path))
        stat = SurveyStatistics(
            questions_json[SurveyJsonField.QUESTIONS.value],
            results_json[SurveyJsonField.QUESTIONS.value],
        )
        stat.print_stat()
        return 0

    except FileNotFoundError as error:
        # Include the exception text so the log shows which file is missing.
        logger.error(f'JSON file was not found: {error}')
        return 2

    except Exception:
        # Top-level boundary: log the traceback and report failure via the exit code.
        logger.exception('An unexpected error.')
        return 2


# Script entry point: use the value returned by main() as the process exit code.
if __name__ == '__main__':
    exit_code = main()
    sys.exit(exit_code)