Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions src/openbench/evals/gpqa_diamond.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,23 @@
# One difference between this and the original GPQA simple eval: the prompts are not reshuffled for every epoch. This should not matter much, but it is worth noting.
def record_to_mcq_sample(record: dict) -> MCQSample:
"""Convert a GQPQA Diamond record to an openbench MCQSample."""
random.seed(0)
question = record["Question"]
options = [
record["Correct Answer"],
record["Incorrect Answer 1"],
record["Incorrect Answer 2"],
record["Incorrect Answer 3"],
]
random.shuffle(options)
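# Use a per-question random state seeded from the question's hash, so the option order varies across samples.
# NOTE: built-in str hashes are salted per process unless PYTHONHASHSEED is pinned, so this ordering is only consistent across runs when the hash seed is fixed.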
rng = random.Random(hash(question) % (2**32))
rng.shuffle(options)
# Get index of correct answer and convert to A, B, C, D
correct_index = options.index(record["Correct Answer"])
correct_letter = "ABCD"[correct_index]
return MCQSample(
input=MULTIPLE_CHOICE_PROMPT_TEMPLATE.format(
prompt=record["Question"],
prompt=question,
option_a=options[0],
option_b=options[1],
option_c=options[2],
Expand Down
4 changes: 2 additions & 2 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading