-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdata.py
More file actions
69 lines (55 loc) · 2.96 KB
/
data.py
File metadata and controls
69 lines (55 loc) · 2.96 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
"""Downloads the MacGyver problem–solution corpus from the `allenai/MacGyver`
GitHub release, applies the step-wise MacGyver prompt template (paper §5),
filters to solvable problems, and shuffles deterministically with seed 42.
"""
import pandas as pd
import numpy as np
# Fetch the problem/solution spreadsheet from the allenai/MacGyver repository.
# The `?raw=true` query asks GitHub for the file itself rather than the HTML
# viewer page for the blob.
df = pd.read_excel(
    "https://github.com/allenai/MacGyver/blob/main/data/MacGyver/problem_solution_pair.xlsx?raw=true",
    engine="openpyxl",
)

# Cache the sheet locally as CSV. index=False keeps the pandas row index out
# of the file; without it the index is written as an unnamed first column and
# shows up as a spurious extra column when the CSV is read back.
df.to_csv("problem_solution_pair.csv", index=False)
# Skeleton shared by every example. The three positional slots are filled, in
# order, with the instruction text, the problem statement, and the solution
# steps generated so far.
macgyver_prompt = """
{}
### Problem:
{}
### Existing steps, if any:
{}"""
# Not read by format_macgyver_prompt. Presumably the number of candidate
# versions generated per step -- TODO confirm against the code that uses it.
num_stepvers = 10
# Number of the step the model is asked to produce; interpolated into the
# instruction text.
step_num = 1
# Step budget, spelled out as a word because it is interpolated into prose.
max_steps = "ten"
# End-of-sequence marker appended to every example; without it generation
# has no stop signal and will go on forever.
EOS_TOKEN = "<|eot_id|>"


def format_macgyver_prompt(examples):
    """Render the MacGyver next-step prompt for a batch of problems.

    Args:
        examples: Column-oriented batch: a mapping from column name to a list
            of values. Only the "Problem" column is read.

    Returns:
        A dict with the single key "text" whose value is a list containing one
        rendered prompt per problem. Each prompt is the instruction text, the
        problem, an empty "existing steps" section, and EOS_TOKEN.

    Raises:
        KeyError: If the batch has no "Problem" column.
    """
    # step_num and max_steps are module globals, looked up on every call, so
    # reassigning them before calling changes the rendered instructions.
    instructions = f"""Please act as Macgyver, an intelligent person skilled in using ordinary tools in unconventional ways to solve problems.
Given the problem below, create ONE possible next step {step_num} to a multi-stage solution considering all the constraints and previous steps, if any.
Solve the problem in the fewest steps possible.
Arrive at the complete solution by step {max_steps}, such that it can solve the problem.
Be clear, specific and concise, maintaining practicality.
Ensure that the step you generate brings you significantly closer to solving the problem fully.
Do not include explanation in your response.
Do not generate step {step_num + 1}, etc.
Do NOT generate anything extra other than the one step, and limit the length of the one step you generate to one sentence maximum.
Make your response creative and innovative.
Respond STRICTLY in this format:
Step {step_num}: <generate version of step {step_num} here>
If a new step does not need to be generated to solve the problem, respond strictly with "STOP"
"""
    # TODO(review): prompt-wording ideas carried over from earlier drafts, not
    # yet applied: (1) tell the model how many steps are already listed under
    # the problem; (2) ask it to use the items in creative and innovative
    # ways; (3) ask for an explicit "Complete: Y" / "Complete: N" verdict.
    #
    # NOTE(review): the "Solution" column is not included in the rendered
    # text and the "existing steps" slot is always empty -- confirm that a
    # prompt-only example is what downstream code expects.
    return {
        "text": [
            macgyver_prompt.format(instructions, problem, "") + EOS_TOKEN
            for problem in examples["Problem"]
        ],
    }
from datasets import load_dataset
# Seed for the shuffle below, fixed so the example order is reproducible.
seed1 = 42

# Build the final dataset in one pass: load the cached CSV, render the prompt
# text for every row, keep only rows whose "Solvable?" value is "Yes", then
# shuffle with the fixed seed.
macgyver = (
    load_dataset("csv", data_files="problem_solution_pair.csv", split="train")
    .map(format_macgyver_prompt, batched=True)
    .filter(lambda example: example["Solvable?"] == "Yes")
    .shuffle(seed=seed1)
    # Rewrite the rows in their shuffled order so the dataset no longer
    # carries an indices mapping.
    .flatten_indices()
)