diff --git a/evals/registry/data/00_scipaper_enzyme_activate_compound/samples.jsonl b/evals/registry/data/00_scipaper_enzyme_activate_compound/samples.jsonl new file mode 100644 index 0000000000..488ee911ff --- /dev/null +++ b/evals/registry/data/00_scipaper_enzyme_activate_compound/samples.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfbe6bb20ea161238ab3d5e3404d9df76514d88c7ed9fe528d2984d0d9fb7d07 +size 538 diff --git a/evals/registry/data/00_scipaper_enzyme_inhibitor/samples.jsonl b/evals/registry/data/00_scipaper_enzyme_inhibitor/samples.jsonl new file mode 100644 index 0000000000..116a945850 --- /dev/null +++ b/evals/registry/data/00_scipaper_enzyme_inhibitor/samples.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49193f70b4942b3ad8aaac0497da4b37b6de40ebde79edb2ec3df6bfc0972924 +size 1444 diff --git a/evals/registry/data/00_scipaper_enzyme_localization/samples.jsonl b/evals/registry/data/00_scipaper_enzyme_localization/samples.jsonl new file mode 100644 index 0000000000..ecf84e1650 --- /dev/null +++ b/evals/registry/data/00_scipaper_enzyme_localization/samples.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2909201b52e0c8f3e71ee569a65a72e4dfbdede7dde2d427c04e3e43185a86ca +size 467 diff --git a/evals/registry/data/00_scipaper_enzyme_substrate/samples.jsonl b/evals/registry/data/00_scipaper_enzyme_substrate/samples.jsonl new file mode 100644 index 0000000000..5cf918e3d3 --- /dev/null +++ b/evals/registry/data/00_scipaper_enzyme_substrate/samples.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6316846852a855013f98ee678e945582013c1269fcad311c8e933859ade77c68 +size 1919 diff --git a/evals/registry/eval_sets/chemistry_enzyme.yaml b/evals/registry/eval_sets/chemistry_enzyme.yaml new file mode 100644 index 0000000000..7a9b3525df --- /dev/null +++ b/evals/registry/eval_sets/chemistry_enzyme.yaml @@ -0,0 +1,6 @@ +chemistry_enzyme: + evals: + - 
scipaper_enzyme_substrate + - scipaper_enzyme_activate_compound + - scipaper_enzyme_inhibitor + - scipaper_enzyme_localization \ No newline at end of file diff --git a/evals/registry/evals/00_scipaper_enzyme_activate_compound.yaml b/evals/registry/evals/00_scipaper_enzyme_activate_compound.yaml new file mode 100644 index 0000000000..7241a1fe5c --- /dev/null +++ b/evals/registry/evals/00_scipaper_enzyme_activate_compound.yaml @@ -0,0 +1,18 @@ +scipaper_enzyme_activate_compound: + id: scipaper_enzyme_activate_compound.val.csv + metrics: [accuracy] + +scipaper_enzyme_activate_compound.val.csv: + class: evals.elsuite.rag_table_extract:TableExtract + args: + samples_jsonl: 00_scipaper_enzyme_activate_compound/samples.jsonl + instructions: | + Please give a complete list of Activating Compound, Comment and Organism of all substrates in the paper. Usually the substrates' tags are numbers or IUPAC names. + 1. Output in csv format, write units not in header but in the value like "10.5 µM". Quote the value if it has comma! For example: + ```csv + Activating Compound,Comment,Organism + Cu2+,at 0.001 mM of the activity without activator,Homo sapiens + p-xylene,"11.4 mM, slight activation",Bos taurus + NH4+, 0.002 mM,Bos taurus + ``` + 2. If there are multiple tables, concat them. Don't give me reference or using "...", give me complete table! diff --git a/evals/registry/evals/00_scipaper_enzyme_inhibitor.yaml b/evals/registry/evals/00_scipaper_enzyme_inhibitor.yaml new file mode 100644 index 0000000000..3c712c4701 --- /dev/null +++ b/evals/registry/evals/00_scipaper_enzyme_inhibitor.yaml @@ -0,0 +1,18 @@ +scipaper_enzyme_inhibitor: + id: scipaper_enzyme_inhibitor.val.csv + metrics: [accuracy] + +scipaper_enzyme_inhibitor.val.csv: + class: evals.elsuite.rag_table_extract:TableExtract + args: + samples_jsonl: 00_scipaper_enzyme_inhibitor/samples.jsonl + instructions: | + Please give a complete list of Inhibitor, Comment and Organism of all substrates in the paper. 
Usually the substrates' tags are numbers or IUPAC names. + 1. Output in csv format, write units not in header but in the value like "10.5 µM". Quote the value if it has comma! For example: + ```csv + Inhibitor,Comment,Organism + ATP,"competitive inhibition of verapamil-dependent ATPase-activity",Homo sapiens + p-xylene,"11.4 mM, slight inhibitor",Bos taurus + NH4+, 0.002 mM,Bos taurus + ``` + 2. If there are multiple tables, concat them. Don't give me reference or using "...", give me complete table! diff --git a/evals/registry/evals/00_scipaper_enzyme_localization.yaml b/evals/registry/evals/00_scipaper_enzyme_localization.yaml new file mode 100644 index 0000000000..3c6ca4e590 --- /dev/null +++ b/evals/registry/evals/00_scipaper_enzyme_localization.yaml @@ -0,0 +1,16 @@ +scipaper_enzyme_localization: + id: scipaper_enzyme_localization.val.csv + metrics: [accuracy] + +scipaper_enzyme_localization.val.csv: + class: evals.elsuite.rag_table_extract:TableExtract + args: + samples_jsonl: 00_scipaper_enzyme_localization/samples.jsonl + instructions: | + Please give a complete list of Localization, Comment and Organism of all substrates in the paper. Usually the substrates' tags are numbers or IUPAC names. + 1. Output in csv format, write units not in header but in the value like "10.5 µM". Quote the value if it has comma! For example: + ```csv + Localization,Organism + periplasm,Bos taurus + ``` + 2. If there are multiple tables, concat them. Don't give me reference or using "...", give me complete table! 
diff --git a/evals/registry/evals/00_scipaper_enzyme_substrate.yaml b/evals/registry/evals/00_scipaper_enzyme_substrate.yaml new file mode 100644 index 0000000000..b266b07e05 --- /dev/null +++ b/evals/registry/evals/00_scipaper_enzyme_substrate.yaml @@ -0,0 +1,19 @@ +scipaper_enzyme_substrate: + id: scipaper_enzyme_substrate.val.csv + metrics: [accuracy] + +scipaper_enzyme_substrate.val.csv: + class: evals.elsuite.rag_table_extract:TableExtract + args: + samples_jsonl: 00_scipaper_enzyme_substrate/samples.jsonl + instructions: | + Please give a complete list of SMILES structures, Km values, Vmax values, target info (protein or cell line), and organism of all substrates in the paper. Usually the substrates' tags are numbers or IUPAC names. + 1. Output in csv format, write units not in header but in the value like "10.5 µM". Quote the value if it has comma! For example: + ```csv + Substrate,Inhibitors, Km value,Km max,Comment,organism,Vmax value,SMILES,Target info,Activating Compound, + ATP,Cu2+,0.001 mM,-,-,Homo sapiens,-,-,ATP-linker aldehyde,Carboxybenzaldehyde, + p-xylene,NADH,0.004 mM,-,-,Homo sapiens,-,C1CCCCC1,-,Methylbenzaldehyde + NADPH,benzaldehyde, 0.12 mM,125 mM,enzyme form ATP,Bos taurus,-,-,NH4+ + + ``` + 2. If there are multiple tables, concat them. Don't give me reference or using "...", give me complete table!