Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions llama_stack/apis/datasetio/datasetio.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,4 @@ async def iterrows(
...

@webmethod(route="/datasets/{dataset_id}/rows", method="POST")
async def append_rows(
self, dataset_id: str, rows: List[Dict[str, Any]]
) -> None: ...
async def append_rows(self, dataset_id: str, rows: List[Dict[str, Any]]) -> None: ...
2 changes: 1 addition & 1 deletion llama_stack/apis/datasets/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod


class DatasetPurpose(Enum):
class DatasetPurpose(str, Enum):
"""
Purpose of the dataset. Each purpose has a required input data schema.

Expand Down
60 changes: 17 additions & 43 deletions llama_stack/templates/open-benchmark/open_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from typing import Dict, List, Tuple

from llama_stack.apis.common.content_types import URL
from llama_stack.apis.datasets import DatasetPurpose, URIDataSource
from llama_stack.apis.models.models import ModelType
from llama_stack.distribution.datatypes import (
BenchmarkInput,
Expand Down Expand Up @@ -171,60 +171,34 @@ def get_distribution_template() -> DistributionTemplate:
DatasetInput(
dataset_id="simpleqa",
provider_id="huggingface",
url=URL(uri="https://huggingface.co/datasets/llamastack/simpleqa"),
metadata={
"path": "llamastack/simpleqa",
"split": "train",
},
dataset_schema={
"input_query": {"type": "string"},
"expected_answer": {"type": "string"},
"chat_completion_input": {"type": "string"},
},
purpose=DatasetPurpose.eval_messages_answer,
source=URIDataSource(
uri="huggingface://llamastack/simpleqa?split=train",
),
),
DatasetInput(
dataset_id="mmlu_cot",
provider_id="huggingface",
url=URL(uri="https://huggingface.co/datasets/llamastack/mmlu_cot"),
metadata={
"path": "llamastack/mmlu_cot",
"name": "all",
"split": "test",
},
dataset_schema={
"input_query": {"type": "string"},
"expected_answer": {"type": "string"},
"chat_completion_input": {"type": "string"},
},
purpose=DatasetPurpose.eval_messages_answer,
source=URIDataSource(
uri="huggingface://llamastack/mmlu_cot?split=test&name=all",
),
),
DatasetInput(
dataset_id="gpqa_cot",
provider_id="huggingface",
url=URL(uri="https://huggingface.co/datasets/llamastack/gpqa_0shot_cot"),
metadata={
"path": "llamastack/gpqa_0shot_cot",
"name": "gpqa_main",
"split": "train",
},
dataset_schema={
"input_query": {"type": "string"},
"expected_answer": {"type": "string"},
"chat_completion_input": {"type": "string"},
},
purpose=DatasetPurpose.eval_messages_answer,
source=URIDataSource(
uri="huggingface://llamastack/gpqa_0shot_cot?split=test&name=gpqa_main",
),
),
DatasetInput(
dataset_id="math_500",
provider_id="huggingface",
url=URL(uri="https://huggingface.co/datasets/llamastack/math_500"),
metadata={
"path": "llamastack/math_500",
"split": "test",
},
dataset_schema={
"input_query": {"type": "string"},
"expected_answer": {"type": "string"},
"chat_completion_input": {"type": "string"},
},
purpose=DatasetPurpose.eval_messages_answer,
source=URIDataSource(
uri="huggingface://llamastack/math_500?split=test",
),
),
]

Expand Down
70 changes: 20 additions & 50 deletions llama_stack/templates/open-benchmark/run.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -158,62 +158,32 @@ shields:
- shield_id: meta-llama/Llama-Guard-3-8B
vector_dbs: []
datasets:
- dataset_schema:
input_query:
type: string
expected_answer:
type: string
chat_completion_input:
type: string
url:
uri: https://huggingface.co/datasets/llamastack/simpleqa
metadata:
path: llamastack/simpleqa
split: train
- purpose: eval/messages-answer
source:
type: uri
uri: huggingface://llamastack/simpleqa?split=train
metadata: {}
dataset_id: simpleqa
provider_id: huggingface
- dataset_schema:
input_query:
type: string
expected_answer:
type: string
chat_completion_input:
type: string
url:
uri: https://huggingface.co/datasets/llamastack/mmlu_cot
metadata:
path: llamastack/mmlu_cot
name: all
split: test
- purpose: eval/messages-answer
source:
type: uri
uri: huggingface://llamastack/mmlu_cot?split=test&name=all
metadata: {}
dataset_id: mmlu_cot
provider_id: huggingface
- dataset_schema:
input_query:
type: string
expected_answer:
type: string
chat_completion_input:
type: string
url:
uri: https://huggingface.co/datasets/llamastack/gpqa_0shot_cot
metadata:
path: llamastack/gpqa_0shot_cot
name: gpqa_main
split: train
- purpose: eval/messages-answer
source:
type: uri
uri: huggingface://llamastack/gpqa_0shot_cot?split=test&name=gpqa_main
metadata: {}
dataset_id: gpqa_cot
provider_id: huggingface
- dataset_schema:
input_query:
type: string
expected_answer:
type: string
chat_completion_input:
type: string
url:
uri: https://huggingface.co/datasets/llamastack/math_500
metadata:
path: llamastack/math_500
split: test
- purpose: eval/messages-answer
source:
type: uri
uri: huggingface://llamastack/math_500?split=test
metadata: {}
dataset_id: math_500
provider_id: huggingface
scoring_fns: []
Expand Down
3 changes: 3 additions & 0 deletions llama_stack/templates/template.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import yaml
from pydantic import BaseModel, Field

from llama_stack.apis.datasets import DatasetPurpose
from llama_stack.apis.models.models import ModelType
from llama_stack.distribution.datatypes import (
Api,
Expand Down Expand Up @@ -214,7 +215,9 @@ def enum_representer(dumper, data):

# Register YAML representers for the ModelType and DatasetPurpose enums
yaml.add_representer(ModelType, enum_representer)
yaml.add_representer(DatasetPurpose, enum_representer)
yaml.SafeDumper.add_representer(ModelType, enum_representer)
yaml.SafeDumper.add_representer(DatasetPurpose, enum_representer)

for output_dir in [yaml_output_dir, doc_output_dir]:
output_dir.mkdir(parents=True, exist_ok=True)
Expand Down