Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
8672677
Add leaderboard functionality
chakravarthik27 Mar 22, 2024
082baae
Refactor leaderboard.py and add checkpoints directory
chakravarthik27 Mar 27, 2024
73d2504
leaderboard.py: Refactored logic for configuring the harness and savi…
Prikshit7766 Mar 27, 2024
6d8ef40
Add "reports" directory to required_dirs list
Prikshit7766 Mar 27, 2024
f9676f7
Refactor leaderboard output-dir logic
Prikshit7766 Mar 27, 2024
8ce959e
updated leaderboard.py and helpers.py
Prikshit7766 Mar 28, 2024
5bfa8f4
fix score logic for accuracy and robustness
Prikshit7766 Mar 28, 2024
0cc9423
leaderboard.py: added method for saving accuracy, robustness summary …
Prikshit7766 Mar 28, 2024
6d0a564
added support for muti-dataset in leaderboard and added update_leader…
Prikshit7766 Mar 29, 2024
bd4a277
Merge branch 'release/2.0.1' of https://github.com/JohnSnowLabs/langt…
Prikshit7766 Mar 30, 2024
d1769b8
Add blank line for readability in helpers.py
chakravarthik27 Mar 30, 2024
f65712c
updated leaderboard.py
Prikshit7766 Mar 30, 2024
ba4b7c7
Refactor JSONLDataset to aggregate JSONL files
chakravarthik27 Mar 30, 2024
94e6311
updated datasource
Prikshit7766 Mar 30, 2024
94b8eb8
updated leaderboard.py
Prikshit7766 Mar 30, 2024
666278a
resolved: default datasets paths
chakravarthik27 Mar 31, 2024
da2d5cd
Fix dataset name typo in SecuritySample class
chakravarthik27 Mar 31, 2024
c26b258
Fix condition for checking custom labels in DataFactory class
chakravarthik27 Mar 31, 2024
8b871b5
Fix JSONL file loading in DataFactory and JSONLDataset
chakravarthik27 Mar 31, 2024
1733e07
Fix type hinting and formatting in datasource.py and leaderboard.py
chakravarthik27 Mar 31, 2024
67b8841
Add saving generated results and model responses to CSV files, and pr…
chakravarthik27 Apr 1, 2024
fe2bc19
Add show-leaderboard command to langtest leaderboard.py
chakravarthik27 Apr 1, 2024
46c26de
Add MMLU dataset to JSONLDataset in aggregate_jsonl()
chakravarthik27 Apr 1, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions langtest/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from langtest import Harness
from langtest.config import cli
from langtest.pipelines.embedding import benchmark
from langtest.leaderboard import * # noqa

# Build a click CommandCollection from the `cli` and `benchmark` command
# sources, with "LangTest CLI" as its help text.
# NOTE(review): the collection is neither assigned to a name nor invoked on
# this line — confirm that code outside this view actually makes use of it.
click.CommandCollection(sources=[cli, benchmark], help="LangTest CLI")

Expand Down
142 changes: 142 additions & 0 deletions langtest/datahandler/dataset_info.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
# Registry of the bundled benchmark datasets.
#
# Each leaf entry has the shape
#     {"split": <tuple of split names>, "extension": <file suffix>}
# Most datasets map directly to a leaf entry; "Bigbench", "PubMedQA" and
# "MedMCQA" instead map sub-dataset names to leaf entries.

# Split names shared verbatim by "CommonsenseQA" and "PIQA".
_TEST_AND_VALIDATION_SPLITS = (
    "test-tiny",
    "test",
    "validation-tiny",
    "validation",
    "sample-test-tiny",
)

# Subject names shared verbatim by "MedMCQA-Test" and "MedMCQA-Validation".
_MEDMCQA_SUBJECTS = (
    "Anaesthesia",
    "Anatomy",
    "Biochemistry",
    "Dental",
    "ENT",
    "Forensic_Medicine",
    "Gynaecology_Obstetrics",
    "Medicine",
    "Microbiology",
    "Ophthalmology",
    "Pathology",
    "Pediatrics",
    "Pharmacology",
    "Physiology",
    "Psychiatry",
    "Radiology",
    "Skin",
    "Social_Preventive_Medicine",
    "Surgery",
    "Unknown",
)

datasets_info = {
    "BoolQ": {
        "split": ("test-tiny", "test", "dev-tiny", "dev", "combined"),
        "extension": ".jsonl",
    },
    "NQ-open": {
        "split": ("test-tiny", "test", "combined"),
        "extension": ".jsonl",
    },
    "XSum": {
        "split": ("test-tiny", "test"),
        "extension": ".jsonl",
    },
    "TruthfulQA": {
        "split": ("test-tiny", "test", "combined"),
        "extension": ".jsonl",
    },
    "MMLU": {
        "split": ("test-tiny", "test", "clinical"),
        "extension": ".jsonl",
    },
    "OpenBookQA": {
        "split": ("test-tiny", "test"),
        "extension": ".jsonl",
    },
    "Quac": {
        "split": ("test-tiny", "test"),
        "extension": ".jsonl",
    },
    "Toxicity": {
        "split": ("test",),
        "extension": ".jsonl",
    },
    "NarrativeQA": {
        "split": ("test-tiny", "test"),
        "extension": ".jsonl",
    },
    "HellaSwag": {
        "split": ("test-tiny", "test"),
        "extension": ".jsonl",
    },
    "Translation": {
        "split": ("test",),
        "extension": ".jsonl",
    },
    "BBQ": {
        "split": ("test-tiny", "test"),
        "extension": ".jsonl",
    },
    "Prompt-Injection-Attack": {
        "split": ("test",),
        "extension": ".jsonl",
    },
    # "Clinical" uses file-group names rather than test/dev style split names.
    "Clinical": {
        "split": (
            "Medical-files",
            "Gastroenterology-files",
            "Oromaxillofacial-files",
        ),
        "extension": ".jsonl",
    },
    "ASDiv": {
        "split": ("test-tiny", "test"),
        "extension": ".jsonl",
    },
    # Nested entry: one leaf per Bigbench sub-dataset.
    "Bigbench": {
        "Causal-judgment": {
            "split": ("test-tiny", "test"),
            "extension": ".jsonl",
        },
        "DisflQA": {
            "split": ("test-tiny", "test"),
            "extension": ".jsonl",
        },
        "Abstract-narrative-understanding": {
            "split": ("test-tiny", "test"),
            "extension": ".jsonl",
        },
        "DisambiguationQA": {
            "split": ("test-tiny", "test"),
            "extension": ".jsonl",
        },
    },
    "LogiQA": {
        "split": ("test-tiny", "test"),
        "extension": ".jsonl",
    },
    "Narrative-Wedging": {
        "split": ("test-tiny",),
        "extension": ".jsonl",
    },
    "Wino-test": {
        "split": ("test",),
        "extension": ".jsonl",
    },
    "Legal-Support": {
        "split": ("test",),
        "extension": ".jsonl",
    },
    "Factual-Summary-Pairs": {
        "split": ("test",),
        "extension": ".jsonl",
    },
    "MultiLexSum": {
        "split": ("test-tiny", "test"),
        "extension": ".jsonl",
    },
    "wikiDataset": {
        "split": ("test-tiny", "test"),
        "extension": ".jsonl",
    },
    "CommonsenseQA": {
        "split": _TEST_AND_VALIDATION_SPLITS,
        "extension": ".jsonl",
    },
    "SIQA": {
        "split": ("test-tiny", "test"),
        "extension": ".jsonl",
    },
    "PIQA": {
        "split": _TEST_AND_VALIDATION_SPLITS,
        "extension": ".jsonl",
    },
    "Consumer-Contracts": {
        "split": ("test",),
        "extension": ".jsonl",
    },
    "Contracts": {
        "split": ("test",),
        "extension": ".jsonl",
    },
    "Privacy-Policy": {
        "split": ("test",),
        "extension": ".jsonl",
    },
    # The only entry in this table whose extension is not ".jsonl".
    "Crows-Pairs": {
        "split": ("test",),
        "extension": ".csv",
    },
    "StereoSet": {
        "split": ("test",),
        "extension": ".jsonl",
    },
    "Fiqa": {
        "split": ("test",),
        "extension": ".jsonl",
    },
    "MedQA": {
        "split": ("test-tiny", "test"),
        "extension": ".jsonl",
    },
    "MedicationQA": {
        "split": ("test",),
        "extension": ".jsonl",
    },
    "LiveQA": {
        "split": ("test",),
        "extension": ".jsonl",
    },
    "healthsearchqa": {
        "split": ("test",),
        "extension": ".jsonl",
    },
    # Nested entry: one leaf per PubMedQA sub-dataset.
    "PubMedQA": {
        "pqaa": {
            "split": ("test",),
            "extension": ".jsonl",
        },
        "pqal": {
            "split": ("test",),
            "extension": ".jsonl",
        },
    },
    # Nested entry: both MedMCQA sub-datasets list the same subject names.
    "MedMCQA": {
        "MedMCQA-Test": {
            "split": _MEDMCQA_SUBJECTS,
            "extension": ".jsonl",
        },
        "MedMCQA-Validation": {
            "split": _MEDMCQA_SUBJECTS,
            "extension": ".jsonl",
        },
    },
}
Loading