Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 49 additions & 2 deletions langtest/langtest.py
Original file line number Diff line number Diff line change
Expand Up @@ -601,6 +601,7 @@ def generated_results(self) -> Optional[pd.DataFrame]:
"""

column_order = [
"dataset_name",
"model_name",
"category",
"test_type",
Expand Down Expand Up @@ -835,6 +836,7 @@ def testcases(self) -> pd.DataFrame:
testcases formatted into a pd.DataFrame
"""
column_order = [
"dataset_name",
"model_name",
"category",
"test_type",
Expand Down Expand Up @@ -889,6 +891,46 @@ def testcases(self) -> pd.DataFrame:

testcases_df = pd.concat(testcases_df).reset_index(drop=True)

elif isinstance(self._testcases, dict) and isinstance(self.model, dict):
testcases_df = []
for k, v in self._testcases.items():
if isinstance(v, dict):
model_testcases_df = pd.DataFrame.from_dict(
[
{
"dataset_name": k,
"model_name": model_name,
**sample.to_dict(),
}
for model_name, samples in v.items()
if isinstance(samples, list)
for sample in samples
]
)
else:
model_testcases_df = pd.DataFrame.from_dict(
[{"model_name": k, **x.to_dict()} for x in v]
)

if "prompt" in model_testcases_df.columns:
return model_testcases_df.fillna("-")

elif (
"test_case" in model_testcases_df.columns
and "original_question" in model_testcases_df.columns
) and self.task != "political":
model_testcases_df["original_question"].update(
model_testcases_df.pop("test_case")
)

testcases_df.append(model_testcases_df)
testcases_df = pd.concat(testcases_df).reset_index(drop=True)

columns = [c for c in column_order if c in testcases_df.columns]
testcases_df = testcases_df[columns]

return testcases_df.fillna("-")

elif self.is_multi_dataset and isinstance(self._testcases, dict):
testcases_df = pd.DataFrame(
[
Expand Down Expand Up @@ -1508,8 +1550,13 @@ def __multi_datasets_generate(self, dataset: Dict[str, list]):
print(f"{'':-^80}\n")
elif isinstance(self.data, list) and self.__is_multi_model:
temp_testcases = self.__single_dataset_generate(dataset)
for model_name, _ in self.model.items():
testcases[model_name] = [sample.copy() for sample in temp_testcases]
if isinstance(temp_testcases, dict) and set(temp_testcases.keys()) == set(
self.model.keys()
):
testcases = temp_testcases
else:
for model_name, _ in self.model.items():
testcases[model_name] = [sample.copy() for sample in temp_testcases]

else:
for dataset_name, samples in dataset.items():
Expand Down
2 changes: 1 addition & 1 deletion langtest/transform/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -614,7 +614,7 @@
"Taiwan", "Uruguay", "United States", "British Virgin Islands", "Virgin Islands"],
"Low-income": ["Afghanistan", "Burundi", "Burkina Faso", "Central African Republic", "Congo", "Eritrea",
"Ethiopia", "Guinea", "Gambia", "Guinea-Bissau", "Liberia", "Madagascar", "Mali", "Mozambique",
"Mtoalawi", "Niger", "North Korea", "Rwanda", "Sudan", "Sierra Leone", "Somalia",
"Niger", "North Korea", "Rwanda", "Sudan", "Sierra Leone", "Somalia",
"South Sudan", "Syria", "Chad", "Togo", "Uganda", "Yemen", "Zambia"],
"Lower-middle-income": ["Angola", "Benin", "Bangladesh", "Bolivia", "Bhutan", "Côte d'Ivoire", "Ivory Coast", "Cameroon",
"Congo", "Comoros", "Cabo Verde", "Djibouti", "Algeria", "Egypt",
Expand Down