diff --git a/src/main/java/org/breedinginsight/brapps/importer/services/processors/GermplasmProcessor.java b/src/main/java/org/breedinginsight/brapps/importer/services/processors/GermplasmProcessor.java index 81d9bef93..aad1720d9 100644 --- a/src/main/java/org/breedinginsight/brapps/importer/services/processors/GermplasmProcessor.java +++ b/src/main/java/org/breedinginsight/brapps/importer/services/processors/GermplasmProcessor.java @@ -115,6 +115,17 @@ public GermplasmProcessor(BrAPIGermplasmService brAPIGermplasmService, DSLContex public void getExistingBrapiData(List importRows, Program program) throws ApiException { + // BI-2573 - sort by entry no here so ordering is consistent everywhere in processor + importRows.sort((left, right) -> { + if (left.getGermplasm().getEntryNo() == null || right.getGermplasm().getEntryNo() == null) { + return 0; + } else { + Integer leftEntryNo = Integer.parseInt(left.getGermplasm().getEntryNo()); + Integer rightEntryNo = Integer.parseInt(right.getGermplasm().getEntryNo()); + return leftEntryNo.compareTo(rightEntryNo); + } + }); + // Get all of our objects specified in the data file by their unique attributes Map germplasmAccessionNumbers = new HashMap<>(); for (int i = 0; i < importRows.size(); i++) { @@ -265,16 +276,7 @@ public Map process(ImportUpload upload, List
entryNumberCounts = new HashMap<>(); List userProvidedEntryNumbers = new ArrayList<>(); ValidationErrors validationErrors = new ValidationErrors(); - // Sort importRows by entry number (if present). - importRows.sort((left, right) -> { - if (left.getGermplasm().getEntryNo() == null || right.getGermplasm().getEntryNo() == null) { - return 0; - } else { - Integer leftEntryNo = Integer.parseInt(left.getGermplasm().getEntryNo()); - Integer rightEntryNo = Integer.parseInt(right.getGermplasm().getEntryNo()); - return leftEntryNo.compareTo(rightEntryNo); - } - }); + for (int i = 0; i < importRows.size(); i++) { log.debug("processing germplasm row: " + (i+1)); BrAPIImport brapiImport = importRows.get(i); diff --git a/src/test/java/org/breedinginsight/brapps/importer/GermplasmFileImportTest.java b/src/test/java/org/breedinginsight/brapps/importer/GermplasmFileImportTest.java index 9a58281bf..d248d42ad 100644 --- a/src/test/java/org/breedinginsight/brapps/importer/GermplasmFileImportTest.java +++ b/src/test/java/org/breedinginsight/brapps/importer/GermplasmFileImportTest.java @@ -30,18 +30,19 @@ import org.breedinginsight.utilities.Utilities; import org.jooq.DSLContext; import org.junit.jupiter.api.*; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; +import tech.tablesaw.api.Row; import tech.tablesaw.api.Table; import javax.inject.Inject; import java.io.File; import java.time.OffsetDateTime; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.*; import static io.micronaut.http.HttpRequest.GET; import static io.micronaut.http.HttpRequest.POST; +import static org.apache.commons.lang3.StringUtils.isNotBlank; import static org.junit.jupiter.api.Assertions.*; @MicronautTest @@ -733,6 +734,197 @@ public void selfReferenceParentError() { assertEquals(GermplasmProcessor.circularDependency, result.getAsJsonObject("progress").get("message").getAsString()); } + /** + * Test preview and commit data when entry numbers are in ascending order in file. This is the normal case and here + * to catch any possible regressions + * + * @param commit controls whether import is preview or commit + */ + @ParameterizedTest + @ValueSource(booleans = {false, true}) + @SneakyThrows + public void entryNoAscending(boolean commit) { + String pathname = "src/test/resources/files/germplasm_import/entry_no_asc.csv"; + Table fileData = Table.read().file(pathname); + String listName = "EntryNoAsc"; + String listDescription = "Entry numbers in ascending order with pedigree"; + + JsonObject result = importGermplasm(pathname, listName, listDescription, commit); + assertEquals(200, result.getAsJsonObject("progress").get("statuscode").getAsInt()); + + // preview table is sorted by entry number + fileData = fileData.sortAscendingOn("Entry No"); + + JsonArray previewRows = result.get("preview").getAsJsonObject().get("rows").getAsJsonArray(); + checkEntryNoFields(fileData, previewRows, commit, listName, listDescription, "EntryNoAscGerm 2", "EntryNoAscGerm 3"); + } + + /** + * Prior to BI-2573 this file would result in a false positive circular dependency error. The reason is that when + * entry no order did not match germplasm file order, pedigree information was assigned to the wrong germplasm + * record due to inconsistencies in sorting during processing. + * + * Test preview and commit data when entry numbers are in descending order in file + * + * @param commit controls whether import is preview or commit + */ + @ParameterizedTest + @ValueSource(booleans = {false, true}) + @SneakyThrows + public void entryNoDescending(boolean commit) { + String pathname = "src/test/resources/files/germplasm_import/entry_no_desc.csv"; + Table fileData = Table.read().file(pathname); + String listName = "EntryNoDesc"; + String listDescription = "Entry numbers in descending order with pedigree"; + + JsonObject result = importGermplasm(pathname, listName, listDescription, commit); + assertEquals(200, result.getAsJsonObject("progress").get("statuscode").getAsInt()); + + // preview table is sorted by entry number + fileData = fileData.sortAscendingOn("Entry No"); + + JsonArray previewRows = result.get("preview").getAsJsonObject().get("rows").getAsJsonArray(); + checkEntryNoFields(fileData, previewRows, commit, listName, listDescription, "EntryNoDescGerm 2", "EntryNoDescGerm 1"); + } + + /** + * Shared method to perform entry number order tests for preview and commit + * + * @param fileData raw file data + * @param previewRows processed preview rows + * @param commit whether checking preview or commit fields + * @param listName name of list when committing data + * @param listDescription list description when committing data + * @param motherName name of mother for pedigree check (from file) + * @param fatherName name of father for pedigree check (from file) + */ + private void checkEntryNoFields(Table fileData, JsonArray previewRows, boolean commit, + String listName, String listDescription, + String motherName, String fatherName) { + List germplasmNames = new ArrayList<>(); + + for (int i = 0; i < previewRows.size(); i++) { + JsonObject germplasm = previewRows.get(i).getAsJsonObject().getAsJsonObject("germplasm").getAsJsonObject("brAPIObject"); + germplasmNames.add(germplasm.get("germplasmName").getAsString()); + checkBasicResponse(germplasm, fileData, i); + + if (!commit) { + // preview checks + checkEntryNoPreviewFields(fileData, previewRows, i, motherName, fatherName); + } else { + // commit checks + checkEntryNoCommitFields(fileData, previewRows, i, motherName, fatherName); + } + } + + if (commit) { + // Check the germplasm list + checkGermplasmList(Germplasm.constructGermplasmListName(listName, validProgram), listDescription, germplasmNames); + } + } + + /** + * Check important field in preview data beyond basic info + * - entry number assignment + * - pedigree assignment + * + * @param fileData raw file data + * @param previewRows processed preview rows + */ + private void checkEntryNoPreviewFields(Table fileData, JsonArray previewRows, int i, String motherName, String fatherName) { + JsonObject germplasm = previewRows.get(i).getAsJsonObject().getAsJsonObject("germplasm").getAsJsonObject("brAPIObject"); + + // Check preview specific items + // Germplasm name (display name) + assertEquals(fileData.getString(i, "Germplasm Name"), germplasm.get("germplasmName").getAsString()); + JsonObject additionalInfo = germplasm.getAsJsonObject("additionalInfo"); + + // check entry number assignment + assertEquals(fileData.getString(i, "Entry No"), additionalInfo.get("importEntryNumber").getAsString(), "Wrong entry number"); + // check pedigree entry number assignment + String fileFemaleEntryNo = fileData.getString(i, "Female Parent Entry No"); + if (isNotBlank(fileFemaleEntryNo)) { + assertEquals(fileFemaleEntryNo, additionalInfo.get("femaleParentEntryNo").getAsString(), "Wrong female parent entry number"); + } + String fileMaleEntryNo = fileData.getString(i, "Male Parent Entry No"); + if (isNotBlank(fileMaleEntryNo)) { + assertEquals(fileMaleEntryNo, additionalInfo.get("maleParentEntryNo").getAsString(), "Wrong male parent entry number"); + } + + // check preview pedigree values + // only care about entry nos for this test case, not using GIDs + if (isNotBlank(fileFemaleEntryNo) && isNotBlank(fileMaleEntryNo)) { + String pedigree = germplasm.get("pedigree").getAsString(); + String[] pedigreeParts = pedigree.split("/"); + String mother = pedigreeParts[0]; + String father = pedigreeParts[1]; + assertEquals(motherName, mother, "Wrong mother"); + assertEquals(fatherName, father, "Wrong father"); + } + } + + /** + * Check properties of processed and committed germplasm objects match what would be expected based on file contents. + * Of particular importance are: + * - germplasm are in entry no order + * - gids are assigned in entry no order + * - correct pedigree was assigned to germplasm based on file specification + * + * @param fileData raw file data + * @param previewRows processed preview rows + * @param i row index + */ + private void checkEntryNoCommitFields(Table fileData, JsonArray previewRows, int i, String motherName, String fatherName) { + JsonObject germplasm = previewRows.get(i).getAsJsonObject().getAsJsonObject("germplasm").getAsJsonObject("brAPIObject"); + // Check commit specific items + // Germplasm name (display name) + String expectedGermplasmName = String.format("%s [%s-%s]", fileData.getString(i, "Germplasm Name"), validProgram.getKey(), germplasm.get("accessionNumber").getAsString()); + assertEquals(expectedGermplasmName, germplasm.get("germplasmName").getAsString()); + // Created Date + JsonObject additionalInfo = germplasm.getAsJsonObject("additionalInfo"); + assertTrue(additionalInfo.has(BrAPIAdditionalInfoFields.CREATED_DATE), "createdDate is missing"); + // Accession Number + assertTrue(germplasm.has("accessionNumber"), "accessionNumber missing"); + + // check that gids are assigned in entry no order + if (i > 0) { + JsonObject previousGermplasm = previewRows.get(i-1).getAsJsonObject().getAsJsonObject("germplasm").getAsJsonObject("brAPIObject"); + int lastEntryNo = previousGermplasm.getAsJsonObject("additionalInfo").get("importEntryNumber").getAsInt(); + int lastGid = previousGermplasm.get("accessionNumber").getAsInt(); + int currentEntryNo = additionalInfo.get("importEntryNumber").getAsInt(); + int currentGid = germplasm.get("accessionNumber").getAsInt(); + assertEquals(1, currentEntryNo-lastEntryNo, "Expected entry number to be monotonically increasing"); + assertEquals(1, currentGid-lastGid, "Expected GID to be monotonically increasing"); + } + + // pedigree check just for single germplasm in file with pedigree info + String fileFemaleEntryNo = fileData.getString(i, "Female Parent Entry No"); + String fileMaleEntryNo = fileData.getString(i, "Male Parent Entry No"); + + // only care about entry nos for this test case, not using GIDs + if (isNotBlank(fileFemaleEntryNo) && isNotBlank(fileMaleEntryNo)) { + String pedigree = germplasm.get("pedigree").getAsString(); + String[] pedigreeParts = pedigree.split("/"); + String mother = pedigreeParts[0]; + String father = pedigreeParts[1]; + String regexMatcher = "^(.*\\b) \\[([A-Z]{2,6})-(\\d+)\\]$"; + assertTrue(mother.matches(String.format(regexMatcher, motherName)), "Wrong mother"); + assertTrue(father.matches(String.format(regexMatcher, fatherName)), "Wrong father"); + } + + // External Reference germplasm + JsonArray externalReferences = germplasm.getAsJsonArray("externalReferences"); + boolean referenceFound = false; + for (JsonElement reference: externalReferences) { + String referenceSource = reference.getAsJsonObject().get("referenceSource").getAsString(); + if (referenceSource.equals(BRAPI_REFERENCE_SOURCE)) { + referenceFound = true; + break; + } + } + assertTrue(referenceFound, "Germplasm UUID reference not found"); + } + private JsonObject importGermplasm(String pathname, String listName, String listDescription, Boolean commit) throws InterruptedException { File file = new File(pathname); diff --git a/src/test/resources/files/germplasm_import/entry_no_asc.csv b/src/test/resources/files/germplasm_import/entry_no_asc.csv new file mode 100644 index 000000000..f2f135bef --- /dev/null +++ b/src/test/resources/files/germplasm_import/entry_no_asc.csv @@ -0,0 +1,4 @@ +GID,Germplasm Name,Breeding Method,Source,Female Parent GID,Male Parent GID,Entry No,Female Parent Entry No,Male Parent Entry No,External UID,Synonyms +,EntryNoAscGerm 1,BCR,Test,,,1,2,3,1, +,EntryNoAscGerm 2,BCR,Test,,,2,,,2, +,EntryNoAscGerm 3,BCR,Test,,,3,,,3, \ No newline at end of file diff --git a/src/test/resources/files/germplasm_import/entry_no_desc.csv b/src/test/resources/files/germplasm_import/entry_no_desc.csv new file mode 100644 index 000000000..a47a1f7d4 --- /dev/null +++ b/src/test/resources/files/germplasm_import/entry_no_desc.csv @@ -0,0 +1,4 @@ +GID,Germplasm Name,Breeding Method,Source,Female Parent GID,Male Parent GID,Entry No,Female Parent Entry No,Male Parent Entry No,External UID,Synonyms +,EntryNoDescGerm 1,BCR,Test,,,3,,,3, +,EntryNoDescGerm 2,BCR,Test,,,2,,,2, +,EntryNoDescGerm 3,BCR,Test,,,1,2,3,1, \ No newline at end of file