From 31e1a4fede34960671f886e1c4fdfee51a7ffc25 Mon Sep 17 00:00:00 2001 From: Steven Winship Date: Tue, 21 Nov 2023 14:26:58 -0500 Subject: [PATCH 1/8] Add validation for entered coordinate values in Geospatial metadata --- .../dataverse/DatasetFieldValueValidator.java | 84 ++++++++++++++++++- .../iq/dataverse/search/IndexServiceBean.java | 34 ++------ src/main/java/propertyFiles/Bundle.properties | 1 + .../DatasetFieldValueValidatorTest.java | 11 +++ .../search/IndexServiceBeanTest.java | 50 ++++++++--- 5 files changed, 138 insertions(+), 42 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValueValidator.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValueValidator.java index b6c21014f04..7941fa5efa1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValueValidator.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValueValidator.java @@ -8,9 +8,7 @@ import edu.harvard.iq.dataverse.DatasetFieldType.FieldType; import java.text.ParseException; import java.text.SimpleDateFormat; -import java.util.Calendar; -import java.util.Date; -import java.util.GregorianCalendar; +import java.util.*; import java.util.logging.Logger; import java.util.regex.Pattern; import jakarta.validation.ConstraintValidator; @@ -34,7 +32,6 @@ public void initialize(ValidateDatasetFieldType constraintAnnotation) { } public boolean isValid(DatasetFieldValue value, ConstraintValidatorContext context) { - context.disableDefaultConstraintViolation(); // we do this so we can have different messages depending on the different issue boolean lengthOnly = false; @@ -55,6 +52,33 @@ public boolean isValid(DatasetFieldValue value, ConstraintValidatorContext conte return true; } + // verify no junk in individual fields and values are within range + if (dsfType.getName() != null && (dsfType.getName().equals(DatasetFieldConstant.northLatitude) || dsfType.getName().equals(DatasetFieldConstant.southLatitude) || + dsfType.getName().equals(DatasetFieldConstant.westLongitude) || dsfType.getName().equals(DatasetFieldConstant.eastLongitude))) { + try { + verifyBoundingBoxCoordinatesWithinRange(dsfType.getName(), value.getValue()); + } catch (IllegalArgumentException iae) { + try { + context.buildConstraintViolationWithTemplate(dsfType.getDisplayName() + " " + BundleUtil.getStringFromBundle("dataset.metadata.invalidEntry")).addConstraintViolation(); + } catch (NullPointerException e) { + } + return false; + } + } + + // validate fields that are siblings and depend on each others values + if (value.getDatasetField().getParentDatasetFieldCompoundValue() != null) { + Optional failureMessage = validateChildConstraints(value.getDatasetField()); + if (failureMessage.isPresent()) { + try { + context.buildConstraintViolationWithTemplate(dsfType.getParentDatasetFieldType().getDisplayName() + " " + + BundleUtil.getStringFromBundle(failureMessage.get()) ).addConstraintViolation(); + } catch (NullPointerException npe) { + } + return false; + } + } + if (fieldType.equals(FieldType.TEXT) && !lengthOnly && value.getDatasetField().getDatasetFieldType().getValidationFormat() != null) { boolean valid = value.getValue().matches(value.getDatasetField().getDatasetFieldType().getValidationFormat()); if (!valid) { @@ -216,4 +240,56 @@ public boolean isValidAuthorIdentifier(String userInput, Pattern pattern) { return pattern.matcher(userInput).matches(); } + // Validate child fields against each other and return failure message or null if success + public Optional validateChildConstraints(DatasetField dsf) { + final String fieldName = dsf.getDatasetFieldType().getName() != null ? dsf.getDatasetFieldType().getName() : ""; + Optional returnFailureMessage = Optional.empty(); + + // Validate Child Constraint for Geospatial Bounding Box + // validate the four points of the box to insure proper layout + // only validate on one value for speed. picked northLatitude since it was at the bottom of the UI + if (fieldName.equals(DatasetFieldConstant.northLatitude)) { + final String failureMessage = "dataset.metadata.invalidGeospatialCoordinates"; + DatasetFieldCompoundValue cv = dsf.getParentDatasetFieldCompoundValue(); + List cdsf = cv.getChildDatasetFields(); + try { + if (cdsf.size() == 4) { + if (!validateBoundingBox(cdsf.get(0).getValue(), cdsf.get(1).getValue(), cdsf.get(2).getValue(), cdsf.get(3).getValue())) { + returnFailureMessage = Optional.of(failureMessage); + } + } + } catch (IllegalArgumentException e) { // IllegalArgumentException NumberFormatException + returnFailureMessage = Optional.of(failureMessage); + } + } + + return returnFailureMessage; + } + + public static boolean validateBoundingBox(final String westLon, final String eastLon, final String northLat, final String southLat) { + boolean returnVal = false; + + try { + Float west = verifyBoundingBoxCoordinatesWithinRange(DatasetFieldConstant.westLongitude, westLon); + Float east = verifyBoundingBoxCoordinatesWithinRange(DatasetFieldConstant.eastLongitude, eastLon); + Float north = verifyBoundingBoxCoordinatesWithinRange(DatasetFieldConstant.northLatitude, northLat); + Float south = verifyBoundingBoxCoordinatesWithinRange(DatasetFieldConstant.southLatitude, southLat); + returnVal = east < west && south < north; + } catch (IllegalArgumentException e) { + returnVal = false; + } + + return returnVal; + } + + private static Float verifyBoundingBoxCoordinatesWithinRange(final String name, final String value) throws IllegalArgumentException { + int max = name.equals(DatasetFieldConstant.westLongitude) || name.equals(DatasetFieldConstant.eastLongitude) ? 180 : 90; + int min = max * -1; + + final Float returnVal = value != null ? Float.parseFloat(value) : min; // defaults to min if value is missing + if (returnVal < min || returnVal > max) { + throw new IllegalArgumentException(String.format("Value (%s) not in range (%s-%s)", returnVal, min, max)); + } + return returnVal; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index d6d0be7a17b..8138a6f414f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -1,27 +1,6 @@ package edu.harvard.iq.dataverse.search; -import edu.harvard.iq.dataverse.ControlledVocabularyValue; -import edu.harvard.iq.dataverse.DataFile; -import edu.harvard.iq.dataverse.DataFileServiceBean; -import edu.harvard.iq.dataverse.DataFileTag; -import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.DatasetField; -import edu.harvard.iq.dataverse.DatasetFieldCompoundValue; -import edu.harvard.iq.dataverse.DatasetFieldConstant; -import edu.harvard.iq.dataverse.DatasetFieldServiceBean; -import edu.harvard.iq.dataverse.DatasetFieldType; -import edu.harvard.iq.dataverse.DatasetLinkingServiceBean; -import edu.harvard.iq.dataverse.DatasetServiceBean; -import edu.harvard.iq.dataverse.DatasetVersion; -import edu.harvard.iq.dataverse.Dataverse; -import edu.harvard.iq.dataverse.DataverseLinkingServiceBean; -import edu.harvard.iq.dataverse.DataverseServiceBean; -import edu.harvard.iq.dataverse.DvObject; -import edu.harvard.iq.dataverse.DvObjectServiceBean; -import edu.harvard.iq.dataverse.Embargo; -import edu.harvard.iq.dataverse.FileMetadata; -import edu.harvard.iq.dataverse.GlobalId; -import edu.harvard.iq.dataverse.PermissionServiceBean; +import edu.harvard.iq.dataverse.*; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.providers.builtin.BuiltinUserServiceBean; import edu.harvard.iq.dataverse.batch.util.LoggingUtil; @@ -35,6 +14,7 @@ import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.SystemConfig; @@ -1072,13 +1052,17 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set vals = new LinkedList<>(); + DatasetFieldCompoundValue val = new DatasetFieldCompoundValue(); + val.setParentDatasetField(dsf); + val.setChildDatasetFields(Arrays.asList( + constructBoundingBoxValue(DatasetFieldConstant.westLongitude, "34.8"), + constructBoundingBoxValue(DatasetFieldConstant.eastLongitude, "34.9"), // bad value. must be less than west + constructBoundingBoxValue(DatasetFieldConstant.northLatitude, "34.2"), + constructBoundingBoxValue(DatasetFieldConstant.southLatitude, "34.1") + )); + vals.add(val); + dsf.setDatasetFieldCompoundValues(vals); + datasetVersion.getDatasetFields().add(dsf); + + final SolrInputDocuments docs = indexService.toSolrDocs(indexableDataset, null); + Optional doc = docs.getDocuments().stream().findFirst(); + assertTrue(doc.isPresent()); + assertTrue(!doc.get().containsKey("geolocation")); + assertTrue(!doc.get().containsKey("boundingBox")); + } + private DatasetField constructBoundingBoxValue(String datasetFieldTypeName, String value) { + DatasetField retVal = new DatasetField(); + retVal.setDatasetFieldType(new DatasetFieldType(datasetFieldTypeName, DatasetFieldType.FieldType.TEXT, false)); + retVal.setDatasetFieldValues(Collections.singletonList(new DatasetFieldValue(retVal, value))); + return retVal; + } + private IndexableDataset createIndexableDataset() { final Dataset dataset = MocksFactory.makeDataset(); dataset.setGlobalId(new GlobalId(DOIServiceBean.DOI_PROTOCOL,"10.666", "FAKE/fake", "/", DOIServiceBean.DOI_RESOLVER_URL, null)); From 808efdd7cf08c3ec4955dd25d8bbd1078c3f09db Mon Sep 17 00:00:00 2001 From: Steven Winship Date: Mon, 27 Nov 2023 14:54:25 -0500 Subject: [PATCH 2/8] fixt east/west mixup --- .../harvard/iq/dataverse/DatasetFieldValueValidator.java | 2 +- src/main/java/propertyFiles/Bundle.properties | 2 +- .../iq/dataverse/DatasetFieldValueValidatorTest.java | 8 ++++---- .../harvard/iq/dataverse/search/IndexServiceBeanTest.java | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValueValidator.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValueValidator.java index 7941fa5efa1..23301bf86a1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValueValidator.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValueValidator.java @@ -274,7 +274,7 @@ public static boolean validateBoundingBox(final String westLon, final String eas Float east = verifyBoundingBoxCoordinatesWithinRange(DatasetFieldConstant.eastLongitude, eastLon); Float north = verifyBoundingBoxCoordinatesWithinRange(DatasetFieldConstant.northLatitude, northLat); Float south = verifyBoundingBoxCoordinatesWithinRange(DatasetFieldConstant.southLatitude, southLat); - returnVal = east < west && south < north; + returnVal = west < east && south < north; } catch (IllegalArgumentException e) { returnVal = false; } diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 15b0ebb8020..4db66006a80 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -1623,7 +1623,7 @@ dataset.metadata.alternativePersistentId.tip=A previously used persistent identi dataset.metadata.invalidEntry=is not a valid entry. dataset.metadata.invalidDate=is not a valid date. "yyyy" is a supported format. dataset.metadata.invalidNumber=is not a valid number. -dataset.metadata.invalidGeospatialCoordinates=has invalid coordinates. West must be greater than East and North must be greater than South. +dataset.metadata.invalidGeospatialCoordinates=has invalid coordinates. East must be greater than West and North must be greater than South. dataset.metadata.invalidInteger=is not a valid integer. dataset.metadata.invalidURL=is not a valid URL. dataset.metadata.invalidEmail=is not a valid email address. diff --git a/src/test/java/edu/harvard/iq/dataverse/DatasetFieldValueValidatorTest.java b/src/test/java/edu/harvard/iq/dataverse/DatasetFieldValueValidatorTest.java index 2729e5b042a..37274f5306a 100644 --- a/src/test/java/edu/harvard/iq/dataverse/DatasetFieldValueValidatorTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/DatasetFieldValueValidatorTest.java @@ -216,12 +216,12 @@ public void testInvalidEmail() { @Test public void testBoundingBoxValidity() { // valid tests - assertTrue(DatasetFieldValueValidator.validateBoundingBox("180", "-180", "90", "-90")); - assertTrue(DatasetFieldValueValidator.validateBoundingBox("180", "-180", "90", null)); + assertTrue(DatasetFieldValueValidator.validateBoundingBox("-180", "180", "90", "-90")); + assertTrue(DatasetFieldValueValidator.validateBoundingBox("-180", "180", "90", null)); // invalid tests - assertTrue(!DatasetFieldValueValidator.validateBoundingBox("180", "-180", "90", "junk")); - assertTrue(!DatasetFieldValueValidator.validateBoundingBox("40", "45", "90", "0")); + assertTrue(!DatasetFieldValueValidator.validateBoundingBox("-180", "180", "90", "junk")); + assertTrue(!DatasetFieldValueValidator.validateBoundingBox("45", "40", "90", "0")); assertTrue(!DatasetFieldValueValidator.validateBoundingBox("360", "0", "90", "-90")); } } diff --git a/src/test/java/edu/harvard/iq/dataverse/search/IndexServiceBeanTest.java b/src/test/java/edu/harvard/iq/dataverse/search/IndexServiceBeanTest.java index a0330e1fcad..adf48e05f09 100644 --- a/src/test/java/edu/harvard/iq/dataverse/search/IndexServiceBeanTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/search/IndexServiceBeanTest.java @@ -110,8 +110,8 @@ public void testValidateBoundingBox() throws SolrServerException, IOException { DatasetFieldCompoundValue val = new DatasetFieldCompoundValue(); val.setParentDatasetField(dsf); val.setChildDatasetFields(Arrays.asList( - constructBoundingBoxValue(DatasetFieldConstant.westLongitude, "34.8"), - constructBoundingBoxValue(DatasetFieldConstant.eastLongitude, "34.9"), // bad value. must be less than west + constructBoundingBoxValue(DatasetFieldConstant.westLongitude, "34.9"), // bad value. must be less than east + constructBoundingBoxValue(DatasetFieldConstant.eastLongitude, "34.8"), constructBoundingBoxValue(DatasetFieldConstant.northLatitude, "34.2"), constructBoundingBoxValue(DatasetFieldConstant.southLatitude, "34.1") )); From 78404cef91d20f0f41753866e4ee156d51cd3a69 Mon Sep 17 00:00:00 2001 From: Steven Winship Date: Tue, 28 Nov 2023 11:18:26 -0500 Subject: [PATCH 3/8] fixt box validation for misisng values --- .../harvard/iq/dataverse/DatasetFieldValueValidator.java | 8 ++++---- src/main/java/propertyFiles/Bundle.properties | 2 +- .../iq/dataverse/DatasetFieldValueValidatorTest.java | 4 +++- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValueValidator.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValueValidator.java index 23301bf86a1..821497299a9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValueValidator.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValueValidator.java @@ -274,7 +274,7 @@ public static boolean validateBoundingBox(final String westLon, final String eas Float east = verifyBoundingBoxCoordinatesWithinRange(DatasetFieldConstant.eastLongitude, eastLon); Float north = verifyBoundingBoxCoordinatesWithinRange(DatasetFieldConstant.northLatitude, northLat); Float south = verifyBoundingBoxCoordinatesWithinRange(DatasetFieldConstant.southLatitude, southLat); - returnVal = west < east && south < north; + returnVal = west <= east && south <= north; } catch (IllegalArgumentException e) { returnVal = false; } @@ -286,9 +286,9 @@ private static Float verifyBoundingBoxCoordinatesWithinRange(final String name, int max = name.equals(DatasetFieldConstant.westLongitude) || name.equals(DatasetFieldConstant.eastLongitude) ? 180 : 90; int min = max * -1; - final Float returnVal = value != null ? Float.parseFloat(value) : min; // defaults to min if value is missing - if (returnVal < min || returnVal > max) { - throw new IllegalArgumentException(String.format("Value (%s) not in range (%s-%s)", returnVal, min, max)); + final Float returnVal = value != null ? Float.parseFloat(value) : Float.NaN; + if (returnVal.isNaN() || returnVal < min || returnVal > max) { + throw new IllegalArgumentException(String.format("Value (%s) not in range (%s-%s)", returnVal.isNaN() ? "missing" : returnVal, min, max)); } return returnVal; } diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 4db66006a80..6b2c30f2a4d 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -1623,7 +1623,7 @@ dataset.metadata.alternativePersistentId.tip=A previously used persistent identi dataset.metadata.invalidEntry=is not a valid entry. dataset.metadata.invalidDate=is not a valid date. "yyyy" is a supported format. dataset.metadata.invalidNumber=is not a valid number. -dataset.metadata.invalidGeospatialCoordinates=has invalid coordinates. East must be greater than West and North must be greater than South. +dataset.metadata.invalidGeospatialCoordinates=has invalid coordinates. East must be greater than West and North must be greater than South. Missing values are NOT allowed. dataset.metadata.invalidInteger=is not a valid integer. dataset.metadata.invalidURL=is not a valid URL. dataset.metadata.invalidEmail=is not a valid email address. diff --git a/src/test/java/edu/harvard/iq/dataverse/DatasetFieldValueValidatorTest.java b/src/test/java/edu/harvard/iq/dataverse/DatasetFieldValueValidatorTest.java index 37274f5306a..31e450a2884 100644 --- a/src/test/java/edu/harvard/iq/dataverse/DatasetFieldValueValidatorTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/DatasetFieldValueValidatorTest.java @@ -217,11 +217,13 @@ public void testInvalidEmail() { public void testBoundingBoxValidity() { // valid tests assertTrue(DatasetFieldValueValidator.validateBoundingBox("-180", "180", "90", "-90")); - assertTrue(DatasetFieldValueValidator.validateBoundingBox("-180", "180", "90", null)); // invalid tests + assertTrue(!DatasetFieldValueValidator.validateBoundingBox("-180", null, "90", null)); + assertTrue(!DatasetFieldValueValidator.validateBoundingBox(null, "180", null, "90")); assertTrue(!DatasetFieldValueValidator.validateBoundingBox("-180", "180", "90", "junk")); assertTrue(!DatasetFieldValueValidator.validateBoundingBox("45", "40", "90", "0")); assertTrue(!DatasetFieldValueValidator.validateBoundingBox("360", "0", "90", "-90")); + assertTrue(!DatasetFieldValueValidator.validateBoundingBox(null, null, "90", "-90")); } } From 811743d07b5cf645207539294c7e5c0bb1fb6dea Mon Sep 17 00:00:00 2001 From: Steven Winship Date: Tue, 28 Nov 2023 15:13:27 -0500 Subject: [PATCH 4/8] fixt box validation for missing values --- .../iq/dataverse/DatasetFieldValueValidator.java | 13 +++++++++---- .../dataverse/DatasetFieldValueValidatorTest.java | 4 +++- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValueValidator.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValueValidator.java index 821497299a9..ea9f6c9c3cc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValueValidator.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValueValidator.java @@ -67,12 +67,17 @@ public boolean isValid(DatasetFieldValue value, ConstraintValidatorContext conte } // validate fields that are siblings and depend on each others values - if (value.getDatasetField().getParentDatasetFieldCompoundValue() != null) { + if (value.getDatasetField().getParentDatasetFieldCompoundValue() != null && + value.getDatasetField().getParentDatasetFieldCompoundValue().getParentDatasetField().getValidationMessage() == null) { Optional failureMessage = validateChildConstraints(value.getDatasetField()); if (failureMessage.isPresent()) { try { context.buildConstraintViolationWithTemplate(dsfType.getParentDatasetFieldType().getDisplayName() + " " + BundleUtil.getStringFromBundle(failureMessage.get()) ).addConstraintViolation(); + + // save the failure message in the parent so we don't keep validating the children + value.getDatasetField().getParentDatasetFieldCompoundValue().getParentDatasetField().setValidationMessage(failureMessage.get()); + } catch (NullPointerException npe) { } return false; @@ -240,15 +245,15 @@ public boolean isValidAuthorIdentifier(String userInput, Pattern pattern) { return pattern.matcher(userInput).matches(); } - // Validate child fields against each other and return failure message or null if success + // Validate child fields against each other and return failure message or Optional.empty() if success public Optional validateChildConstraints(DatasetField dsf) { final String fieldName = dsf.getDatasetFieldType().getName() != null ? dsf.getDatasetFieldType().getName() : ""; Optional returnFailureMessage = Optional.empty(); // Validate Child Constraint for Geospatial Bounding Box // validate the four points of the box to insure proper layout - // only validate on one value for speed. picked northLatitude since it was at the bottom of the UI - if (fieldName.equals(DatasetFieldConstant.northLatitude)) { + if (fieldName.equals(DatasetFieldConstant.northLatitude) || fieldName.equals(DatasetFieldConstant.westLongitude) + || fieldName.equals(DatasetFieldConstant.eastLongitude) || fieldName.equals(DatasetFieldConstant.southLatitude)) { final String failureMessage = "dataset.metadata.invalidGeospatialCoordinates"; DatasetFieldCompoundValue cv = dsf.getParentDatasetFieldCompoundValue(); List cdsf = cv.getChildDatasetFields(); diff --git a/src/test/java/edu/harvard/iq/dataverse/DatasetFieldValueValidatorTest.java b/src/test/java/edu/harvard/iq/dataverse/DatasetFieldValueValidatorTest.java index 31e450a2884..3f85acc1f87 100644 --- a/src/test/java/edu/harvard/iq/dataverse/DatasetFieldValueValidatorTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/DatasetFieldValueValidatorTest.java @@ -217,6 +217,7 @@ public void testInvalidEmail() { public void testBoundingBoxValidity() { // valid tests assertTrue(DatasetFieldValueValidator.validateBoundingBox("-180", "180", "90", "-90")); + assertTrue(DatasetFieldValueValidator.validateBoundingBox("0", "0", "0", "0")); // invalid tests assertTrue(!DatasetFieldValueValidator.validateBoundingBox("-180", null, "90", null)); @@ -224,6 +225,7 @@ public void testBoundingBoxValidity() { assertTrue(!DatasetFieldValueValidator.validateBoundingBox("-180", "180", "90", "junk")); assertTrue(!DatasetFieldValueValidator.validateBoundingBox("45", "40", "90", "0")); assertTrue(!DatasetFieldValueValidator.validateBoundingBox("360", "0", "90", "-90")); - assertTrue(!DatasetFieldValueValidator.validateBoundingBox(null, null, "90", "-90")); + assertTrue(!DatasetFieldValueValidator.validateBoundingBox("", "", "", "")); + assertTrue(!DatasetFieldValueValidator.validateBoundingBox(null, null, null, null)); } } From 857998d322ba996654082e9f85790c9214bbeae1 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Wed, 29 Nov 2023 17:29:56 -0500 Subject: [PATCH 5/8] cosmetic changes to the geospatial block - more descriptive labels for the entry form + extra formatting for display. #9547 --- scripts/api/data/metadatablocks/geospatial.tsv | 8 ++++---- .../java/propertyFiles/geospatial.properties | 18 +++++++++--------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/scripts/api/data/metadatablocks/geospatial.tsv b/scripts/api/data/metadatablocks/geospatial.tsv index a3a8e7efd58..2d86a198a9f 100644 --- a/scripts/api/data/metadatablocks/geospatial.tsv +++ b/scripts/api/data/metadatablocks/geospatial.tsv @@ -8,10 +8,10 @@ otherGeographicCoverage Other Other information on the geographic coverage of the data. text 4 #VALUE, FALSE FALSE FALSE TRUE FALSE FALSE geographicCoverage geospatial geographicUnit Geographic Unit Lowest level of geographic aggregation covered by the Dataset, e.g., village, county, region. text 5 TRUE FALSE TRUE TRUE FALSE FALSE geospatial geographicBoundingBox Geographic Bounding Box The fundamental geometric description for any Dataset that models geography is the geographic bounding box. It describes the minimum box, defined by west and east longitudes and north and south latitudes, which includes the largest geographic extent of the Dataset's geographic coverage. This element is used in the first pass of a coordinate-based search. Inclusion of this element in the codebook is recommended, but is required if the bound polygon box is included. none 6 FALSE FALSE TRUE FALSE FALSE FALSE geospatial - westLongitude West Longitude Westernmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -180,0 <= West Bounding Longitude Value <= 180,0. text 7 FALSE FALSE FALSE FALSE FALSE FALSE geographicBoundingBox geospatial - eastLongitude East Longitude Easternmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -180,0 <= East Bounding Longitude Value <= 180,0. text 8 FALSE FALSE FALSE FALSE FALSE FALSE geographicBoundingBox geospatial - northLongitude North Latitude Northernmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -90,0 <= North Bounding Latitude Value <= 90,0. text 9 FALSE FALSE FALSE FALSE FALSE FALSE geographicBoundingBox geospatial - southLongitude South Latitude Southernmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -90,0 <= South Bounding Latitude Value <= 90,0. text 10 FALSE FALSE FALSE FALSE FALSE FALSE geographicBoundingBox geospatial + westLongitude Westernmost (Left) Longitude Westernmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -180,0 <= West Bounding Longitude Value <= 180,0. text 7 #VALUEW FALSE FALSE FALSE FALSE FALSE FALSE geographicBoundingBox geospatial + eastLongitude Easternmost (Right) Longitude Easternmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -180,0 <= East Bounding Longitude Value <= 180,0. text 8 #VALUEE FALSE FALSE FALSE FALSE FALSE FALSE geographicBoundingBox geospatial + northLongitude Northernmost (Top) Latitude Northernmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -90,0 <= North Bounding Latitude Value <= 90,0. text 9 #VALUEN FALSE FALSE FALSE FALSE FALSE FALSE geographicBoundingBox geospatial + southLongitude Southernmost (Bottom) Latitude Southernmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -90,0 <= South Bounding Latitude Value <= 90,0. text 10 #VALUES FALSE FALSE FALSE FALSE FALSE FALSE geographicBoundingBox geospatial #controlledVocabulary DatasetField Value identifier displayOrder country Afghanistan 0 country Albania 1 diff --git a/src/main/java/propertyFiles/geospatial.properties b/src/main/java/propertyFiles/geospatial.properties index 04db8d3d05f..86f297c29b9 100644 --- a/src/main/java/propertyFiles/geospatial.properties +++ b/src/main/java/propertyFiles/geospatial.properties @@ -8,10 +8,10 @@ datasetfieldtype.city.title=City datasetfieldtype.otherGeographicCoverage.title=Other datasetfieldtype.geographicUnit.title=Geographic Unit datasetfieldtype.geographicBoundingBox.title=Geographic Bounding Box -datasetfieldtype.westLongitude.title=West Longitude -datasetfieldtype.eastLongitude.title=East Longitude -datasetfieldtype.northLongitude.title=North Latitude -datasetfieldtype.southLongitude.title=South Latitude +datasetfieldtype.westLongitude.title=Westernmost (Left) Longitude +datasetfieldtype.eastLongitude.title=Easternmost (Right) Longitude +datasetfieldtype.northLongitude.title=Northernmost (Top) Latitude +datasetfieldtype.southLongitude.title=Southernmost (Bottom) Latitude datasetfieldtype.geographicCoverage.description=Information on the geographic coverage of the data. Includes the total geographic scope of the data. datasetfieldtype.country.description=The country or nation that the Dataset is about. datasetfieldtype.state.description=The state or province that the Dataset is about. Use GeoNames for correct spelling and avoid abbreviations. @@ -89,10 +89,10 @@ controlledvocabulary.country.cook_islands=Cook Islands controlledvocabulary.country.costa_rica=Costa Rica controlledvocabulary.country.croatia=Croatia controlledvocabulary.country.cuba=Cuba -controlledvocabulary.country.curacao=Curaçao +controlledvocabulary.country.curacao=Cura\u00e7ao controlledvocabulary.country.cyprus=Cyprus controlledvocabulary.country.czech_republic=Czech Republic -controlledvocabulary.country.cote_d'ivoire=Côte d'Ivoire +controlledvocabulary.country.cote_d'ivoire=C\u00f4te d'Ivoire controlledvocabulary.country.denmark=Denmark controlledvocabulary.country.djibouti=Djibouti controlledvocabulary.country.dominica=Dominica @@ -216,8 +216,8 @@ controlledvocabulary.country.qatar=Qatar controlledvocabulary.country.romania=Romania controlledvocabulary.country.russian_federation=Russian Federation controlledvocabulary.country.rwanda=Rwanda -controlledvocabulary.country.reunion=Réunion -controlledvocabulary.country.saint_barthelemy=Saint Barthélemy +controlledvocabulary.country.reunion=R\u00e9union +controlledvocabulary.country.saint_barthelemy=Saint Barth\u00e9lemy controlledvocabulary.country.saint_helena,_ascension_and_tristan_da_cunha=Saint Helena, Ascension and Tristan da Cunha controlledvocabulary.country.saint_kitts_and_nevis=Saint Kitts and Nevis controlledvocabulary.country.saint_lucia=Saint Lucia @@ -282,4 +282,4 @@ controlledvocabulary.country.western_sahara=Western Sahara controlledvocabulary.country.yemen=Yemen controlledvocabulary.country.zambia=Zambia controlledvocabulary.country.zimbabwe=Zimbabwe -controlledvocabulary.country.aland_islands=Åland Islands +controlledvocabulary.country.aland_islands=\u00c5land Islands From 6391f03767042b37d67a2df42a641492ca892f8f Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Wed, 29 Nov 2023 17:41:27 -0500 Subject: [PATCH 6/8] a short release note stub telling admins to update the geospatial block #9547 --- .../9547-validation-for-geospatial-metadata.md | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 doc/release-notes/9547-validation-for-geospatial-metadata.md diff --git a/doc/release-notes/9547-validation-for-geospatial-metadata.md b/doc/release-notes/9547-validation-for-geospatial-metadata.md new file mode 100644 index 00000000000..a44e1a3732b --- /dev/null +++ b/doc/release-notes/9547-validation-for-geospatial-metadata.md @@ -0,0 +1,9 @@ +Validation has been added for the Geographic Bounding Box values in the Geospatial metadata block. This will prevent improperly defined bounding boxes from being created via the edit page or metadata imports. (issue 9547). This also fixes the issue where existing datasets with invalid geoboxes were quietly failing to get reindexed. + +For the "upgrade" steps section: + +Update Geospatial Metadata Block + +- `wget https://github.com/IQSS/dataverse/releases/download/v6.1/geospatial.tsv` +- `curl http://localhost:8080/api/admin/datasetfield/load -H "Content-type: text/tab-separated-values" -X POST --upload-file @geospatial.tsv` + From 41784f39681f58d7af6b4233e7bbbfc0a0206adc Mon Sep 17 00:00:00 2001 From: Steven Winship Date: Thu, 30 Nov 2023 09:20:11 -0500 Subject: [PATCH 7/8] fix for load test where coordinates come in a different order --- .../iq/dataverse/DatasetFieldValueValidator.java | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValueValidator.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValueValidator.java index ea9f6c9c3cc..610bb70ff49 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValueValidator.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValueValidator.java @@ -255,13 +255,17 @@ public Optional validateChildConstraints(DatasetField dsf) { if (fieldName.equals(DatasetFieldConstant.northLatitude) || fieldName.equals(DatasetFieldConstant.westLongitude) || fieldName.equals(DatasetFieldConstant.eastLongitude) || fieldName.equals(DatasetFieldConstant.southLatitude)) { final String failureMessage = "dataset.metadata.invalidGeospatialCoordinates"; - DatasetFieldCompoundValue cv = dsf.getParentDatasetFieldCompoundValue(); - List cdsf = cv.getChildDatasetFields(); + try { - if (cdsf.size() == 4) { - if (!validateBoundingBox(cdsf.get(0).getValue(), cdsf.get(1).getValue(), cdsf.get(2).getValue(), cdsf.get(3).getValue())) { - returnFailureMessage = Optional.of(failureMessage); - } + final Map coords = new HashMap<>(); + dsf.getParentDatasetFieldCompoundValue().getChildDatasetFields().forEach(f -> { + coords.put(f.getDatasetFieldType().getName(), f.getValue()); + }); + if (!validateBoundingBox(coords.get(DatasetFieldConstant.westLongitude), + coords.get(DatasetFieldConstant.eastLongitude), + coords.get(DatasetFieldConstant.northLatitude), + coords.get(DatasetFieldConstant.southLatitude))) { + returnFailureMessage = Optional.of(failureMessage); } } catch (IllegalArgumentException e) { // IllegalArgumentException NumberFormatException returnFailureMessage = Optional.of(failureMessage); From 2d6efc5ee7f05082bcdcd3c837283b19edb8bec8 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Thu, 30 Nov 2023 10:43:05 -0500 Subject: [PATCH 8/8] undoing the formatting for the bbox fields: that formatting gets passed to solr! :( #9547 --- scripts/api/data/metadatablocks/geospatial.tsv | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/api/data/metadatablocks/geospatial.tsv b/scripts/api/data/metadatablocks/geospatial.tsv index 2d86a198a9f..ce481c1bf84 100644 --- a/scripts/api/data/metadatablocks/geospatial.tsv +++ b/scripts/api/data/metadatablocks/geospatial.tsv @@ -8,10 +8,10 @@ otherGeographicCoverage Other Other information on the geographic coverage of the data. text 4 #VALUE, FALSE FALSE FALSE TRUE FALSE FALSE geographicCoverage geospatial geographicUnit Geographic Unit Lowest level of geographic aggregation covered by the Dataset, e.g., village, county, region. text 5 TRUE FALSE TRUE TRUE FALSE FALSE geospatial geographicBoundingBox Geographic Bounding Box The fundamental geometric description for any Dataset that models geography is the geographic bounding box. It describes the minimum box, defined by west and east longitudes and north and south latitudes, which includes the largest geographic extent of the Dataset's geographic coverage. This element is used in the first pass of a coordinate-based search. Inclusion of this element in the codebook is recommended, but is required if the bound polygon box is included. none 6 FALSE FALSE TRUE FALSE FALSE FALSE geospatial - westLongitude Westernmost (Left) Longitude Westernmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -180,0 <= West Bounding Longitude Value <= 180,0. text 7 #VALUEW FALSE FALSE FALSE FALSE FALSE FALSE geographicBoundingBox geospatial - eastLongitude Easternmost (Right) Longitude Easternmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -180,0 <= East Bounding Longitude Value <= 180,0. text 8 #VALUEE FALSE FALSE FALSE FALSE FALSE FALSE geographicBoundingBox geospatial - northLongitude Northernmost (Top) Latitude Northernmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -90,0 <= North Bounding Latitude Value <= 90,0. text 9 #VALUEN FALSE FALSE FALSE FALSE FALSE FALSE geographicBoundingBox geospatial - southLongitude Southernmost (Bottom) Latitude Southernmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -90,0 <= South Bounding Latitude Value <= 90,0. text 10 #VALUES FALSE FALSE FALSE FALSE FALSE FALSE geographicBoundingBox geospatial + westLongitude Westernmost (Left) Longitude Westernmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -180,0 <= West Bounding Longitude Value <= 180,0. text 7 FALSE FALSE FALSE FALSE FALSE FALSE geographicBoundingBox geospatial + eastLongitude Easternmost (Right) Longitude Easternmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -180,0 <= East Bounding Longitude Value <= 180,0. text 8 FALSE FALSE FALSE FALSE FALSE FALSE geographicBoundingBox geospatial + northLongitude Northernmost (Top) Latitude Northernmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -90,0 <= North Bounding Latitude Value <= 90,0. text 9 FALSE FALSE FALSE FALSE FALSE FALSE geographicBoundingBox geospatial + southLongitude Southernmost (Bottom) Latitude Southernmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -90,0 <= South Bounding Latitude Value <= 90,0. text 10 FALSE FALSE FALSE FALSE FALSE FALSE geographicBoundingBox geospatial #controlledVocabulary DatasetField Value identifier displayOrder country Afghanistan 0 country Albania 1