diff --git a/doc/release-notes/9547-validation-for-geospatial-metadata.md b/doc/release-notes/9547-validation-for-geospatial-metadata.md new file mode 100644 index 00000000000..a44e1a3732b --- /dev/null +++ b/doc/release-notes/9547-validation-for-geospatial-metadata.md @@ -0,0 +1,9 @@ +Validation has been added for the Geographic Bounding Box values in the Geospatial metadata block. This will prevent improperly defined bounding boxes from being created via the edit page or metadata imports. (issue 9547). This also fixes the issue where existing datasets with invalid geoboxes were quietly failing to get reindexed. + +For the "upgrade" steps section: + +Update Geospatial Metadata Block + +- `wget https://github.com/IQSS/dataverse/releases/download/v6.1/geospatial.tsv` +- `curl http://localhost:8080/api/admin/datasetfield/load -H "Content-type: text/tab-separated-values" -X POST --upload-file @geospatial.tsv` + diff --git a/scripts/api/data/metadatablocks/geospatial.tsv b/scripts/api/data/metadatablocks/geospatial.tsv index a3a8e7efd58..ce481c1bf84 100644 --- a/scripts/api/data/metadatablocks/geospatial.tsv +++ b/scripts/api/data/metadatablocks/geospatial.tsv @@ -8,10 +8,10 @@ otherGeographicCoverage Other Other information on the geographic coverage of the data. text 4 #VALUE, FALSE FALSE FALSE TRUE FALSE FALSE geographicCoverage geospatial geographicUnit Geographic Unit Lowest level of geographic aggregation covered by the Dataset, e.g., village, county, region. text 5 TRUE FALSE TRUE TRUE FALSE FALSE geospatial geographicBoundingBox Geographic Bounding Box The fundamental geometric description for any Dataset that models geography is the geographic bounding box. It describes the minimum box, defined by west and east longitudes and north and south latitudes, which includes the largest geographic extent of the Dataset's geographic coverage. This element is used in the first pass of a coordinate-based search. Inclusion of this element in the codebook is recommended, but is required if the bound polygon box is included. none 6 FALSE FALSE TRUE FALSE FALSE FALSE geospatial - westLongitude West Longitude Westernmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -180,0 <= West Bounding Longitude Value <= 180,0. text 7 FALSE FALSE FALSE FALSE FALSE FALSE geographicBoundingBox geospatial - eastLongitude East Longitude Easternmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -180,0 <= East Bounding Longitude Value <= 180,0. text 8 FALSE FALSE FALSE FALSE FALSE FALSE geographicBoundingBox geospatial - northLongitude North Latitude Northernmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -90,0 <= North Bounding Latitude Value <= 90,0. text 9 FALSE FALSE FALSE FALSE FALSE FALSE geographicBoundingBox geospatial - southLongitude South Latitude Southernmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -90,0 <= South Bounding Latitude Value <= 90,0. text 10 FALSE FALSE FALSE FALSE FALSE FALSE geographicBoundingBox geospatial + westLongitude Westernmost (Left) Longitude Westernmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -180,0 <= West Bounding Longitude Value <= 180,0. text 7 FALSE FALSE FALSE FALSE FALSE FALSE geographicBoundingBox geospatial + eastLongitude Easternmost (Right) Longitude Easternmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -180,0 <= East Bounding Longitude Value <= 180,0. text 8 FALSE FALSE FALSE FALSE FALSE FALSE geographicBoundingBox geospatial + northLongitude Northernmost (Top) Latitude Northernmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -90,0 <= North Bounding Latitude Value <= 90,0. text 9 FALSE FALSE FALSE FALSE FALSE FALSE geographicBoundingBox geospatial + southLongitude Southernmost (Bottom) Latitude Southernmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -90,0 <= South Bounding Latitude Value <= 90,0. text 10 FALSE FALSE FALSE FALSE FALSE FALSE geographicBoundingBox geospatial #controlledVocabulary DatasetField Value identifier displayOrder country Afghanistan 0 country Albania 1 diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValueValidator.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValueValidator.java index b6c21014f04..610bb70ff49 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValueValidator.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValueValidator.java @@ -8,9 +8,7 @@ import edu.harvard.iq.dataverse.DatasetFieldType.FieldType; import java.text.ParseException; import java.text.SimpleDateFormat; -import java.util.Calendar; -import java.util.Date; -import java.util.GregorianCalendar; +import java.util.*; import java.util.logging.Logger; import java.util.regex.Pattern; import jakarta.validation.ConstraintValidator; @@ -34,7 +32,6 @@ public void initialize(ValidateDatasetFieldType constraintAnnotation) { } public boolean isValid(DatasetFieldValue value, ConstraintValidatorContext context) { - context.disableDefaultConstraintViolation(); // we do this so we can have different messages depending on the different issue boolean lengthOnly = false; @@ -55,6 +52,38 @@ public boolean isValid(DatasetFieldValue value, ConstraintValidatorContext conte return true; } + // verify no junk in individual fields and values are within range + if (dsfType.getName() != null && (dsfType.getName().equals(DatasetFieldConstant.northLatitude) || dsfType.getName().equals(DatasetFieldConstant.southLatitude) || + dsfType.getName().equals(DatasetFieldConstant.westLongitude) || dsfType.getName().equals(DatasetFieldConstant.eastLongitude))) { + try { + verifyBoundingBoxCoordinatesWithinRange(dsfType.getName(), value.getValue()); + } catch (IllegalArgumentException iae) { + try { + context.buildConstraintViolationWithTemplate(dsfType.getDisplayName() + " " + BundleUtil.getStringFromBundle("dataset.metadata.invalidEntry")).addConstraintViolation(); + } catch (NullPointerException e) { + } + return false; + } + } + + // validate fields that are siblings and depend on each others values + if (value.getDatasetField().getParentDatasetFieldCompoundValue() != null && + value.getDatasetField().getParentDatasetFieldCompoundValue().getParentDatasetField().getValidationMessage() == null) { + Optional failureMessage = validateChildConstraints(value.getDatasetField()); + if (failureMessage.isPresent()) { + try { + context.buildConstraintViolationWithTemplate(dsfType.getParentDatasetFieldType().getDisplayName() + " " + + BundleUtil.getStringFromBundle(failureMessage.get()) ).addConstraintViolation(); + + // save the failure message in the parent so we don't keep validating the children + value.getDatasetField().getParentDatasetFieldCompoundValue().getParentDatasetField().setValidationMessage(failureMessage.get()); + + } catch (NullPointerException npe) { + } + return false; + } + } + if (fieldType.equals(FieldType.TEXT) && !lengthOnly && value.getDatasetField().getDatasetFieldType().getValidationFormat() != null) { boolean valid = value.getValue().matches(value.getDatasetField().getDatasetFieldType().getValidationFormat()); if (!valid) { @@ -216,4 +245,60 @@ public boolean isValidAuthorIdentifier(String userInput, Pattern pattern) { return pattern.matcher(userInput).matches(); } + // Validate child fields against each other and return failure message or Optional.empty() if success + public Optional validateChildConstraints(DatasetField dsf) { + final String fieldName = dsf.getDatasetFieldType().getName() != null ? dsf.getDatasetFieldType().getName() : ""; + Optional returnFailureMessage = Optional.empty(); + + // Validate Child Constraint for Geospatial Bounding Box + // validate the four points of the box to insure proper layout + if (fieldName.equals(DatasetFieldConstant.northLatitude) || fieldName.equals(DatasetFieldConstant.westLongitude) + || fieldName.equals(DatasetFieldConstant.eastLongitude) || fieldName.equals(DatasetFieldConstant.southLatitude)) { + final String failureMessage = "dataset.metadata.invalidGeospatialCoordinates"; + + try { + final Map coords = new HashMap<>(); + dsf.getParentDatasetFieldCompoundValue().getChildDatasetFields().forEach(f -> { + coords.put(f.getDatasetFieldType().getName(), f.getValue()); + }); + if (!validateBoundingBox(coords.get(DatasetFieldConstant.westLongitude), + coords.get(DatasetFieldConstant.eastLongitude), + coords.get(DatasetFieldConstant.northLatitude), + coords.get(DatasetFieldConstant.southLatitude))) { + returnFailureMessage = Optional.of(failureMessage); + } + } catch (IllegalArgumentException e) { // IllegalArgumentException NumberFormatException + returnFailureMessage = Optional.of(failureMessage); + } + } + + return returnFailureMessage; + } + + public static boolean validateBoundingBox(final String westLon, final String eastLon, final String northLat, final String southLat) { + boolean returnVal = false; + + try { + Float west = verifyBoundingBoxCoordinatesWithinRange(DatasetFieldConstant.westLongitude, westLon); + Float east = verifyBoundingBoxCoordinatesWithinRange(DatasetFieldConstant.eastLongitude, eastLon); + Float north = verifyBoundingBoxCoordinatesWithinRange(DatasetFieldConstant.northLatitude, northLat); + Float south = verifyBoundingBoxCoordinatesWithinRange(DatasetFieldConstant.southLatitude, southLat); + returnVal = west <= east && south <= north; + } catch (IllegalArgumentException e) { + returnVal = false; + } + + return returnVal; + } + + private static Float verifyBoundingBoxCoordinatesWithinRange(final String name, final String value) throws IllegalArgumentException { + int max = name.equals(DatasetFieldConstant.westLongitude) || name.equals(DatasetFieldConstant.eastLongitude) ? 180 : 90; + int min = max * -1; + + final Float returnVal = value != null ? Float.parseFloat(value) : Float.NaN; + if (returnVal.isNaN() || returnVal < min || returnVal > max) { + throw new IllegalArgumentException(String.format("Value (%s) not in range (%s-%s)", returnVal.isNaN() ? "missing" : returnVal, min, max)); + } + return returnVal; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index d6d0be7a17b..8138a6f414f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -1,27 +1,6 @@ package edu.harvard.iq.dataverse.search; -import edu.harvard.iq.dataverse.ControlledVocabularyValue; -import edu.harvard.iq.dataverse.DataFile; -import edu.harvard.iq.dataverse.DataFileServiceBean; -import edu.harvard.iq.dataverse.DataFileTag; -import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.DatasetField; -import edu.harvard.iq.dataverse.DatasetFieldCompoundValue; -import edu.harvard.iq.dataverse.DatasetFieldConstant; -import edu.harvard.iq.dataverse.DatasetFieldServiceBean; -import edu.harvard.iq.dataverse.DatasetFieldType; -import edu.harvard.iq.dataverse.DatasetLinkingServiceBean; -import edu.harvard.iq.dataverse.DatasetServiceBean; -import edu.harvard.iq.dataverse.DatasetVersion; -import edu.harvard.iq.dataverse.Dataverse; -import edu.harvard.iq.dataverse.DataverseLinkingServiceBean; -import edu.harvard.iq.dataverse.DataverseServiceBean; -import edu.harvard.iq.dataverse.DvObject; -import edu.harvard.iq.dataverse.DvObjectServiceBean; -import edu.harvard.iq.dataverse.Embargo; -import edu.harvard.iq.dataverse.FileMetadata; -import edu.harvard.iq.dataverse.GlobalId; -import edu.harvard.iq.dataverse.PermissionServiceBean; +import edu.harvard.iq.dataverse.*; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.providers.builtin.BuiltinUserServiceBean; import edu.harvard.iq.dataverse.batch.util.LoggingUtil; @@ -35,6 +14,7 @@ import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.SystemConfig; @@ -1072,13 +1052,17 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set vals = new LinkedList<>(); + DatasetFieldCompoundValue val = new DatasetFieldCompoundValue(); + val.setParentDatasetField(dsf); + val.setChildDatasetFields(Arrays.asList( + constructBoundingBoxValue(DatasetFieldConstant.westLongitude, "34.9"), // bad value. must be less than east + constructBoundingBoxValue(DatasetFieldConstant.eastLongitude, "34.8"), + constructBoundingBoxValue(DatasetFieldConstant.northLatitude, "34.2"), + constructBoundingBoxValue(DatasetFieldConstant.southLatitude, "34.1") + )); + vals.add(val); + dsf.setDatasetFieldCompoundValues(vals); + datasetVersion.getDatasetFields().add(dsf); + + final SolrInputDocuments docs = indexService.toSolrDocs(indexableDataset, null); + Optional doc = docs.getDocuments().stream().findFirst(); + assertTrue(doc.isPresent()); + assertTrue(!doc.get().containsKey("geolocation")); + assertTrue(!doc.get().containsKey("boundingBox")); + } + private DatasetField constructBoundingBoxValue(String datasetFieldTypeName, String value) { + DatasetField retVal = new DatasetField(); + retVal.setDatasetFieldType(new DatasetFieldType(datasetFieldTypeName, DatasetFieldType.FieldType.TEXT, false)); + retVal.setDatasetFieldValues(Collections.singletonList(new DatasetFieldValue(retVal, value))); + return retVal; + } + private IndexableDataset createIndexableDataset() { final Dataset dataset = MocksFactory.makeDataset(); dataset.setGlobalId(new GlobalId(DOIServiceBean.DOI_PROTOCOL,"10.666", "FAKE/fake", "/", DOIServiceBean.DOI_RESOLVER_URL, null));