From a56822bc78ac1c3e59b59dfc593604c1b2ce3e32 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 18 Aug 2022 14:21:13 -0400 Subject: [PATCH 1/8] checks on storageidentifiers --- .../iq/dataverse/dataaccess/FileAccessIO.java | 33 +++++++++++++-- .../iq/dataverse/dataaccess/S3AccessIO.java | 40 +++++++++++++++++-- .../dataaccess/FileAccessIOTest.java | 25 ++++++++++++ .../dataverse/dataaccess/S3AccessIOTest.java | 36 ++++++++++++++--- 4 files changed, 121 insertions(+), 13 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java index 1c60981c82a..f055ef3fb68 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java @@ -29,10 +29,13 @@ import java.io.FileOutputStream; // NIO imports: import java.nio.file.Files; +import java.nio.file.InvalidPathException; import java.nio.file.Path; import java.nio.file.Paths; import java.util.List; import java.util.logging.Logger; +import java.util.regex.Matcher; +import java.util.regex.Pattern; // Dataverse imports: import edu.harvard.iq.dataverse.DataFile; @@ -303,7 +306,7 @@ public Path getAuxObjectAsPath(String auxItemTag) throws IOException { throw new IOException("Null or invalid Auxiliary Object Tag."); } if(isDirectAccess()) { - //Overlay case + //includes overlay case return Paths.get(physicalPath.toString() + "." + auxItemTag); } String datasetDirectory = getDatasetDirectory(); @@ -427,8 +430,8 @@ public void deleteAllAuxObjects() throws IOException { } } - - + + @Override public String getStorageLocation() { // For a local file, the "storage location" is a complete, absolute @@ -656,4 +659,28 @@ private String stripDriverId(String storageIdentifier) { } return storageIdentifier; } + + //Confirm inputs are of the form of a relative file path that doesn't contain . or .. + protected static boolean isValidIdentifier(String driverId, String storageId) { + String pathString = storageId.substring(storageId.lastIndexOf("//") + 2); + String basePath = "/tmp/"; + try { + String rawPathString = basePath + pathString; + Path normalized = Paths.get(rawPathString).normalize(); + if(!rawPathString.equals(normalized.toString())) { + logger.warning("Non-normalized path in submitted identifier " + storageId); + return false; + } + System.out.println(normalized.getFileName().toString()); + if (!usesStandardNamePattern(normalized.getFileName().toString())) { + logger.warning("Unacceptable file name in submitted identifier: " + storageId); + return false; + } + + } catch (InvalidPathException ipe) { + logger.warning("Invalid Path in submitted identifier " + storageId); + return false; + } + return true; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index e550cae1373..50318f65211 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -937,9 +937,9 @@ public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliary // them for some servers, we check whether the protocol is in the url and then // normalizing to use the part without the protocol String endpointServer = endpoint; - int protocolEnd = endpoint.indexOf("://"); + int protocolEnd = endpoint.indexOf(DataAccess.SEPARATOR); if (protocolEnd >=0 ) { - endpointServer = endpoint.substring(protocolEnd + 3); + endpointServer = endpoint.substring(protocolEnd + DataAccess.SEPARATOR.length()); } logger.fine("Endpoint: " + endpointServer); // We're then replacing @@ -998,9 +998,9 @@ private String generateTemporaryS3UploadUrl(String key, Date expiration) throws // them for some servers, we check whether the protocol is in the url and then // normalizing to use the part without the protocol String endpointServer = endpoint; - int protocolEnd = endpoint.indexOf("://"); + int protocolEnd = endpoint.indexOf(DataAccess.SEPARATOR); if (protocolEnd >=0 ) { - endpointServer = endpoint.substring(protocolEnd + 3); + endpointServer = endpoint.substring(protocolEnd + DataAccess.SEPARATOR.length()); } logger.fine("Endpoint: " + endpointServer); // We're then replacing @@ -1274,5 +1274,37 @@ public boolean isMainDriver() { public void setMainDriver(boolean mainDriver) { this.mainDriver = mainDriver; } + + public static String getDriverPrefix(String driverId) { + return driverId+ DataAccess.SEPARATOR + getBucketName(driverId) + ":"; + } + + //Confirm inputs are of the form s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42 + protected static boolean isValidIdentifier(String driverId, String storageId) { + String storageBucketAndId = storageId.substring(storageId.lastIndexOf("//") + 2); + String bucketName = getBucketName(driverId); + if(bucketName==null) { + logger.warning("No bucket defined for " + driverId); + return false; + } + int index = storageBucketAndId.lastIndexOf(":"); + if(index<=0) { + logger.warning("No bucket defined in submitted identifier: " + storageId); + return false; + } + String idBucket = storageBucketAndId.substring(0, index); + String id = storageBucketAndId.substring(index+1); + System.out.println(id); + if(!bucketName.equals(idBucket)) { + logger.warning("Incorrect bucket in submitted identifier: " + storageId); + return false; + } + if (!usesStandardNamePattern(id)) { + logger.warning("Unacceptable identifier pattern in submitted identifier: " + storageId); + return false; + } + return true; + } + } diff --git a/src/test/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIOTest.java b/src/test/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIOTest.java index 286fe8a6595..136f5a6be6d 100644 --- a/src/test/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIOTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIOTest.java @@ -9,6 +9,8 @@ import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.mocks.MocksFactory; +import edu.harvard.iq.dataverse.util.FileUtil; + import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.ByteArrayInputStream; @@ -24,6 +26,9 @@ import org.junit.After; import org.junit.Test; import static org.junit.Assert.*; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + import org.junit.Before; /** @@ -285,4 +290,24 @@ public void testGetAuxFileAsInputStream() throws IOException { } assertEquals("This is a test string\n", sb.toString()); } + + @Test + public void testFileIdentifierFormats() throws IOException { + System.setProperty("dataverse.files.filetest.type", "file"); + System.setProperty("dataverse.files.filetest.label", "Ftest"); + System.setProperty("dataverse.files.filetest.directory", "/tmp/mydata"); + + FileUtil.generateStorageIdentifier(); + assertTrue(DataAccess.isValidDirectStorageIdentifier("filetest://" + FileUtil.generateStorageIdentifier())); + //The tests here don't use a valid identifier string + assertFalse(DataAccess.isValidDirectStorageIdentifier(dataFile.getStorageIdentifier())); + //bad store id + assertFalse(DataAccess.isValidDirectStorageIdentifier("file://" + FileUtil.generateStorageIdentifier())); + //breakout + assertFalse(DataAccess.isValidDirectStorageIdentifier("filetest://../" + FileUtil.generateStorageIdentifier())); + + System.clearProperty("dataverse.files.filetest.type"); + System.clearProperty("dataverse.files.filetest.label"); + System.clearProperty("dataverse.files.filetest.directory"); + } } diff --git a/src/test/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIOTest.java b/src/test/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIOTest.java index e2756d70663..0e8e8500a1e 100644 --- a/src/test/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIOTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIOTest.java @@ -9,6 +9,9 @@ import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.api.UtilIT; import edu.harvard.iq.dataverse.mocks.MocksFactory; +import edu.harvard.iq.dataverse.util.FileUtil; + +import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; @@ -42,11 +45,21 @@ public void setup() throws IOException { dataSet = MocksFactory.makeDataset(); dataFile.setOwner(dataSet); dataFileId = UtilIT.getRandomIdentifier(); - dataFile.setStorageIdentifier("s3://bucket:"+dataFileId); - dataSetAccess = new S3AccessIO<>(dataSet, null, s3client, "s3"); - dataFileAccess = new S3AccessIO<>(dataFile, null, s3client, "s3"); + System.setProperty("dataverse.files.s3test.type", "s3"); + System.setProperty("dataverse.files.s3test.label", "S3test"); + System.setProperty("dataverse.files.s3test.bucket-name", "thebucket"); + + dataFile.setStorageIdentifier("s3test://thebucket:"+dataFileId); + dataSetAccess = new S3AccessIO<>(dataSet, null, s3client, "s3test"); + dataFileAccess = new S3AccessIO<>(dataFile, null, s3client, "s3test"); } + @AfterEach + public void tearDown() { + System.clearProperty("dataverse.files.s3test.type"); + System.clearProperty("dataverse.files.s3test.label"); + System.clearProperty("dataverse.files.s3test.bucket-name"); + } /* createTempFile getStorageLocation @@ -99,7 +112,7 @@ void keyNullstorageIdInvalid_getMainFileKey() throws IOException { @Test void default_getUrlExpirationMinutes() { // given - System.clearProperty("dataverse.files.s3.url-expiration-minutes"); + System.clearProperty("dataverse.files.s3test.url-expiration-minutes"); // when & then assertEquals(60, dataFileAccess.getUrlExpirationMinutes()); } @@ -107,7 +120,7 @@ void default_getUrlExpirationMinutes() { @Test void validSetting_getUrlExpirationMinutes() { // given - System.setProperty("dataverse.files.s3.url-expiration-minutes", "120"); + System.setProperty("dataverse.files.s3test.url-expiration-minutes", "120"); // when & then assertEquals(120, dataFileAccess.getUrlExpirationMinutes()); } @@ -115,9 +128,20 @@ void validSetting_getUrlExpirationMinutes() { @Test void invalidSetting_getUrlExpirationMinutes() { // given - System.setProperty("dataverse.files.s3.url-expiration-minutes", "NaN"); + System.setProperty("dataverse.files.s3test.url-expiration-minutes", "NaN"); // when & then assertEquals(60, dataFileAccess.getUrlExpirationMinutes()); } + @Test + void testS3IdentifierFormats() throws IOException { + assertTrue(DataAccess.isValidDirectStorageIdentifier("s3test://thebucket:" + FileUtil.generateStorageIdentifier())); + //The tests here don't use a valid identifier string + assertFalse(DataAccess.isValidDirectStorageIdentifier(dataFile.getStorageIdentifier())); + //bad store id + assertFalse(DataAccess.isValidDirectStorageIdentifier("s3://thebucket:" + FileUtil.generateStorageIdentifier())); + //bad bucket + assertFalse(DataAccess.isValidDirectStorageIdentifier("s3test://bucket:" + FileUtil.generateStorageIdentifier())); + } + } From 81a025bf1648d194290afbce9df3f173a6e15c10 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 18 Aug 2022 14:37:55 -0400 Subject: [PATCH 2/8] add up front check that direct-upload is allowed. --- .../edu/harvard/iq/dataverse/dataaccess/DataAccess.java | 9 +++++++++ .../iq/dataverse/dataaccess/RemoteOverlayAccessIO.java | 1 + .../edu/harvard/iq/dataverse/dataaccess/StorageIO.java | 5 +++++ 3 files changed, 15 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java index bccaf58edfc..d046fa4661d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java @@ -331,14 +331,23 @@ public static boolean uploadToDatasetAllowed(Dataset d, String storageIdentifier String driverId = DataAccess.getStorageDriverFromIdentifier(storageIdentifier); String effectiveDriverId = d.getEffectiveStorageDriverId(); if(!effectiveDriverId.equals(driverId)) { + //Not allowed unless this is a remote store and you're uploading to the basestore if(getDriverType(driverId).equals(REMOTE)) { String baseDriverId = RemoteOverlayAccessIO.getBaseStoreIdFor(driverId); if(!effectiveDriverId.equals(baseDriverId)) { + //Not allowed - wrong base driver allowed = false; + } else { + //Only allowed if baseStore allows it + allowed = StorageIO.isDirectUploadEnabled(baseDriverId); } } else { + //Not allowed - wrong main driver allowed=false; } + } else { + //Only allowed if main store allows it + allowed = StorageIO.isDirectUploadEnabled(driverId); } return allowed; } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index bc421949ed7..c8e42349318 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -630,4 +630,5 @@ protected static boolean isValidIdentifier(String driverId, String storageId) { public static String getBaseStoreIdFor(String driverId) { return System.getProperty("dataverse.files." + driverId + ".base-store"); } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java index 46b4b13a889..2cd7247cd7e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java @@ -594,6 +594,10 @@ public static String getDriverPrefix(String driverId) { return driverId+ DataAccess.SEPARATOR; } + public static boolean isDirectUploadEnabled(String driverId) { + return Boolean.getBoolean(System.getProperty("dataverse.files." + driverId + ".download-redirect", "false")); + } + //Check that storageIdentifier is consistent with store's config //False will prevent direct uploads protected static boolean isValidIdentifier(String driverId, String storageId) { @@ -607,4 +611,5 @@ protected static boolean usesStandardNamePattern(String identifier) { Matcher m = r.matcher(identifier); return m.find(); } + } From 8c23ecb8786699a6f5a451de86cebf09d6e90a5d Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 19 Aug 2022 11:43:00 -0400 Subject: [PATCH 3/8] change default to false now that file and s3 have checks --- .../java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java index 2cd7247cd7e..acb441267ee 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java @@ -601,7 +601,7 @@ public static boolean isDirectUploadEnabled(String driverId) { //Check that storageIdentifier is consistent with store's config //False will prevent direct uploads protected static boolean isValidIdentifier(String driverId, String storageId) { - return true; + return false; } //Utility to verify the standard UUID pattern for stored files. From 44922e23f88e95a7dabc6a93cf99574c65ed1e2c Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 22 Aug 2022 11:52:34 -0400 Subject: [PATCH 4/8] add flyway for new constraint --- .../db/migration/V5.11.1.6__storageconstraint.sql | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 src/main/resources/db/migration/V5.11.1.6__storageconstraint.sql diff --git a/src/main/resources/db/migration/V5.11.1.6__storageconstraint.sql b/src/main/resources/db/migration/V5.11.1.6__storageconstraint.sql new file mode 100644 index 00000000000..82ae2608d78 --- /dev/null +++ b/src/main/resources/db/migration/V5.11.1.6__storageconstraint.sql @@ -0,0 +1,10 @@ +DO $$ +BEGIN + + BEGIN + ALTER TABLE dvobject ADD CONSTRAINT chk_dvobject_storageidentifier check (strpos(storageidentifier,'..') = 0); + EXCEPTION + WHEN duplicate_object THEN RAISE NOTICE 'Table constraint chk_dvobject_storageidentifier already exists'; + END; + +END $$; \ No newline at end of file From 2a57ff4f6b5f34fc126f4bc342976854caf53f84 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 22 Aug 2022 11:53:35 -0400 Subject: [PATCH 5/8] add newline --- .../resources/db/migration/V5.11.1.6__storageconstraint.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/resources/db/migration/V5.11.1.6__storageconstraint.sql b/src/main/resources/db/migration/V5.11.1.6__storageconstraint.sql index 82ae2608d78..c2629213e98 100644 --- a/src/main/resources/db/migration/V5.11.1.6__storageconstraint.sql +++ b/src/main/resources/db/migration/V5.11.1.6__storageconstraint.sql @@ -7,4 +7,4 @@ BEGIN WHEN duplicate_object THEN RAISE NOTICE 'Table constraint chk_dvobject_storageidentifier already exists'; END; -END $$; \ No newline at end of file +END $$; From 729dbbd1cf58753b26d33b3e7b215b3c2758fa70 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 22 Aug 2022 12:10:05 -0400 Subject: [PATCH 6/8] add check that file store isn't defined --- .../edu/harvard/iq/dataverse/dataaccess/FileAccessIOTest.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/test/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIOTest.java b/src/test/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIOTest.java index 136f5a6be6d..6c2d955bb11 100644 --- a/src/test/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIOTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIOTest.java @@ -302,6 +302,7 @@ public void testFileIdentifierFormats() throws IOException { //The tests here don't use a valid identifier string assertFalse(DataAccess.isValidDirectStorageIdentifier(dataFile.getStorageIdentifier())); //bad store id + assertTrue(System.getProperty("dataverse.files.filetest.type")==null); assertFalse(DataAccess.isValidDirectStorageIdentifier("file://" + FileUtil.generateStorageIdentifier())); //breakout assertFalse(DataAccess.isValidDirectStorageIdentifier("filetest://../" + FileUtil.generateStorageIdentifier())); From d25d97ae32a1f244f06c4c033e4ca7e38fa36d3f Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 22 Aug 2022 12:19:22 -0400 Subject: [PATCH 7/8] make sure file type is not set for test --- .../harvard/iq/dataverse/dataaccess/FileAccessIOTest.java | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIOTest.java b/src/test/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIOTest.java index 6c2d955bb11..95621dd8750 100644 --- a/src/test/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIOTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIOTest.java @@ -302,8 +302,14 @@ public void testFileIdentifierFormats() throws IOException { //The tests here don't use a valid identifier string assertFalse(DataAccess.isValidDirectStorageIdentifier(dataFile.getStorageIdentifier())); //bad store id - assertTrue(System.getProperty("dataverse.files.filetest.type")==null); + String defaultType = System.getProperty("dataverse.files.file.type"); + //Assure file isn't a defined store before test and reset afterwards if it was + System.clearProperty("dataverse.files.file.type"); assertFalse(DataAccess.isValidDirectStorageIdentifier("file://" + FileUtil.generateStorageIdentifier())); + if(defaultType!=null) { + System.out.println("dataverse.files.file.type reset to " + defaultType); + System.setProperty("dataverse.files.file.type", defaultType); + } //breakout assertFalse(DataAccess.isValidDirectStorageIdentifier("filetest://../" + FileUtil.generateStorageIdentifier())); From ef733af08ca7dd4d2498598d1ee2455b7ee29a61 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 13 Sep 2022 06:54:14 -0400 Subject: [PATCH 8/8] change printlns to logger.fine --- .../java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java | 2 +- .../java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java index f055ef3fb68..d5f00b9868f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java @@ -671,7 +671,7 @@ protected static boolean isValidIdentifier(String driverId, String storageId) { logger.warning("Non-normalized path in submitted identifier " + storageId); return false; } - System.out.println(normalized.getFileName().toString()); + logger.fine(normalized.getFileName().toString()); if (!usesStandardNamePattern(normalized.getFileName().toString())) { logger.warning("Unacceptable file name in submitted identifier: " + storageId); return false; diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 50318f65211..3c9cef04980 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -1294,7 +1294,7 @@ protected static boolean isValidIdentifier(String driverId, String storageId) { } String idBucket = storageBucketAndId.substring(0, index); String id = storageBucketAndId.substring(index+1); - System.out.println(id); + logger.fine(id); if(!bucketName.equals(idBucket)) { logger.warning("Incorrect bucket in submitted identifier: " + storageId); return false;