From bcee89f3f6548e6b987cbb6b51445fedecd4a2ef Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Thu, 11 Feb 2021 14:22:31 -0500 Subject: [PATCH 1/3] fix for the redetect-type-by-filename issue. #7527 --- .../engine/command/impl/RedetectFileTypeCommand.java | 3 ++- .../edu/harvard/iq/dataverse/util/FileTypeDetection.java | 7 ++++--- .../harvard/iq/dataverse/util/FileTypeDetectionTest.java | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RedetectFileTypeCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RedetectFileTypeCommand.java index ef20ec76e12..00bea9489c1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RedetectFileTypeCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RedetectFileTypeCommand.java @@ -62,7 +62,8 @@ public DataFile execute(CommandContext ctxt) throws CommandException { } logger.fine("target file: " + localFile); - String newlyDetectedContentType = FileTypeDetection.determineFileType(localFile); + String fileName = fileToRedetect.getLatestFileMetadata().getLabel(); + String newlyDetectedContentType = FileTypeDetection.determineFileType(localFile, fileName); fileToRedetect.setContentType(newlyDetectedContentType); } catch (IOException ex) { throw new CommandException("Exception while attempting to get the bytes of the file during file type redetection: " + ex.getLocalizedMessage(), this); diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileTypeDetection.java b/src/main/java/edu/harvard/iq/dataverse/util/FileTypeDetection.java index 52515c00524..d1534416dc1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileTypeDetection.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileTypeDetection.java @@ -4,9 +4,10 @@ import java.io.IOException; public class FileTypeDetection { - - public static String determineFileType(File file) throws IOException { - return FileUtil.determineFileType(file, file.getName()); + // Question: why do we need this utility? - as opposed to just calling the + // static method in FileUtil directly? - L.A. + public static String determineFileType(File file, String fileName) throws IOException { + return FileUtil.determineFileType(file, fileName); } } diff --git a/src/test/java/edu/harvard/iq/dataverse/util/FileTypeDetectionTest.java b/src/test/java/edu/harvard/iq/dataverse/util/FileTypeDetectionTest.java index 5d2b9b4d56a..f1675a3728e 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/FileTypeDetectionTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/FileTypeDetectionTest.java @@ -36,7 +36,7 @@ public static void tearDownClass() { public void testDetermineFileTypeJupyterNoteboook() throws Exception { File file = new File("src/test/java/edu/harvard/iq/dataverse/util/irc-metrics.ipynb"); // https://jupyter.readthedocs.io/en/latest/reference/mimetype.html - assertEquals("application/x-ipynb+json", FileTypeDetection.determineFileType(file)); + assertEquals("application/x-ipynb+json", FileTypeDetection.determineFileType(file, file.getName())); } } From 0b024e36df11998cf4bba98bba40edc62878c630 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Thu, 11 Feb 2021 18:09:11 -0500 Subject: [PATCH 2/3] removing a wasteful setting lookup in the metadata walker. #7527 --- .../iq/dataverse/util/DatasetFieldWalker.java | 102 ++++++++++-------- 1 file changed, 58 insertions(+), 44 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/DatasetFieldWalker.java b/src/main/java/edu/harvard/iq/dataverse/util/DatasetFieldWalker.java index 4c6434b83a5..ba6eb71ab52 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/DatasetFieldWalker.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/DatasetFieldWalker.java @@ -15,11 +15,11 @@ import java.util.logging.Logger; /** - * A means of iterating over {@link DatasetField}s, or a collection of them. - * As these may have a complex structure (compound values, etc), this object - * allows processing them via an event stream, similar to SAX parsing of XML. - * Visiting of the fields is done in display order. - * + * A means of iterating over {@link DatasetField}s, or a collection of them. As + * these may have a complex structure (compound values, etc), this object allows + * processing them via an event stream, similar to SAX parsing of XML. Visiting + * of the fields is done in display order. + * * @author michael */ public class DatasetFieldWalker { @@ -27,89 +27,103 @@ public class DatasetFieldWalker { private static final Logger logger = Logger.getLogger(DatasetFieldWalker.class.getCanonicalName()); public interface Listener { - void startField( DatasetField f ); - void endField( DatasetField f ); - void primitiveValue( DatasetFieldValue dsfv ); - void controledVocabularyValue( ControlledVocabularyValue cvv ); - void startCompoundValue( DatasetFieldCompoundValue dsfcv ); - void endCompoundValue( DatasetFieldCompoundValue dsfcv ); + + void startField(DatasetField f); + + void endField(DatasetField f); + + void primitiveValue(DatasetFieldValue dsfv); + + void controledVocabularyValue(ControlledVocabularyValue cvv); + + void startCompoundValue(DatasetFieldCompoundValue dsfcv); + + void endCompoundValue(DatasetFieldCompoundValue dsfcv); } - + /** * Convenience method to walk over a field. + * * @param dsf the field to walk over. * @param l the listener to execute on {@code dsf}'s values and structure. */ - public static void walk( DatasetField dsf, Listener l ) { + public static void walk(DatasetField dsf, Listener l) { DatasetFieldWalker joe = new DatasetFieldWalker(l); SettingsServiceBean nullServiceBean = null; joe.walk(dsf, nullServiceBean); } /** - * Convenience method to walk over a list of fields. Traversal - * is done in display order. + * Convenience method to walk over a list of fields. Traversal is done in + * display order. + * * @param fields the fields to go over. Does not have to be sorted. * @param exclude the fields to skip * @param l the listener to execute on each field values and structure. */ public static void walk(List fields, SettingsServiceBean settingsService, Listener l) { DatasetFieldWalker joe = new DatasetFieldWalker(l); - for ( DatasetField dsf : sort( fields, DatasetField.DisplayOrder) ) { + for (DatasetField dsf : sort(fields, DatasetField.DisplayOrder)) { joe.walk(dsf, settingsService); } } - + private Listener l; - - + public DatasetFieldWalker(Listener l) { this.l = l; } - - public DatasetFieldWalker(){ - this( null ); + + public DatasetFieldWalker() { + this(null); } - + public void walk(DatasetField fld, SettingsServiceBean settingsService) { l.startField(fld); DatasetFieldType datasetFieldType = fld.getDatasetFieldType(); + boolean excludeEmailFromExport = settingsService != null + && settingsService.isTrueForKey(SettingsServiceBean.Key.ExcludeEmailFromExport, false); - if ( datasetFieldType.isControlledVocabulary() ) { - for ( ControlledVocabularyValue cvv - : sort(fld.getControlledVocabularyValues(), ControlledVocabularyValue.DisplayOrder) ) { + if (datasetFieldType.isControlledVocabulary()) { + for (ControlledVocabularyValue cvv + : sort(fld.getControlledVocabularyValues(), ControlledVocabularyValue.DisplayOrder)) { l.controledVocabularyValue(cvv); } - - } else if ( datasetFieldType.isPrimitive() ) { - for ( DatasetFieldValue pv : sort(fld.getDatasetFieldValues(), DatasetFieldValue.DisplayOrder) ) { - if (settingsService != null && settingsService.isTrueForKey(SettingsServiceBean.Key.ExcludeEmailFromExport, false) && DatasetFieldType.FieldType.EMAIL.equals(pv.getDatasetField().getDatasetFieldType().getFieldType())) { + + } else if (datasetFieldType.isPrimitive()) { + for (DatasetFieldValue pv : sort(fld.getDatasetFieldValues(), DatasetFieldValue.DisplayOrder)) { + //if (settingsService != null && settingsService.isTrueForKey(SettingsServiceBean.Key.ExcludeEmailFromExport, false) && DatasetFieldType.FieldType.EMAIL.equals(pv.getDatasetField().getDatasetFieldType().getFieldType())) { + // Please note that settingsService.isTrueForKey(...) is not free, + // it translates into one named database query on the setting table. + // Not implying that this will solve the EJB rollback issue in #7527 + // somehow, but it is insane that we were doing that for every primitive + // field in the dataset metadata. -- L.A. + if (DatasetFieldType.FieldType.EMAIL.equals(pv.getDatasetField().getDatasetFieldType().getFieldType()) && excludeEmailFromExport) { continue; } l.primitiveValue(pv); } - - } else if ( datasetFieldType.isCompound() ) { - for ( DatasetFieldCompoundValue dsfcv : sort( fld.getDatasetFieldCompoundValues(), DatasetFieldCompoundValue.DisplayOrder) ) { - l.startCompoundValue(dsfcv); - for ( DatasetField dsf : sort(dsfcv.getChildDatasetFields(), DatasetField.DisplayOrder ) ) { - walk(dsf, settingsService); - } - l.endCompoundValue(dsfcv); - } + + } else if (datasetFieldType.isCompound()) { + for (DatasetFieldCompoundValue dsfcv : sort(fld.getDatasetFieldCompoundValues(), DatasetFieldCompoundValue.DisplayOrder)) { + l.startCompoundValue(dsfcv); + for (DatasetField dsf : sort(dsfcv.getChildDatasetFields(), DatasetField.DisplayOrder)) { + walk(dsf, settingsService); + } + l.endCompoundValue(dsfcv); + } } l.endField(fld); } - - + public void setL(Listener l) { this.l = l; } - - static private Iterable sort( List list, Comparator cmp ) { + + static private Iterable sort(List list, Comparator cmp) { ArrayList tbs = new ArrayList<>(list); Collections.sort(tbs, cmp); return tbs; } - + } From 0e88fef3592023a1a67ffc9c65e2c93ce527b110 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Tue, 23 Feb 2021 09:22:15 -0500 Subject: [PATCH 3/3] Making the API bean officially stateless. #7527 --- src/main/java/edu/harvard/iq/dataverse/api/Files.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Files.java b/src/main/java/edu/harvard/iq/dataverse/api/Files.java index defc2c4d9ab..80a5b618073 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Files.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Files.java @@ -48,6 +48,7 @@ import java.util.logging.Level; import java.util.logging.Logger; import javax.ejb.EJB; +import javax.ejb.Stateless; import javax.inject.Inject; import javax.json.Json; import javax.json.JsonReader; @@ -70,6 +71,7 @@ import org.glassfish.jersey.media.multipart.FormDataContentDisposition; import org.glassfish.jersey.media.multipart.FormDataParam; +@Stateless @Path("files") public class Files extends AbstractApiBean {