diff --git a/src/main/java/com/marklogic/contentpump/AggregateXMLReader.java b/src/main/java/com/marklogic/contentpump/AggregateXMLReader.java index 95e36e168..7cd2adbdb 100644 --- a/src/main/java/com/marklogic/contentpump/AggregateXMLReader.java +++ b/src/main/java/com/marklogic/contentpump/AggregateXMLReader.java @@ -110,6 +110,16 @@ public void initialize(InputSplit inSplit, TaskAttemptContext context) initAggConf(context); f = XMLInputFactory.newInstance(); + try { + f.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, false); + } catch (IllegalArgumentException e) { + LOG.warn("Failed configuring XXE-prevention security property IS_SUPPORTING_EXTERNAL_ENTITIES on XMLInputFactory", e); + } + try { + f.setProperty(XMLInputFactory.SUPPORT_DTD, false); + } catch (IllegalArgumentException e) { + LOG.warn("Failed configuring XXE-prevention security property SUPPORT_DTD on XMLInputFactory", e); + } setFile(((FileSplit) inSplit).getPath()); fs = file.getFileSystem(context.getConfiguration()); FileStatus status = fs.getFileStatus(file); diff --git a/src/main/java/com/marklogic/contentpump/CompressedAggXMLReader.java b/src/main/java/com/marklogic/contentpump/CompressedAggXMLReader.java index 03a389217..2a3be6d02 100644 --- a/src/main/java/com/marklogic/contentpump/CompressedAggXMLReader.java +++ b/src/main/java/com/marklogic/contentpump/CompressedAggXMLReader.java @@ -69,6 +69,16 @@ public void initialize(InputSplit inSplit, TaskAttemptContext context) initConfig(context); initAggConf(context); f = XMLInputFactory.newInstance(); + try { + f.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, false); + } catch (IllegalArgumentException e) { + LOG.warn("Failed configuring XXE-prevention security property IS_SUPPORTING_EXTERNAL_ENTITIES on XMLInputFactory", e); + } + try { + f.setProperty(XMLInputFactory.SUPPORT_DTD, false); + } catch (IllegalArgumentException e) { + LOG.warn("Failed configuring XXE-prevention security property SUPPORT_DTD on XMLInputFactory", e); + } setFile(((FileSplit) inSplit).getPath()); fs = file.getFileSystem(context.getConfiguration()); diff --git a/src/main/java/com/marklogic/mapreduce/DOMDocument.java b/src/main/java/com/marklogic/mapreduce/DOMDocument.java index caead9a19..6edd1e6a7 100644 --- a/src/main/java/com/marklogic/mapreduce/DOMDocument.java +++ b/src/main/java/com/marklogic/mapreduce/DOMDocument.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.io.UnsupportedEncodingException; +import javax.xml.XMLConstants; import javax.xml.transform.OutputKeys; import javax.xml.transform.Result; import javax.xml.transform.Source; @@ -68,6 +69,16 @@ public class DOMDocument extends ForestDocument { private static synchronized TransformerFactory getTransformerFactory() { if (transformerFactory == null) { transformerFactory = TransformerFactory.newInstance(); + try { + transformerFactory.setAttribute(XMLConstants.ACCESS_EXTERNAL_DTD, ""); + } catch (IllegalArgumentException e) { + LOG.warn("Failed configuring XXE-prevention security attribute ACCESS_EXTERNAL_DTD on TransformerFactory", e); + } + try { + transformerFactory.setAttribute(XMLConstants.ACCESS_EXTERNAL_STYLESHEET, ""); + } catch (IllegalArgumentException e) { + LOG.warn("Failed configuring XXE-prevention security attribute ACCESS_EXTERNAL_STYLESHEET on TransformerFactory", e); + } } return transformerFactory; diff --git a/src/main/java/com/marklogic/mapreduce/JSONDocument.java b/src/main/java/com/marklogic/mapreduce/JSONDocument.java index da65a304d..bf334ddbe 100644 --- a/src/main/java/com/marklogic/mapreduce/JSONDocument.java +++ b/src/main/java/com/marklogic/mapreduce/JSONDocument.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.io.UnsupportedEncodingException; +import javax.xml.XMLConstants; import javax.xml.transform.TransformerFactory; import org.apache.commons.logging.Log; @@ -48,6 +49,16 @@ public class JSONDocument extends ForestDocument { private static synchronized TransformerFactory getTransformerFactory() { if (transformerFactory == null) { transformerFactory = TransformerFactory.newInstance(); + try { + transformerFactory.setAttribute(XMLConstants.ACCESS_EXTERNAL_DTD, ""); + } catch (IllegalArgumentException e) { + LOG.warn("Failed configuring XXE-prevention security attribute ACCESS_EXTERNAL_DTD on TransformerFactory", e); + } + try { + transformerFactory.setAttribute(XMLConstants.ACCESS_EXTERNAL_STYLESHEET, ""); + } catch (IllegalArgumentException e) { + LOG.warn("Failed configuring XXE-prevention security attribute ACCESS_EXTERNAL_STYLESHEET on TransformerFactory", e); + } } return transformerFactory;