From 44ed7675fd889221b88b2cee7615c663fc762704 Mon Sep 17 00:00:00 2001 From: vshaniga Date: Sun, 22 Mar 2026 22:37:32 -0700 Subject: [PATCH 1/3] MLE-27554 MLE-27556: Fix XXE Injection - XML External Entity related to XMLInputFactory --- .../java/com/marklogic/contentpump/AggregateXMLReader.java | 6 ++++++ .../com/marklogic/contentpump/CompressedAggXMLReader.java | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/src/main/java/com/marklogic/contentpump/AggregateXMLReader.java b/src/main/java/com/marklogic/contentpump/AggregateXMLReader.java index 95e36e168..c303bbd14 100644 --- a/src/main/java/com/marklogic/contentpump/AggregateXMLReader.java +++ b/src/main/java/com/marklogic/contentpump/AggregateXMLReader.java @@ -110,6 +110,12 @@ public void initialize(InputSplit inSplit, TaskAttemptContext context) initAggConf(context); f = XMLInputFactory.newInstance(); + try { + f.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, false); + f.setProperty(XMLInputFactory.SUPPORT_DTD, false); + } catch (IllegalArgumentException e) { + LOG.warn("Unable to set XXE safety properties on XMLInputFactory", e); + } setFile(((FileSplit) inSplit).getPath()); fs = file.getFileSystem(context.getConfiguration()); FileStatus status = fs.getFileStatus(file); diff --git a/src/main/java/com/marklogic/contentpump/CompressedAggXMLReader.java b/src/main/java/com/marklogic/contentpump/CompressedAggXMLReader.java index 03a389217..3c3789f9d 100644 --- a/src/main/java/com/marklogic/contentpump/CompressedAggXMLReader.java +++ b/src/main/java/com/marklogic/contentpump/CompressedAggXMLReader.java @@ -69,6 +69,12 @@ public void initialize(InputSplit inSplit, TaskAttemptContext context) initConfig(context); initAggConf(context); f = XMLInputFactory.newInstance(); + try { + f.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, false); + f.setProperty(XMLInputFactory.SUPPORT_DTD, false); + } catch (IllegalArgumentException e) { + LOG.warn("Unable to set XXE safety properties on 
XMLInputFactory", e); + } setFile(((FileSplit) inSplit).getPath()); fs = file.getFileSystem(context.getConfiguration()); From d85cdd18ab9f21dea39981f8411deac4d54da5d2 Mon Sep 17 00:00:00 2001 From: vshaniga Date: Sun, 22 Mar 2026 22:49:00 -0700 Subject: [PATCH 2/3] MLE-27553 MLE-27555: Fix XXE Injection - XML External Entity related to TransformerFactory --- src/main/java/com/marklogic/mapreduce/DOMDocument.java | 7 +++++++ src/main/java/com/marklogic/mapreduce/JSONDocument.java | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/src/main/java/com/marklogic/mapreduce/DOMDocument.java b/src/main/java/com/marklogic/mapreduce/DOMDocument.java index caead9a19..79f365796 100644 --- a/src/main/java/com/marklogic/mapreduce/DOMDocument.java +++ b/src/main/java/com/marklogic/mapreduce/DOMDocument.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.io.UnsupportedEncodingException; +import javax.xml.XMLConstants; import javax.xml.transform.OutputKeys; import javax.xml.transform.Result; import javax.xml.transform.Source; @@ -68,6 +69,12 @@ public class DOMDocument extends ForestDocument { private static synchronized TransformerFactory getTransformerFactory() { if (transformerFactory == null) { transformerFactory = TransformerFactory.newInstance(); + try { + transformerFactory.setAttribute(XMLConstants.ACCESS_EXTERNAL_DTD, ""); + transformerFactory.setAttribute(XMLConstants.ACCESS_EXTERNAL_STYLESHEET, ""); + } catch (IllegalArgumentException e) { + LOG.warn("Unable to set XXE safety attributes on TransformerFactory", e); + } } return transformerFactory; diff --git a/src/main/java/com/marklogic/mapreduce/JSONDocument.java b/src/main/java/com/marklogic/mapreduce/JSONDocument.java index da65a304d..66eedbe41 100644 --- a/src/main/java/com/marklogic/mapreduce/JSONDocument.java +++ b/src/main/java/com/marklogic/mapreduce/JSONDocument.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.io.UnsupportedEncodingException; +import javax.xml.XMLConstants; import
javax.xml.transform.TransformerFactory; import org.apache.commons.logging.Log; @@ -48,6 +49,12 @@ public class JSONDocument extends ForestDocument { private static synchronized TransformerFactory getTransformerFactory() { if (transformerFactory == null) { transformerFactory = TransformerFactory.newInstance(); + try { + transformerFactory.setAttribute(XMLConstants.ACCESS_EXTERNAL_DTD, ""); + transformerFactory.setAttribute(XMLConstants.ACCESS_EXTERNAL_STYLESHEET, ""); + } catch (IllegalArgumentException e) { + LOG.warn("Unable to set XXE safety attributes on TransformerFactory", e); + } } return transformerFactory; From 17026becd3e73b3bc9f904ffb0bcce69b85513c6 Mon Sep 17 00:00:00 2001 From: vshaniga Date: Wed, 25 Mar 2026 22:56:03 -0700 Subject: [PATCH 3/3] MLE-27553 MLE-27554 MLE-27555 MLE-27556: Split XXE-prevention try/catch blocks and improve warning messages --- .../java/com/marklogic/contentpump/AggregateXMLReader.java | 6 +++++- .../com/marklogic/contentpump/CompressedAggXMLReader.java | 6 +++++- src/main/java/com/marklogic/mapreduce/DOMDocument.java | 6 +++++- src/main/java/com/marklogic/mapreduce/JSONDocument.java | 6 +++++- 4 files changed, 20 insertions(+), 4 deletions(-) diff --git a/src/main/java/com/marklogic/contentpump/AggregateXMLReader.java b/src/main/java/com/marklogic/contentpump/AggregateXMLReader.java index c303bbd14..7cd2adbdb 100644 --- a/src/main/java/com/marklogic/contentpump/AggregateXMLReader.java +++ b/src/main/java/com/marklogic/contentpump/AggregateXMLReader.java @@ -112,9 +112,13 @@ public void initialize(InputSplit inSplit, TaskAttemptContext context) f = XMLInputFactory.newInstance(); try { f.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, false); + } catch (IllegalArgumentException e) { + LOG.warn("Failed configuring XXE-prevention security property IS_SUPPORTING_EXTERNAL_ENTITIES on XMLInputFactory", e); + } + try { f.setProperty(XMLInputFactory.SUPPORT_DTD, false); } catch (IllegalArgumentException e) { - 
LOG.warn("Unable to set XXE safety properties on XMLInputFactory", e); + LOG.warn("Failed configuring XXE-prevention security property SUPPORT_DTD on XMLInputFactory", e); } setFile(((FileSplit) inSplit).getPath()); fs = file.getFileSystem(context.getConfiguration()); diff --git a/src/main/java/com/marklogic/contentpump/CompressedAggXMLReader.java b/src/main/java/com/marklogic/contentpump/CompressedAggXMLReader.java index 3c3789f9d..2a3be6d02 100644 --- a/src/main/java/com/marklogic/contentpump/CompressedAggXMLReader.java +++ b/src/main/java/com/marklogic/contentpump/CompressedAggXMLReader.java @@ -71,9 +71,13 @@ public void initialize(InputSplit inSplit, TaskAttemptContext context) f = XMLInputFactory.newInstance(); try { f.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, false); + } catch (IllegalArgumentException e) { + LOG.warn("Failed configuring XXE-prevention security property IS_SUPPORTING_EXTERNAL_ENTITIES on XMLInputFactory", e); + } + try { f.setProperty(XMLInputFactory.SUPPORT_DTD, false); } catch (IllegalArgumentException e) { - LOG.warn("Unable to set XXE safety properties on XMLInputFactory", e); + LOG.warn("Failed configuring XXE-prevention security property SUPPORT_DTD on XMLInputFactory", e); } setFile(((FileSplit) inSplit).getPath()); fs = file.getFileSystem(context.getConfiguration()); diff --git a/src/main/java/com/marklogic/mapreduce/DOMDocument.java b/src/main/java/com/marklogic/mapreduce/DOMDocument.java index 79f365796..6edd1e6a7 100644 --- a/src/main/java/com/marklogic/mapreduce/DOMDocument.java +++ b/src/main/java/com/marklogic/mapreduce/DOMDocument.java @@ -71,9 +71,13 @@ private static synchronized TransformerFactory getTransformerFactory() { transformerFactory = TransformerFactory.newInstance(); try { transformerFactory.setAttribute(XMLConstants.ACCESS_EXTERNAL_DTD, ""); + } catch (IllegalArgumentException e) { + LOG.warn("Failed configuring XXE-prevention security attribute ACCESS_EXTERNAL_DTD on TransformerFactory", e); + 
} + try { transformerFactory.setAttribute(XMLConstants.ACCESS_EXTERNAL_STYLESHEET, ""); } catch (IllegalArgumentException e) { - LOG.warn("Unable to set XXE safety attributes on TransformerFactory", e); + LOG.warn("Failed configuring XXE-prevention security attribute ACCESS_EXTERNAL_STYLESHEET on TransformerFactory", e); } } diff --git a/src/main/java/com/marklogic/mapreduce/JSONDocument.java b/src/main/java/com/marklogic/mapreduce/JSONDocument.java index 66eedbe41..bf334ddbe 100644 --- a/src/main/java/com/marklogic/mapreduce/JSONDocument.java +++ b/src/main/java/com/marklogic/mapreduce/JSONDocument.java @@ -51,9 +51,13 @@ private static synchronized TransformerFactory getTransformerFactory() { transformerFactory = TransformerFactory.newInstance(); try { transformerFactory.setAttribute(XMLConstants.ACCESS_EXTERNAL_DTD, ""); + } catch (IllegalArgumentException e) { + LOG.warn("Failed configuring XXE-prevention security attribute ACCESS_EXTERNAL_DTD on TransformerFactory", e); + } + try { transformerFactory.setAttribute(XMLConstants.ACCESS_EXTERNAL_STYLESHEET, ""); } catch (IllegalArgumentException e) { - LOG.warn("Unable to set XXE safety attributes on TransformerFactory", e); + LOG.warn("Failed configuring XXE-prevention security attribute ACCESS_EXTERNAL_STYLESHEET on TransformerFactory", e); } }