diff --git a/mathmlquerygenerator.iml b/mathmlquerygenerator.iml index 83495b2..442728d 100644 --- a/mathmlquerygenerator.iml +++ b/mathmlquerygenerator.iml @@ -11,6 +11,7 @@ + diff --git a/pom.xml b/pom.xml index 3fdecc1..24a18dd 100644 --- a/pom.xml +++ b/pom.xml @@ -46,6 +46,11 @@ + + net.sf.saxon + Saxon-HE + 9.5.1-6 + junit junit diff --git a/src/main/java/com/formulasearchengine/mathmlquerygenerator/NtcirTopicReader.java b/src/main/java/com/formulasearchengine/mathmlquerygenerator/NtcirTopicReader.java index fe32285..f25c95b 100644 --- a/src/main/java/com/formulasearchengine/mathmlquerygenerator/NtcirTopicReader.java +++ b/src/main/java/com/formulasearchengine/mathmlquerygenerator/NtcirTopicReader.java @@ -1,7 +1,7 @@ package com.formulasearchengine.mathmlquerygenerator; -import com.formulasearchengine.xmlhelper.DomDocumentHelper; -import com.formulasearchengine.xmlhelper.NonWhitespaceNodeList; +import com.formulasearchengine.mathmlquerygenerator.xmlhelper.XMLHelper; +import com.formulasearchengine.mathmlquerygenerator.xmlhelper.NonWhitespaceNodeList; import org.w3c.dom.Document; import org.w3c.dom.Node; import org.w3c.dom.NodeList; @@ -15,7 +15,7 @@ import java.util.ArrayList; import java.util.List; -import static com.formulasearchengine.xmlhelper.NonWhitespaceNodeList.getFirstChild; +import static com.formulasearchengine.mathmlquerygenerator.xmlhelper.NonWhitespaceNodeList.getFirstChild; /** * Created by Moritz on 08.11.2014. @@ -35,7 +35,7 @@ public NtcirTopicReader( Document topics ) { } public NtcirTopicReader( File topicFile ) throws ParserConfigurationException, IOException, SAXException { - DocumentBuilder documentBuilder = DomDocumentHelper.getDocumentBuilderFactory().newDocumentBuilder(); + DocumentBuilder documentBuilder = XMLHelper.getDocumentBuilder( true ); topics = documentBuilder.parse( topicFile ); //TODO: Find out how this code duplication can be avoided in Java. @@ -75,7 +75,7 @@ public final NtcirTopicReader setAddQvarMap( boolean addQvarMap ) { * @throws XPathExpressionException Thrown if xpaths fail to compile or fail to evaluate + */ public final List extractPatterns() throws XPathExpressionException { - final XPath xpath = DomDocumentHelper.namespaceAwareXpath( "t", NS_NII ); + final XPath xpath = XMLHelper.namespaceAwareXpath( "t", NS_NII ); final XPathExpression xNum = xpath.compile( "./t:num" ); final XPathExpression xFormula = xpath.compile( "./t:query/t:formula" ); final NonWhitespaceNodeList topicList = new NonWhitespaceNodeList( diff --git a/src/main/java/com/formulasearchengine/mathmlquerygenerator/XQueryGenerator.java b/src/main/java/com/formulasearchengine/mathmlquerygenerator/XQueryGenerator.java index 034d424..dd6067d 100644 --- a/src/main/java/com/formulasearchengine/mathmlquerygenerator/XQueryGenerator.java +++ b/src/main/java/com/formulasearchengine/mathmlquerygenerator/XQueryGenerator.java @@ -1,8 +1,8 @@ package com.formulasearchengine.mathmlquerygenerator; -import com.formulasearchengine.xmlhelper.DomDocumentHelper; -import com.formulasearchengine.xmlhelper.NonWhitespaceNodeList; +import com.formulasearchengine.mathmlquerygenerator.xmlhelper.XMLHelper; +import com.formulasearchengine.mathmlquerygenerator.xmlhelper.NonWhitespaceNodeList; import com.google.common.collect.Lists; import org.w3c.dom.Document; import org.w3c.dom.Node; @@ -14,7 +14,7 @@ import java.util.*; import java.util.regex.Pattern; -import static com.formulasearchengine.xmlhelper.NonWhitespaceNodeList.getFirstChild; +import static com.formulasearchengine.mathmlquerygenerator.xmlhelper.NonWhitespaceNodeList.getFirstChild; /** * Converts MathML queries into XQueries. @@ -48,7 +48,7 @@ public class XQueryGenerator { */ public XQueryGenerator( String input ) throws IOException, SAXException, ParserConfigurationException { - final Document xml = DomDocumentHelper.String2Doc( input ); + final Document xml = XMLHelper.String2Doc( input, true ); this.mainElement = getMainElement( xml ); } diff --git a/src/main/java/com/formulasearchengine/xmlhelper/NonWhitespaceNodeList.java b/src/main/java/com/formulasearchengine/mathmlquerygenerator/xmlhelper/NonWhitespaceNodeList.java similarity index 95% rename from src/main/java/com/formulasearchengine/xmlhelper/NonWhitespaceNodeList.java rename to src/main/java/com/formulasearchengine/mathmlquerygenerator/xmlhelper/NonWhitespaceNodeList.java index 6ec8bf1..0506754 100644 --- a/src/main/java/com/formulasearchengine/xmlhelper/NonWhitespaceNodeList.java +++ b/src/main/java/com/formulasearchengine/mathmlquerygenerator/xmlhelper/NonWhitespaceNodeList.java @@ -1,4 +1,4 @@ -package com.formulasearchengine.xmlhelper; +package com.formulasearchengine.mathmlquerygenerator.xmlhelper; import org.w3c.dom.Node; import org.w3c.dom.NodeList; diff --git a/src/main/java/com/formulasearchengine/mathmlquerygenerator/xmlhelper/XMLHelper.java b/src/main/java/com/formulasearchengine/mathmlquerygenerator/xmlhelper/XMLHelper.java new file mode 100644 index 0000000..dc916b3 --- /dev/null +++ b/src/main/java/com/formulasearchengine/mathmlquerygenerator/xmlhelper/XMLHelper.java @@ -0,0 +1,578 @@ +package com.formulasearchengine.mathmlquerygenerator.xmlhelper; + +import com.google.common.collect.HashMultiset; +import com.google.common.collect.Multiset; +import net.sf.saxon.Configuration; +import net.sf.saxon.s9api.*; +import org.w3c.dom.Document; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; + +import javax.xml.namespace.NamespaceContext; +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.transform.OutputKeys; +import javax.xml.transform.Transformer; +import javax.xml.transform.TransformerException; +import javax.xml.transform.TransformerFactory; +import javax.xml.transform.dom.DOMResult; +import javax.xml.transform.dom.DOMSource; +import javax.xml.transform.sax.SAXSource; +import javax.xml.transform.stream.StreamResult; +import javax.xml.transform.stream.StreamSource; +import javax.xml.xpath.*; +import java.io.IOException; +import java.io.InputStream; +import java.io.StringReader; +import java.io.StringWriter; +import java.util.AbstractMap.SimpleEntry; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + + +// TODO: Auto-generated Javadoc + +/** + * The Class XMLHelper. + */ +@SuppressWarnings("UnusedDeclaration") +public final class XMLHelper { + + /** + * The factory. + */ + private static XPathFactory factory = XPathFactory.newInstance(); + + /** + * The xpath. + */ + private static XPath xpath = factory.newXPath(); + + + + // + private static ArrayList> traverseNode(Node n, String p) { + ArrayList> output = new ArrayList<>(); + String nName; + if (n.getNodeType() != Node.TEXT_NODE) { + nName = n.getNodeName(); + if (nName.startsWith("m:")) + nName = nName.substring(2); + if (nName.equals("mws:qvar")) + return new ArrayList<>(); + p += "/" + nName; + } + String nValue = n.getNodeValue(); + if (nValue != null) { + nValue = nValue.trim(); + if (nValue.length() == 0) { + return new ArrayList<>(); + } + } else { + nValue = ""; + } + + if (!n.hasChildNodes()) { + output.add(new SimpleEntry<>(p, nValue)); + } else { + for (int i = 0; i < n.getChildNodes().getLength(); i++) { + output.addAll(traverseNode(n.getChildNodes().item(i), p)); + } + } + return output; + } + + public static ArrayList> getMMLLeaves(Node n) throws XPathExpressionException{ + Node cmmlRoot = XMLHelper.getElementB(n, "./semantics/*[1]"); + return traverseNode(cmmlRoot, ""); + + } + + + + /** + * Helper program: Extracts the specified XPATH expression + * from an XML-String. + * + * @param InputXMLString the input xml string + * @param XPath the x path + * @return NodeList + * @throws ParserConfigurationException the parser configuration exception + * @throws IOException Signals that an I/O exception has occurred. + * @throws XPathExpressionException the x path expression exception + */ + public static NodeList String2NodeList(String InputXMLString, + String XPath) throws ParserConfigurationException, + IOException, XPathExpressionException { + + Document doc = String2Doc(InputXMLString, false); + XPathFactory factory = XPathFactory.newInstance(); + XPath xpath = factory.newXPath(); + XPathExpression expr = xpath.compile(XPath); //compile XML tag extractor sent as param + + Object result = expr.evaluate(doc, XPathConstants.NODESET); + return (NodeList) result; + + } + + /** + * Helper program: Extracts the specified XPATH expression + * from an XML-String. + * + * @param node the node + * @param XPath the x path + * @return NodeList + * @throws XPathExpressionException the x path expression exception + */ + public static Node getElementB(Node node, String XPath) throws XPathExpressionException { + + + XPathExpression expr = xpath.compile(XPath); + return getElementB(node, expr); + + } + + /** + * Helper program: Extracts the specified XPATH expression + * from an XML-String. + * + * @param node the node + * @param XPath the x path + * @return NodeList + * @throws XPathExpressionException the x path expression exception + */ + public static Node getElementB(Node node, XPathExpression XPath) throws XPathExpressionException { + + return getElementsB(node, XPath).item(0); + + } + + /** + * Helper program: Extracts the specified XPATH expression + * from an XML-String. + * + * @param node the node + * @param XPath the x path + * @return NodeList + * @throws XPathExpressionException the x path expression exception + */ + public static NodeList getElementsB(Node node, XPathExpression XPath) + throws XPathExpressionException { + + return (NodeList) XPath.evaluate(node, XPathConstants.NODESET); + + } + + /** + * Helper program: Extracts the specified XPATH expression + * from an XML-String. + * + * @param node the node + * @param xString the x path + * @return NodeList + * @throws XPathExpressionException the x path expression exception + */ + public static NodeList getElementsB(Node node, String xString) throws XPathExpressionException { + XPathExpression xPath = compileX(xString); + return (NodeList) xPath.evaluate(node, XPathConstants.NODESET); + + } + + /** + * Helper program: Transforms a String to a XML Document. + * + * @param InputXMLString the input xml string + * @param NamespaceAwareness the namespace awareness + * @return parsed document + * @throws ParserConfigurationException the parser configuration exception + * @throws IOException Signals that an I/O exception has occurred. + */ + + public static Document String2Doc(String InputXMLString, boolean NamespaceAwareness) { + try { + DocumentBuilder builder = getDocumentBuilder(NamespaceAwareness); + InputSource is = new InputSource(new StringReader(InputXMLString)); + is.setEncoding("UTF-8"); + return builder.parse(is); + } catch (SAXException | ParserConfigurationException | IOException e) { + System.out.println("cannot parse following content\\n\\n" + InputXMLString); + e.printStackTrace(); + return null; + } + + } + + public static Document getNewDocument() throws ParserConfigurationException { + return getNewDocument(false); + } + + public static Document getNewDocument(Boolean nameSpaceAwareness) throws ParserConfigurationException { + DocumentBuilder builder = getDocumentBuilder(false); + return builder.newDocument(); + } + + public static DocumentBuilder getDocumentBuilder(boolean NamespaceAwareness) throws ParserConfigurationException { + DocumentBuilderFactory domFactory = DocumentBuilderFactory + .newInstance(); + domFactory.setNamespaceAware(NamespaceAwareness); + // Unfortunately we can not ignore whitespaces without a schema. + // So we use the NdLst workaround for now. + //domFactory.setValidating(true); + //domFactory.setIgnoringElementContentWhitespace( true ); + domFactory.setAttribute( + "http://apache.org/xml/features/dom/include-ignorable-whitespace", + Boolean.FALSE); + + return domFactory.newDocumentBuilder(); + } + + /** + * Returns a list of unique identifiers from a MathML string. + * This function searches for all mi- or ci-tags within + * the string. + * + * @param mathml + * @return a list of unique identifiers. When no identifiers were + * found, an empty list will be returned. + */ + @SuppressWarnings("JavaDoc") + public static Multiset getIdentifiersFrom(String mathml) { + Multiset list = HashMultiset.create(); + Pattern p = Pattern.compile("<((m:)?[mc][ion])(.*?)>(.{1,4}?)", Pattern.DOTALL); + Matcher m = p.matcher(mathml); + while (m.find()) { + String identifier = m.group(4); + list.add(identifier); + } + return list; + } + + /** + * Returns a list of unique identifiers from a MathML string. + * This function searches for all mi or ci tags within + * the string. + * + * @param mathml + * @return a list of unique identifiers. When no identifiers were + * found, an empty list will be returned. + */ + @SuppressWarnings("JavaDoc") + public static Multiset getIdentifiersFromQuery(String mathml) { + Multiset list = HashMultiset.create(); + Pattern p = Pattern.compile("[mc][ion]\\[([^\\]]{1,4})\\]"); + Matcher m = p.matcher(mathml); + while (m.find()) { + String identifier = m.group(1); + list.add(identifier); + } + return list; + } + + /** + * @param cmml the input node + * @return + * @throws XPathExpressionException + */ + public static Multiset getIdentifiersFromCmml(Node cmml) throws XPathExpressionException { + Multiset list = HashMultiset.create(); + //System.out.println(printDocument(cmml)); + NodeList identifier = getElementsB(cmml, "*//ci|*//co|*//cn"); // + int len = identifier.getLength(); + // System.out.println( "found " + len + "elements" ); + for (int i = 0; i < len; i++) { + list.add(identifier.item(i).getTextContent().trim()); + } + return list; + } + + /*the document. + * + * @param doc the doc + * @return the string + * @throws IOException Signals that an I/O exception has occurred. + * @throws TransformerException the transformer exception + */ + public static String printDocument(Node doc) throws TransformerException { + TransformerFactory tf = TransformerFactory.newInstance(); + Transformer transformer = tf.newTransformer(); + transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes"); + transformer.setOutputProperty(OutputKeys.METHOD, "xml"); + transformer.setOutputProperty(OutputKeys.INDENT, "yes"); + transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8"); + transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2"); + StringWriter sw = new StringWriter(); + transformer.transform(new DOMSource(doc), + new StreamResult(sw)); + return sw.toString(); + } + + /** + * Prie x. + * + * @param xString the x string + * @return the x path expression + * @throws XPathExpressionException the x path expression exception + */ + public static XPathExpression compileX(String xString) throws XPathExpressionException { + return xpath.compile(xString); + } + + public static double calculateBagScore(Multiset reference, Multiset actual) { + if (reference.containsAll(actual)) { + return 10.; + } else { + return 0; + } + } + + public static double calculateSimilarityScore(Node query, Node node, Map qvars) { + query.normalize(); + node.normalize(); + qvars.clear(); + Node qml; + try { + qml = getElementB(query, "//semantics/*[1]"); + Node nml = getElementB(node, "//semantics/annotation-xml/*[1]"); + if (compareNode(qml, nml, true, qvars)) { + return 100.; + } + } catch (Exception e) { + e.printStackTrace(); + } + //TODO add more options here + return 0.; + } + + public static boolean compareNode(Node nQ, Node nN, Boolean considerLength, Map qvars) throws Exception { + /*System.out.println("current query tree:"); + try { + System.out.println(printDocument(nQ)); + }catch (Exception e) { + e.printStackTrace(); + } + System.out.println("current comp tree:"); + try { + System.out.println(printDocument(nN)); + }catch (Exception e) { + e.printStackTrace(); + }// END OF DEBUG output XML */ + if (qvars == null) { + throw new Exception("qvars array must not be null"); + } + if (nQ.hasChildNodes()) { + int nQChildLength = nQ.getChildNodes().getLength(); + if (nN.hasChildNodes() && + (!considerLength || nQChildLength == nN.getChildNodes().getLength())) { + //loop through all childnodes + for (int i = 0; i < nQChildLength; i++) { + //System.out.println("recurse to "+ nQ.getChildNodes().item( i )+"vs"+nN.getChildNodes().item( i )); //DEBUG output XML + if (!compareNode(nQ.getChildNodes().item(i), nN.getChildNodes().item(i), considerLength, qvars)) { + return false; + } + } + } + } + //check for qvar descendant, add to qvar hashmap for checking (required for checking multiple qvars) + if (nQ.getNodeName().equals("mws:qvar")) { + String qvarName = nQ.getAttributes().getNamedItem("name").getNodeValue(); + if (qvars.containsKey(qvarName)) { + return compareNode(qvars.get(qvarName), nN, considerLength, qvars); + } else { + qvars.put(qvarName, nN); + return true; + } + } else { + //Attributes are ignored; child nodelists are not equal in length and considerlength is false OR reached lowest level: therefore check nodevalue + if (nQ.getNodeName().equals(nN.getNodeName())) { + try { + return nQ.getNodeValue().trim().equals(nN.getNodeValue().trim()); + } catch (NullPointerException e) { + //NodeValue does not exist + return true; + } + } else { + return false; + } + } + } + + public void comileXQuery() { + //XQueryCompiler xqueryCompiler; + } + + + + /** + * The Class Mynode. + */ + private static class Mynode { + + /** + * The node. + */ + public Node node; + + /** + * The q var. + */ + public Map qVar; + + /** + * The out. + */ + public String out; + + /** + * Instantiates a new mynode. + * + * @param node the node + * @param qVar the q var + */ + public Mynode(Node node, Map qVar) { + this.node = node; + this.qVar = qVar; + } + } + + /** + * Compil + * /** + * The Class NdLst. + */ + @SuppressWarnings("UnusedDeclaration") + public static class NdLst implements NodeList, Iterable { + + /** + * The nodes. + */ + private List nodes; + + /** + * Instantiates a new nd lst. + * + * @param list the list + */ + public NdLst(NodeList list) { + nodes = new ArrayList<>(); + for (int i = 0; i < list.getLength(); i++) { + if (!isWhitespaceNode(list.item(i))) { + nodes.add(list.item(i)); + } + } + } + + /** + * Checks if is whitespace node. + * + * @param n the n + * @return true, if is whitespace node + */ + private static boolean isWhitespaceNode(Node n) { + if (n.getNodeType() == Node.TEXT_NODE) { + String val = n.getNodeValue(); + return val.trim().length() == 0; + } else { + return false; + } + } + + /* (non-Javadoc) + * @see org.w3c.dom.NodeList#item(int) + */ + @Override + public Node item(int index) { + return nodes.get(index); + } + + /* (non-Javadoc) + * @see org.w3c.dom.NodeList#getLength() + */ + @Override + public int getLength() { + return nodes.size(); + } + + /* (non-Javadoc) + * @see java.lang.Iterable#iterator() + */ + @Override + public Iterator iterator() { + return nodes.iterator(); + } + } + + public static Document XslTransform(Node srcNode, String xsltResourceNamme) throws TransformerException, ParserConfigurationException { + System.setProperty("javax.xml.transform.TransformerFactory", "net.sf.saxon.TransformerFactoryImpl"); + final InputStream is = XMLHelper.class.getClassLoader().getResourceAsStream(xsltResourceNamme); + Document doc = getNewDocument(); + TransformerFactory tFactory = TransformerFactory.newInstance(); + Transformer transformer = tFactory.newTransformer(new StreamSource(is)); + transformer.transform(new DOMSource(srcNode), new DOMResult(doc)); + return doc; + } + + public static XQueryCompiler getXQueryCompiler(){ + Configuration saxonConfig = new Configuration(); + Processor processor = new Processor(saxonConfig); + return processor.newXQueryCompiler(); + } + + public static XQueryExecutable compileXQuerySting(String xQuery){ + try { + return getXQueryCompiler().compile(xQuery); + } catch (SaxonApiException e) { + e.printStackTrace(); + return null; + } + } + + public static Document runXQuery(XQueryExecutable query, String source) throws SaxonApiException, ParserConfigurationException { + XQueryEvaluator xqueryEval = query.load(); + xqueryEval.setSource(new SAXSource(new InputSource( + new StringReader(source)))); + Document doc = XMLHelper.getNewDocument(); + xqueryEval.run(new DOMDestination(doc)); + return doc; + } + public static Document runXQuery(XQueryExecutable query, Document doc) throws SaxonApiException, ParserConfigurationException { + Processor proc = new Processor(false); + XdmNode temp = proc.newDocumentBuilder().wrap(doc); + XQueryEvaluator xqueryEval = query.load(); + + xqueryEval.setContextItem(temp); + Document out = XMLHelper.getNewDocument(true); + xqueryEval.run(new DOMDestination(out)); + return out; + } + public static XPath namespaceAwareXpath(final String prefix, final String nsURI) { + XPathFactory xPathfactory = XPathFactory.newInstance(); + XPath xpath = xPathfactory.newXPath(); + NamespaceContext ctx = new NamespaceContext() { + @Override + public String getNamespaceURI(String aPrefix) { + if (aPrefix.equals(prefix)) + return nsURI; + else + return null; + } + @Override + public Iterator getPrefixes(String val) { + throw new UnsupportedOperationException(); + } + @Override + public String getPrefix(String uri) { + throw new UnsupportedOperationException(); + } + }; + xpath.setNamespaceContext(ctx); + return xpath; + } + +} diff --git a/src/main/java/com/formulasearchengine/xmlhelper/DomDocumentHelper.java b/src/main/java/com/formulasearchengine/xmlhelper/DomDocumentHelper.java deleted file mode 100644 index 588612b..0000000 --- a/src/main/java/com/formulasearchengine/xmlhelper/DomDocumentHelper.java +++ /dev/null @@ -1,71 +0,0 @@ -package com.formulasearchengine.xmlhelper; - -import org.w3c.dom.Document; -import org.xml.sax.InputSource; -import org.xml.sax.SAXException; - -import javax.xml.namespace.NamespaceContext; -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; -import javax.xml.parsers.ParserConfigurationException; -import javax.xml.xpath.XPath; -import javax.xml.xpath.XPathFactory; -import java.io.IOException; -import java.io.StringReader; -import java.util.Iterator; - -/** - * Created by Moritz Schubotz on 3/10/15. - */ -public class DomDocumentHelper { - private DomDocumentHelper() { - } - - public static DocumentBuilderFactory getDocumentBuilderFactory() { - final DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance(); - documentBuilderFactory.setNamespaceAware( true ); - return documentBuilderFactory; - } - - public static XPath namespaceAwareXpath( final String prefix, final String nsURI ) { - final XPathFactory xPathfactory = XPathFactory.newInstance(); - final XPath xpath = xPathfactory.newXPath(); - final NamespaceContext ctx = new NamespaceContext() { - @Override - public String getNamespaceURI( String aPrefix ) { - if ( aPrefix.equals( prefix ) ) { - return nsURI; - } - throw new IllegalArgumentException(aPrefix); - } - - @Override - public Iterator getPrefixes( String val ) { - throw new UnsupportedOperationException(); - } - - @Override - public String getPrefix( String uri ) { - throw new UnsupportedOperationException(); - } - }; - xpath.setNamespaceContext( ctx ); - return xpath; - } - - /** - * Helper program: Transforms a String to a XML Document. - * - * @param InputXMLString the input xml string - * @return parsed document - * @throws javax.xml.parsers.ParserConfigurationException the parser configuration exception - * @throws java.io.IOException Signals that an I/O exception has occurred. - */ - public static Document String2Doc( String InputXMLString ) - throws ParserConfigurationException, IOException, SAXException { - DocumentBuilder builder = getDocumentBuilderFactory().newDocumentBuilder(); - InputSource is = new InputSource( new StringReader( InputXMLString ) ); - is.setEncoding( "UTF-8" ); - return builder.parse( is ); - } -} diff --git a/src/test/java/com/formulasearchengine/mathmlquerygenerator/NtcirTopicReaderTest.java b/src/test/java/com/formulasearchengine/mathmlquerygenerator/NtcirTopicReaderTest.java index 0661717..cade216 100644 --- a/src/test/java/com/formulasearchengine/mathmlquerygenerator/NtcirTopicReaderTest.java +++ b/src/test/java/com/formulasearchengine/mathmlquerygenerator/NtcirTopicReaderTest.java @@ -1,6 +1,6 @@ package com.formulasearchengine.mathmlquerygenerator; -import com.formulasearchengine.xmlhelper.DomDocumentHelper; +import com.formulasearchengine.mathmlquerygenerator.xmlhelper.XMLHelper; import org.junit.Test; import org.w3c.dom.Document; import org.xml.sax.SAXException; @@ -88,7 +88,7 @@ public void testSetRestricLength() throws Exception { @Test public void testAlternativeConstructor() throws Exception{ final URL resource = getClass().getClassLoader().getResource( ARXIV_RESOURCE ); - DocumentBuilder documentBuilder = DomDocumentHelper.getDocumentBuilderFactory().newDocumentBuilder(); + DocumentBuilder documentBuilder = XMLHelper.getDocumentBuilder( true ); Document topics = documentBuilder.parse( new File( resource.toURI() ) ); new NtcirTopicReader( topics ); new NtcirTopicReader( topics, "", "" , false ); diff --git a/src/test/java/com/formulasearchengine/mathmlquerygenerator/XMLHelper.java b/src/test/java/com/formulasearchengine/mathmlquerygenerator/XMLHelper.java deleted file mode 100644 index 51de964..0000000 --- a/src/test/java/com/formulasearchengine/mathmlquerygenerator/XMLHelper.java +++ /dev/null @@ -1,47 +0,0 @@ -package com.formulasearchengine.mathmlquerygenerator; - -import org.w3c.dom.Document; -import org.xml.sax.InputSource; -import org.xml.sax.SAXException; - -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; -import javax.xml.parsers.ParserConfigurationException; -import java.io.IOException; -import java.io.StringReader; - -class XMLHelper { - /** - * Helper program: Transforms a String to a XML Document. - * - * @param InputXMLString the input xml string - * @return parsed document - * @throws ParserConfigurationException the parser configuration exception - * @throws IOException Signals that an I/O exception has occurred. - */ - public static Document String2Doc( String InputXMLString ) - throws ParserConfigurationException, IOException { - DocumentBuilder builder = getDocumentBuilder(); - InputSource is = new InputSource( new StringReader( InputXMLString ) ); - is.setEncoding( "UTF-8" ); - try { - return builder.parse( is ); - } catch ( SAXException e ) { - System.out.println( "cannot parse following content\\n\\n" + InputXMLString ); - e.printStackTrace(); - return null; - } - } - - private static DocumentBuilder getDocumentBuilder() throws ParserConfigurationException { - DocumentBuilderFactory domFactory = DocumentBuilderFactory - .newInstance(); - domFactory.setNamespaceAware( true ); - // Unfortunately we can not ignore whitespaces without a schema. - // So we use the NdLst workaround for now. - domFactory.setAttribute( - "http://apache.org/xml/features/dom/include-ignorable-whitespace", - Boolean.FALSE ); - return domFactory.newDocumentBuilder(); - } -} diff --git a/src/test/java/com/formulasearchengine/mathmlquerygenerator/XQueryGeneratorTest.java b/src/test/java/com/formulasearchengine/mathmlquerygenerator/XQueryGeneratorTest.java index 911824f..e237edc 100644 --- a/src/test/java/com/formulasearchengine/mathmlquerygenerator/XQueryGeneratorTest.java +++ b/src/test/java/com/formulasearchengine/mathmlquerygenerator/XQueryGeneratorTest.java @@ -3,6 +3,8 @@ import junit.framework.TestCase; import org.w3c.dom.Document; +import com.formulasearchengine.mathmlquerygenerator.xmlhelper.XMLHelper; + import java.io.File; import java.io.IOException; import java.io.InputStream; @@ -54,7 +56,7 @@ private void runTestCollection( File dir ) { fail( "Cannot load test tuple (" + resultPath + ", ... " + nextFile.getName() + " )" ); } try { - query = XMLHelper.String2Doc( queryString ); + query = XMLHelper.String2Doc( queryString, true ); } catch ( Exception e ) { e.printStackTrace(); fail( "Cannot parse reference document " + nextFile.getName() ); @@ -94,7 +96,7 @@ public void testHeaderAndFooter() throws Exception { "return\n" + "$x}\n" + ""; - Document query = XMLHelper.String2Doc( testInput ); + Document query = XMLHelper.String2Doc( testInput, true ); XQueryGenerator xQueryGenerator = new XQueryGenerator( query ); xQueryGenerator.setFooter( testFooter ); xQueryGenerator.setHeader( testHead ); @@ -111,7 +113,7 @@ public void testNoRestriction() throws Exception { "$x/*[2]/*[2] = $x/*[3]\n" + "let $q := map {\"x\" : (data($x/*[2]/*[2]/@xml:id),data($x/*[3]/@xml-id))}\n\n" + "return\n"; - Document query = XMLHelper.String2Doc( testInput ); + Document query = XMLHelper.String2Doc( testInput, true ); XQueryGenerator xQueryGenerator = new XQueryGenerator( query ); xQueryGenerator.setFooter( testFooter ); xQueryGenerator.setHeader( testHead ); diff --git a/src/test/java/com/formulasearchengine/mathmlquerygenerator/xmlhelper/XMLHelperTest.java b/src/test/java/com/formulasearchengine/mathmlquerygenerator/xmlhelper/XMLHelperTest.java new file mode 100644 index 0000000..5b6db8d --- /dev/null +++ b/src/test/java/com/formulasearchengine/mathmlquerygenerator/xmlhelper/XMLHelperTest.java @@ -0,0 +1,15 @@ +package com.formulasearchengine.mathmlquerygenerator.xmlhelper; + +import com.formulasearchengine.mathmlquerygenerator.xmlhelper.XMLHelper; +import org.junit.Assert; +import org.junit.Test; + +public class XMLHelperTest { + + @Test + public void testString2Doc () throws Exception { + Assert.assertNull( XMLHelper.String2Doc( "", true ) ); + Assert.assertNotNull( XMLHelper.String2Doc( "", true ) ); + XMLHelper x = new XMLHelper(); //Does not really make sense + } +} \ No newline at end of file