diff --git a/commonmark/src/main/java/org/commonmark/renderer/html/CoreHtmlNodeRenderer.java b/commonmark/src/main/java/org/commonmark/renderer/html/CoreHtmlNodeRenderer.java index 3ffeba0cf..7d3552668 100644 --- a/commonmark/src/main/java/org/commonmark/renderer/html/CoreHtmlNodeRenderer.java +++ b/commonmark/src/main/java/org/commonmark/renderer/html/CoreHtmlNodeRenderer.java @@ -141,7 +141,14 @@ public void visit(IndentedCodeBlock indentedCodeBlock) { @Override public void visit(Link link) { Map attrs = new LinkedHashMap<>(); - String url = context.encodeUrl(link.getDestination()); + String url = link.getDestination(); + + if (context.shouldSanitizeUrls()) { + url = context.urlSanitizer().sanitizeLinkUrl(url); + attrs.put("rel", "nofollow"); + } + + url = context.encodeUrl(url); attrs.put("href", url); if (link.getTitle() != null) { attrs.put("title", link.getTitle()); @@ -171,14 +178,18 @@ public void visit(OrderedList orderedList) { @Override public void visit(Image image) { - String url = context.encodeUrl(image.getDestination()); + String url = image.getDestination(); AltTextVisitor altTextVisitor = new AltTextVisitor(); image.accept(altTextVisitor); String altText = altTextVisitor.getAltText(); Map attrs = new LinkedHashMap<>(); - attrs.put("src", url); + if (context.shouldSanitizeUrls()) { + url = context.urlSanitizer().sanitizeImageUrl(url); + } + + attrs.put("src", context.encodeUrl(url)); attrs.put("alt", altText); if (image.getTitle() != null) { attrs.put("title", image.getTitle()); diff --git a/commonmark/src/main/java/org/commonmark/renderer/html/DefaultUrlSanitizer.java b/commonmark/src/main/java/org/commonmark/renderer/html/DefaultUrlSanitizer.java new file mode 100644 index 000000000..6cc96c5e7 --- /dev/null +++ b/commonmark/src/main/java/org/commonmark/renderer/html/DefaultUrlSanitizer.java @@ -0,0 +1,83 @@ +package org.commonmark.renderer.html; + +import java.util.Arrays; +import java.util.Collection; +import java.util.HashSet; +import java.util.Set; + +/** + * + * Allows http, https and mailto protocols for url. + * Also allows protocol relative urls, and relative urls. + * Implementation based on https://github.com/OWASP/java-html-sanitizer/blob/f07e44b034a45d94d6fd010279073c38b6933072/src/main/java/org/owasp/html/FilterUrlByProtocolAttributePolicy.java + */ +public class DefaultUrlSanitizer implements UrlSanitizer { + private Set protocols; + + public DefaultUrlSanitizer() { + this(Arrays.asList("http", "https", "mailto")); + } + + public DefaultUrlSanitizer(Collection protocols) { + this.protocols = new HashSet<>(protocols); + } + + @Override + public String sanitizeLinkUrl(String url) { + url = stripHtmlSpaces(url); + protocol_loop: + for (int i = 0, n = url.length(); i < n; ++i) { + switch (url.charAt(i)) { + case '/': + case '#': + case '?': // No protocol. + break protocol_loop; + case ':': + String protocol = url.substring(0, i).toLowerCase(); + if (!protocols.contains(protocol)) { + return ""; + } + break protocol_loop; + } + } + return url; + } + + + @Override + public String sanitizeImageUrl(String url) { + return sanitizeLinkUrl(url); + } + + private String stripHtmlSpaces(String s) { + int i = 0, n = s.length(); + for (; n > i; --n) { + if (!isHtmlSpace(s.charAt(n - 1))) { + break; + } + } + for (; i < n; ++i) { + if (!isHtmlSpace(s.charAt(i))) { + break; + } + } + if (i == 0 && n == s.length()) { + return s; + } + return s.substring(i, n); + } + + private boolean isHtmlSpace(int ch) { + switch (ch) { + case ' ': + case '\t': + case '\n': + case '\u000c': + case '\r': + return true; + default: + return false; + + } + } +} diff --git a/commonmark/src/main/java/org/commonmark/renderer/html/HtmlNodeRendererContext.java b/commonmark/src/main/java/org/commonmark/renderer/html/HtmlNodeRendererContext.java index fd077a277..653f189be 100644 --- a/commonmark/src/main/java/org/commonmark/renderer/html/HtmlNodeRendererContext.java +++ b/commonmark/src/main/java/org/commonmark/renderer/html/HtmlNodeRendererContext.java @@ -1,5 +1,7 @@ package org.commonmark.renderer.html; +import org.commonmark.node.Link; +import org.commonmark.node.Image; import org.commonmark.node.Node; import java.util.Map; @@ -44,4 +46,16 @@ public interface HtmlNodeRendererContext { * @return whether HTML blocks and tags should be escaped or not */ boolean shouldEscapeHtml(); + + /** + * + * @return true if the {@link UrlSanitizer} should be used. + */ + boolean shouldSanitizeUrls(); + + /** + * + * @return Sanitizer to use for securing {@link Link} href and {@link Image} src if sanitizeUrls is true. + */ + UrlSanitizer urlSanitizer(); } diff --git a/commonmark/src/main/java/org/commonmark/renderer/html/HtmlRenderer.java b/commonmark/src/main/java/org/commonmark/renderer/html/HtmlRenderer.java index fac3b3ba2..e410ae32d 100644 --- a/commonmark/src/main/java/org/commonmark/renderer/html/HtmlRenderer.java +++ b/commonmark/src/main/java/org/commonmark/renderer/html/HtmlRenderer.java @@ -5,6 +5,8 @@ import org.commonmark.internal.util.Escaping; import org.commonmark.node.HtmlBlock; import org.commonmark.node.HtmlInline; +import org.commonmark.node.Link; +import org.commonmark.node.Image; import org.commonmark.node.Node; import org.commonmark.renderer.NodeRenderer; import org.commonmark.renderer.Renderer; @@ -27,6 +29,8 @@ public class HtmlRenderer implements Renderer { private final String softbreak; private final boolean escapeHtml; + private final boolean sanitizeUrls; + private final UrlSanitizer urlSanitizer; private final boolean percentEncodeUrls; private final List attributeProviderFactories; private final List nodeRendererFactories; @@ -34,7 +38,9 @@ public class HtmlRenderer implements Renderer { private HtmlRenderer(Builder builder) { this.softbreak = builder.softbreak; this.escapeHtml = builder.escapeHtml; + this.sanitizeUrls = builder.sanitizeUrls; this.percentEncodeUrls = builder.percentEncodeUrls; + this.urlSanitizer = builder.urlSanitizer; this.attributeProviderFactories = new ArrayList<>(builder.attributeProviderFactories); this.nodeRendererFactories = new ArrayList<>(builder.nodeRendererFactories.size() + 1); @@ -83,6 +89,8 @@ public static class Builder { private String softbreak = "\n"; private boolean escapeHtml = false; + private boolean sanitizeUrls = false; + private UrlSanitizer urlSanitizer = new DefaultUrlSanitizer(); private boolean percentEncodeUrls = false; private List attributeProviderFactories = new ArrayList<>(); private List nodeRendererFactories = new ArrayList<>(); @@ -124,6 +132,30 @@ public Builder escapeHtml(boolean escapeHtml) { return this; } + /** + * Whether {@link Image} src and {@link Link} href should be sanitized, defaults to {@code false}. + *

+ * + * @param sanitizeUrls true for sanitization, false for preserving raw attribute + * @return {@code this} + */ + public Builder sanitizeUrls(boolean sanitizeUrls) { + this.sanitizeUrls = sanitizeUrls; + return this; + } + + /** + * {@link UrlSanitizer} used to filter URL's if sanitizeUrls is true. + *

+ * + * @param urlSanitizer Filterer used to filter {@link Image} src and {@link Link}. + * @return {@code this} + */ + public Builder urlSanitizer(UrlSanitizer urlSanitizer) { + this.urlSanitizer = urlSanitizer; + return this; + } + /** * Whether URLs of link or images should be percent-encoded, defaults to {@code false}. *

@@ -227,6 +259,16 @@ public boolean shouldEscapeHtml() { return escapeHtml; } + @Override + public boolean shouldSanitizeUrls() { + return sanitizeUrls; + } + + @Override + public UrlSanitizer urlSanitizer() { + return urlSanitizer; + } + @Override public String encodeUrl(String url) { if (percentEncodeUrls) { diff --git a/commonmark/src/main/java/org/commonmark/renderer/html/UrlSanitizer.java b/commonmark/src/main/java/org/commonmark/renderer/html/UrlSanitizer.java new file mode 100644 index 000000000..6ad93e41d --- /dev/null +++ b/commonmark/src/main/java/org/commonmark/renderer/html/UrlSanitizer.java @@ -0,0 +1,26 @@ +package org.commonmark.renderer.html; + +import org.commonmark.node.Image; +import org.commonmark.node.Link; + +/** + * Sanitizes urls for img and a elements by whitelisting protocols. + * This is intended to prevent XSS payloads like [Click this totally safe url](javascript:document.xss=true;) + * + * Implementation based on https://github.com/OWASP/java-html-sanitizer/blob/f07e44b034a45d94d6fd010279073c38b6933072/src/main/java/org/owasp/html/FilterUrlByProtocolAttributePolicy.java + */ +public interface UrlSanitizer { + /** + * Sanitize a url for use in the href attribute of a {@link Link}. + * @param url Link to sanitize + * @return Sanitized link + */ + public String sanitizeLinkUrl(String url); + + /** + * Sanitize a url for use in the src attribute of a {@link Image}. + * @param url Link to sanitize + * @return Sanitized link {@link Image} + */ + public String sanitizeImageUrl(String url); +} diff --git a/commonmark/src/test/java/org/commonmark/test/HtmlRendererTest.java b/commonmark/src/test/java/org/commonmark/test/HtmlRendererTest.java index 30cbf24f3..04b493cba 100644 --- a/commonmark/src/test/java/org/commonmark/test/HtmlRendererTest.java +++ b/commonmark/src/test/java/org/commonmark/test/HtmlRendererTest.java @@ -59,6 +59,39 @@ public void attributeEscaping() { assertEquals("

\n", defaultRenderer().render(paragraph)); } + @Test + public void rawUrlsShouldNotFilterDangerousProtocols() { + Paragraph paragraph = new Paragraph(); + Link link = new Link(); + link.setDestination("javascript:alert(5);"); + paragraph.appendChild(link); + assertEquals("

\n", rawUrlsRenderer().render(paragraph)); + } + + @Test + public void sanitizedUrlsShouldSetRelNoFollow() { + Paragraph paragraph = new Paragraph(); + Link link = new Link(); + link.setDestination("/exampleUrl"); + paragraph.appendChild(link); + assertEquals("

\n", sanitizeUrlsRenderer().render(paragraph)); + + paragraph = new Paragraph(); + link = new Link(); + link.setDestination("https://google.com"); + paragraph.appendChild(link); + assertEquals("

\n", sanitizeUrlsRenderer().render(paragraph)); + } + + @Test + public void sanitizedUrlsShouldFilterDangerousProtocols() { + Paragraph paragraph = new Paragraph(); + Link link = new Link(); + link.setDestination("javascript:alert(5);"); + paragraph.appendChild(link); + assertEquals("

\n", sanitizeUrlsRenderer().render(paragraph)); + } + @Test public void percentEncodeUrlDisabled() { assertEquals("

a

\n", defaultRenderer().render(parse("[a](foo&bar)"))); @@ -267,6 +300,14 @@ private static HtmlRenderer htmlAllowingRenderer() { return HtmlRenderer.builder().escapeHtml(false).build(); } + private static HtmlRenderer sanitizeUrlsRenderer() { + return HtmlRenderer.builder().sanitizeUrls(true).urlSanitizer(new DefaultUrlSanitizer()).build(); + } + + private static HtmlRenderer rawUrlsRenderer() { + return HtmlRenderer.builder().sanitizeUrls(false).build(); + } + private static HtmlRenderer htmlEscapingRenderer() { return HtmlRenderer.builder().escapeHtml(true).build(); }