Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,14 @@ public void visit(IndentedCodeBlock indentedCodeBlock) {
@Override
public void visit(Link link) {
Map<String, String> attrs = new LinkedHashMap<>();
String url = context.encodeUrl(link.getDestination());
String url = link.getDestination();

if (context.shouldSanitizeUrls()) {
url = context.urlSanitizer().sanitizeLinkUrl(url);
attrs.put("rel", "nofollow");
}

url = context.encodeUrl(url);
attrs.put("href", url);
if (link.getTitle() != null) {
attrs.put("title", link.getTitle());
Expand Down Expand Up @@ -171,14 +178,18 @@ public void visit(OrderedList orderedList) {

@Override
public void visit(Image image) {
String url = context.encodeUrl(image.getDestination());
String url = image.getDestination();

AltTextVisitor altTextVisitor = new AltTextVisitor();
image.accept(altTextVisitor);
String altText = altTextVisitor.getAltText();

Map<String, String> attrs = new LinkedHashMap<>();
attrs.put("src", url);
if (context.shouldSanitizeUrls()) {
url = context.urlSanitizer().sanitizeImageUrl(url);
}

attrs.put("src", context.encodeUrl(url));
attrs.put("alt", altText);
if (image.getTitle() != null) {
attrs.put("title", image.getTitle());
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
package org.commonmark.renderer.html;

import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;

/**
*
* Allows http, https and mailto protocols for url.
* Also allows protocol relative urls, and relative urls.
* Implementation based on https://github.com/OWASP/java-html-sanitizer/blob/f07e44b034a45d94d6fd010279073c38b6933072/src/main/java/org/owasp/html/FilterUrlByProtocolAttributePolicy.java
*/
public class DefaultUrlSanitizer implements UrlSanitizer {
private Set<String> protocols;

public DefaultUrlSanitizer() {
this(Arrays.asList("http", "https", "mailto"));
}

public DefaultUrlSanitizer(Collection<String> protocols) {
this.protocols = new HashSet<>(protocols);
}

@Override
public String sanitizeLinkUrl(String url) {
url = stripHtmlSpaces(url);
protocol_loop:
for (int i = 0, n = url.length(); i < n; ++i) {
switch (url.charAt(i)) {
case '/':
case '#':
case '?': // No protocol.
break protocol_loop;
case ':':
String protocol = url.substring(0, i).toLowerCase();
if (!protocols.contains(protocol)) {
return "";
}
break protocol_loop;
}
}
return url;
}


@Override
public String sanitizeImageUrl(String url) {
return sanitizeLinkUrl(url);
}

private String stripHtmlSpaces(String s) {
int i = 0, n = s.length();
for (; n > i; --n) {
if (!isHtmlSpace(s.charAt(n - 1))) {
break;
}
}
for (; i < n; ++i) {
if (!isHtmlSpace(s.charAt(i))) {
break;
}
}
if (i == 0 && n == s.length()) {
return s;
}
return s.substring(i, n);
}

private boolean isHtmlSpace(int ch) {
switch (ch) {
case ' ':
case '\t':
case '\n':
case '\u000c':
case '\r':
return true;
default:
return false;

}
}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package org.commonmark.renderer.html;

import org.commonmark.node.Link;
import org.commonmark.node.Image;
import org.commonmark.node.Node;

import java.util.Map;
Expand Down Expand Up @@ -44,4 +46,16 @@ public interface HtmlNodeRendererContext {
* @return whether HTML blocks and tags should be escaped or not
*/
boolean shouldEscapeHtml();

/**
*
* @return true if the {@link UrlSanitizer} should be used.
*/
boolean shouldSanitizeUrls();

/**
*
* @return Sanitizer to use for securing {@link Link} href and {@link Image} src if sanitizeUrls is true.
*/
UrlSanitizer urlSanitizer();
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import org.commonmark.internal.util.Escaping;
import org.commonmark.node.HtmlBlock;
import org.commonmark.node.HtmlInline;
import org.commonmark.node.Link;
import org.commonmark.node.Image;
import org.commonmark.node.Node;
import org.commonmark.renderer.NodeRenderer;
import org.commonmark.renderer.Renderer;
Expand All @@ -27,14 +29,18 @@ public class HtmlRenderer implements Renderer {

private final String softbreak;
private final boolean escapeHtml;
private final boolean sanitizeUrls;
private final UrlSanitizer urlSanitizer;
private final boolean percentEncodeUrls;
private final List<AttributeProviderFactory> attributeProviderFactories;
private final List<HtmlNodeRendererFactory> nodeRendererFactories;

private HtmlRenderer(Builder builder) {
this.softbreak = builder.softbreak;
this.escapeHtml = builder.escapeHtml;
this.sanitizeUrls = builder.sanitizeUrls;
this.percentEncodeUrls = builder.percentEncodeUrls;
this.urlSanitizer = builder.urlSanitizer;
this.attributeProviderFactories = new ArrayList<>(builder.attributeProviderFactories);

this.nodeRendererFactories = new ArrayList<>(builder.nodeRendererFactories.size() + 1);
Expand Down Expand Up @@ -83,6 +89,8 @@ public static class Builder {

private String softbreak = "\n";
private boolean escapeHtml = false;
private boolean sanitizeUrls = false;
private UrlSanitizer urlSanitizer = new DefaultUrlSanitizer();
private boolean percentEncodeUrls = false;
private List<AttributeProviderFactory> attributeProviderFactories = new ArrayList<>();
private List<HtmlNodeRendererFactory> nodeRendererFactories = new ArrayList<>();
Expand Down Expand Up @@ -124,6 +132,30 @@ public Builder escapeHtml(boolean escapeHtml) {
return this;
}

/**
* Whether {@link Image} src and {@link Link} href should be sanitized, defaults to {@code false}.
* <p>
*
* @param sanitizeUrls true for sanitization, false for preserving raw attribute
* @return {@code this}
*/
public Builder sanitizeUrls(boolean sanitizeUrls) {
this.sanitizeUrls = sanitizeUrls;
return this;
}

/**
* {@link UrlSanitizer} used to filter URL's if sanitizeUrls is true.
* <p>
*
* @param urlSanitizer Filterer used to filter {@link Image} src and {@link Link}.
* @return {@code this}
*/
public Builder urlSanitizer(UrlSanitizer urlSanitizer) {
this.urlSanitizer = urlSanitizer;
return this;
}

/**
* Whether URLs of link or images should be percent-encoded, defaults to {@code false}.
* <p>
Expand Down Expand Up @@ -227,6 +259,16 @@ public boolean shouldEscapeHtml() {
return escapeHtml;
}

@Override
public boolean shouldSanitizeUrls() {
return sanitizeUrls;
}

@Override
public UrlSanitizer urlSanitizer() {
return urlSanitizer;
}

@Override
public String encodeUrl(String url) {
if (percentEncodeUrls) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package org.commonmark.renderer.html;

import org.commonmark.node.Image;
import org.commonmark.node.Link;

/**
* Sanitizes urls for img and a elements by whitelisting protocols.
* This is intended to prevent XSS payloads like [Click this totally safe url](javascript:document.xss=true;)
*
* Implementation based on https://github.com/OWASP/java-html-sanitizer/blob/f07e44b034a45d94d6fd010279073c38b6933072/src/main/java/org/owasp/html/FilterUrlByProtocolAttributePolicy.java
*/
public interface UrlSanitizer {
/**
* Sanitize a url for use in the href attribute of a {@link Link}.
* @param url Link to sanitize
* @return Sanitized link
*/
public String sanitizeLinkUrl(String url);

/**
* Sanitize a url for use in the src attribute of a {@link Image}.
* @param url Link to sanitize
* @return Sanitized link {@link Image}
*/
public String sanitizeImageUrl(String url);
}
41 changes: 41 additions & 0 deletions commonmark/src/test/java/org/commonmark/test/HtmlRendererTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,39 @@ public void attributeEscaping() {
assertEquals("<p><a href=\"&amp;colon;\"></a></p>\n", defaultRenderer().render(paragraph));
}

@Test
public void rawUrlsShouldNotFilterDangerousProtocols() {
Paragraph paragraph = new Paragraph();
Link link = new Link();
link.setDestination("javascript:alert(5);");
paragraph.appendChild(link);
assertEquals("<p><a href=\"javascript:alert(5);\"></a></p>\n", rawUrlsRenderer().render(paragraph));
}

@Test
public void sanitizedUrlsShouldSetRelNoFollow() {
Paragraph paragraph = new Paragraph();
Link link = new Link();
link.setDestination("/exampleUrl");
paragraph.appendChild(link);
assertEquals("<p><a rel=\"nofollow\" href=\"/exampleUrl\"></a></p>\n", sanitizeUrlsRenderer().render(paragraph));

paragraph = new Paragraph();
link = new Link();
link.setDestination("https://google.com");
paragraph.appendChild(link);
assertEquals("<p><a rel=\"nofollow\" href=\"https://google.com\"></a></p>\n", sanitizeUrlsRenderer().render(paragraph));
}

@Test
public void sanitizedUrlsShouldFilterDangerousProtocols() {
Paragraph paragraph = new Paragraph();
Link link = new Link();
link.setDestination("javascript:alert(5);");
paragraph.appendChild(link);
assertEquals("<p><a rel=\"nofollow\" href=\"\"></a></p>\n", sanitizeUrlsRenderer().render(paragraph));
}

@Test
public void percentEncodeUrlDisabled() {
assertEquals("<p><a href=\"foo&amp;bar\">a</a></p>\n", defaultRenderer().render(parse("[a](foo&amp;bar)")));
Expand Down Expand Up @@ -267,6 +300,14 @@ private static HtmlRenderer htmlAllowingRenderer() {
return HtmlRenderer.builder().escapeHtml(false).build();
}

private static HtmlRenderer sanitizeUrlsRenderer() {
return HtmlRenderer.builder().sanitizeUrls(true).urlSanitizer(new DefaultUrlSanitizer()).build();
}

private static HtmlRenderer rawUrlsRenderer() {
return HtmlRenderer.builder().sanitizeUrls(false).build();
}

private static HtmlRenderer htmlEscapingRenderer() {
return HtmlRenderer.builder().escapeHtml(true).build();
}
Expand Down