Skip to content

Commit 93bedae

Browse files
authored
Merge pull request #161 from VandorpeDavid/stripping_unsafe_urls
Add option for stripping unsafe URLs
2 parents 27f487f + cf3bd45 commit 93bedae

File tree

6 files changed

+220
-3
lines changed

6 files changed

+220
-3
lines changed

commonmark/src/main/java/org/commonmark/renderer/html/CoreHtmlNodeRenderer.java

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,14 @@ public void visit(IndentedCodeBlock indentedCodeBlock) {
141141
@Override
142142
public void visit(Link link) {
143143
Map<String, String> attrs = new LinkedHashMap<>();
144-
String url = context.encodeUrl(link.getDestination());
144+
String url = link.getDestination();
145+
146+
if (context.shouldSanitizeUrls()) {
147+
url = context.urlSanitizer().sanitizeLinkUrl(url);
148+
attrs.put("rel", "nofollow");
149+
}
150+
151+
url = context.encodeUrl(url);
145152
attrs.put("href", url);
146153
if (link.getTitle() != null) {
147154
attrs.put("title", link.getTitle());
@@ -171,14 +178,18 @@ public void visit(OrderedList orderedList) {
171178

172179
@Override
173180
public void visit(Image image) {
174-
String url = context.encodeUrl(image.getDestination());
181+
String url = image.getDestination();
175182

176183
AltTextVisitor altTextVisitor = new AltTextVisitor();
177184
image.accept(altTextVisitor);
178185
String altText = altTextVisitor.getAltText();
179186

180187
Map<String, String> attrs = new LinkedHashMap<>();
181-
attrs.put("src", url);
188+
if (context.shouldSanitizeUrls()) {
189+
url = context.urlSanitizer().sanitizeImageUrl(url);
190+
}
191+
192+
attrs.put("src", context.encodeUrl(url));
182193
attrs.put("alt", altText);
183194
if (image.getTitle() != null) {
184195
attrs.put("title", image.getTitle());
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
package org.commonmark.renderer.html;
2+
3+
import java.util.Arrays;
4+
import java.util.Collection;
5+
import java.util.HashSet;
6+
import java.util.Set;
7+
8+
/**
9+
*
10+
* Allows http, https and mailto protocols for url.
11+
* Also allows protocol relative urls, and relative urls.
12+
* Implementation based on https://github.com/OWASP/java-html-sanitizer/blob/f07e44b034a45d94d6fd010279073c38b6933072/src/main/java/org/owasp/html/FilterUrlByProtocolAttributePolicy.java
13+
*/
14+
public class DefaultUrlSanitizer implements UrlSanitizer {
15+
private Set<String> protocols;
16+
17+
public DefaultUrlSanitizer() {
18+
this(Arrays.asList("http", "https", "mailto"));
19+
}
20+
21+
public DefaultUrlSanitizer(Collection<String> protocols) {
22+
this.protocols = new HashSet<>(protocols);
23+
}
24+
25+
@Override
26+
public String sanitizeLinkUrl(String url) {
27+
url = stripHtmlSpaces(url);
28+
protocol_loop:
29+
for (int i = 0, n = url.length(); i < n; ++i) {
30+
switch (url.charAt(i)) {
31+
case '/':
32+
case '#':
33+
case '?': // No protocol.
34+
break protocol_loop;
35+
case ':':
36+
String protocol = url.substring(0, i).toLowerCase();
37+
if (!protocols.contains(protocol)) {
38+
return "";
39+
}
40+
break protocol_loop;
41+
}
42+
}
43+
return url;
44+
}
45+
46+
47+
@Override
48+
public String sanitizeImageUrl(String url) {
49+
return sanitizeLinkUrl(url);
50+
}
51+
52+
private String stripHtmlSpaces(String s) {
53+
int i = 0, n = s.length();
54+
for (; n > i; --n) {
55+
if (!isHtmlSpace(s.charAt(n - 1))) {
56+
break;
57+
}
58+
}
59+
for (; i < n; ++i) {
60+
if (!isHtmlSpace(s.charAt(i))) {
61+
break;
62+
}
63+
}
64+
if (i == 0 && n == s.length()) {
65+
return s;
66+
}
67+
return s.substring(i, n);
68+
}
69+
70+
private boolean isHtmlSpace(int ch) {
71+
switch (ch) {
72+
case ' ':
73+
case '\t':
74+
case '\n':
75+
case '\u000c':
76+
case '\r':
77+
return true;
78+
default:
79+
return false;
80+
81+
}
82+
}
83+
}

commonmark/src/main/java/org/commonmark/renderer/html/HtmlNodeRendererContext.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
package org.commonmark.renderer.html;
22

3+
import org.commonmark.node.Link;
4+
import org.commonmark.node.Image;
35
import org.commonmark.node.Node;
46

57
import java.util.Map;
@@ -44,4 +46,16 @@ public interface HtmlNodeRendererContext {
4446
* @return whether HTML blocks and tags should be escaped or not
4547
*/
4648
boolean shouldEscapeHtml();
49+
50+
/**
51+
*
52+
* @return true if the {@link UrlSanitizer} should be used.
53+
*/
54+
boolean shouldSanitizeUrls();
55+
56+
/**
57+
*
58+
* @return Sanitizer to use for securing {@link Link} href and {@link Image} src if sanitizeUrls is true.
59+
*/
60+
UrlSanitizer urlSanitizer();
4761
}

commonmark/src/main/java/org/commonmark/renderer/html/HtmlRenderer.java

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
import org.commonmark.internal.util.Escaping;
66
import org.commonmark.node.HtmlBlock;
77
import org.commonmark.node.HtmlInline;
8+
import org.commonmark.node.Link;
9+
import org.commonmark.node.Image;
810
import org.commonmark.node.Node;
911
import org.commonmark.renderer.NodeRenderer;
1012
import org.commonmark.renderer.Renderer;
@@ -27,14 +29,18 @@ public class HtmlRenderer implements Renderer {
2729

2830
private final String softbreak;
2931
private final boolean escapeHtml;
32+
private final boolean sanitizeUrls;
33+
private final UrlSanitizer urlSanitizer;
3034
private final boolean percentEncodeUrls;
3135
private final List<AttributeProviderFactory> attributeProviderFactories;
3236
private final List<HtmlNodeRendererFactory> nodeRendererFactories;
3337

3438
private HtmlRenderer(Builder builder) {
3539
this.softbreak = builder.softbreak;
3640
this.escapeHtml = builder.escapeHtml;
41+
this.sanitizeUrls = builder.sanitizeUrls;
3742
this.percentEncodeUrls = builder.percentEncodeUrls;
43+
this.urlSanitizer = builder.urlSanitizer;
3844
this.attributeProviderFactories = new ArrayList<>(builder.attributeProviderFactories);
3945

4046
this.nodeRendererFactories = new ArrayList<>(builder.nodeRendererFactories.size() + 1);
@@ -83,6 +89,8 @@ public static class Builder {
8389

8490
private String softbreak = "\n";
8591
private boolean escapeHtml = false;
92+
private boolean sanitizeUrls = false;
93+
private UrlSanitizer urlSanitizer = new DefaultUrlSanitizer();
8694
private boolean percentEncodeUrls = false;
8795
private List<AttributeProviderFactory> attributeProviderFactories = new ArrayList<>();
8896
private List<HtmlNodeRendererFactory> nodeRendererFactories = new ArrayList<>();
@@ -124,6 +132,30 @@ public Builder escapeHtml(boolean escapeHtml) {
124132
return this;
125133
}
126134

135+
/**
136+
* Whether {@link Image} src and {@link Link} href should be sanitized, defaults to {@code false}.
137+
* <p>
138+
*
139+
* @param sanitizeUrls true for sanitization, false for preserving raw attribute
140+
* @return {@code this}
141+
*/
142+
public Builder sanitizeUrls(boolean sanitizeUrls) {
143+
this.sanitizeUrls = sanitizeUrls;
144+
return this;
145+
}
146+
147+
/**
148+
* {@link UrlSanitizer} used to filter URL's if sanitizeUrls is true.
149+
* <p>
150+
*
151+
* @param urlSanitizer Filterer used to filter {@link Image} src and {@link Link}.
152+
* @return {@code this}
153+
*/
154+
public Builder urlSanitizer(UrlSanitizer urlSanitizer) {
155+
this.urlSanitizer = urlSanitizer;
156+
return this;
157+
}
158+
127159
/**
128160
* Whether URLs of link or images should be percent-encoded, defaults to {@code false}.
129161
* <p>
@@ -227,6 +259,16 @@ public boolean shouldEscapeHtml() {
227259
return escapeHtml;
228260
}
229261

262+
@Override
263+
public boolean shouldSanitizeUrls() {
264+
return sanitizeUrls;
265+
}
266+
267+
@Override
268+
public UrlSanitizer urlSanitizer() {
269+
return urlSanitizer;
270+
}
271+
230272
@Override
231273
public String encodeUrl(String url) {
232274
if (percentEncodeUrls) {
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
package org.commonmark.renderer.html;
2+
3+
import org.commonmark.node.Image;
4+
import org.commonmark.node.Link;
5+
6+
/**
7+
* Sanitizes urls for img and a elements by whitelisting protocols.
8+
* This is intended to prevent XSS payloads like [Click this totally safe url](javascript:document.xss=true;)
9+
*
10+
* Implementation based on https://github.com/OWASP/java-html-sanitizer/blob/f07e44b034a45d94d6fd010279073c38b6933072/src/main/java/org/owasp/html/FilterUrlByProtocolAttributePolicy.java
11+
*/
12+
public interface UrlSanitizer {
13+
/**
14+
* Sanitize a url for use in the href attribute of a {@link Link}.
15+
* @param url Link to sanitize
16+
* @return Sanitized link
17+
*/
18+
public String sanitizeLinkUrl(String url);
19+
20+
/**
21+
* Sanitize a url for use in the src attribute of a {@link Image}.
22+
* @param url Link to sanitize
23+
* @return Sanitized link {@link Image}
24+
*/
25+
public String sanitizeImageUrl(String url);
26+
}

commonmark/src/test/java/org/commonmark/test/HtmlRendererTest.java

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,39 @@ public void attributeEscaping() {
5959
assertEquals("<p><a href=\"&amp;colon;\"></a></p>\n", defaultRenderer().render(paragraph));
6060
}
6161

62+
@Test
63+
public void rawUrlsShouldNotFilterDangerousProtocols() {
64+
Paragraph paragraph = new Paragraph();
65+
Link link = new Link();
66+
link.setDestination("javascript:alert(5);");
67+
paragraph.appendChild(link);
68+
assertEquals("<p><a href=\"javascript:alert(5);\"></a></p>\n", rawUrlsRenderer().render(paragraph));
69+
}
70+
71+
@Test
72+
public void sanitizedUrlsShouldSetRelNoFollow() {
73+
Paragraph paragraph = new Paragraph();
74+
Link link = new Link();
75+
link.setDestination("/exampleUrl");
76+
paragraph.appendChild(link);
77+
assertEquals("<p><a rel=\"nofollow\" href=\"/exampleUrl\"></a></p>\n", sanitizeUrlsRenderer().render(paragraph));
78+
79+
paragraph = new Paragraph();
80+
link = new Link();
81+
link.setDestination("https://google.com");
82+
paragraph.appendChild(link);
83+
assertEquals("<p><a rel=\"nofollow\" href=\"https://google.com\"></a></p>\n", sanitizeUrlsRenderer().render(paragraph));
84+
}
85+
86+
@Test
87+
public void sanitizedUrlsShouldFilterDangerousProtocols() {
88+
Paragraph paragraph = new Paragraph();
89+
Link link = new Link();
90+
link.setDestination("javascript:alert(5);");
91+
paragraph.appendChild(link);
92+
assertEquals("<p><a rel=\"nofollow\" href=\"\"></a></p>\n", sanitizeUrlsRenderer().render(paragraph));
93+
}
94+
6295
@Test
6396
public void percentEncodeUrlDisabled() {
6497
assertEquals("<p><a href=\"foo&amp;bar\">a</a></p>\n", defaultRenderer().render(parse("[a](foo&amp;bar)")));
@@ -267,6 +300,14 @@ private static HtmlRenderer htmlAllowingRenderer() {
267300
return HtmlRenderer.builder().escapeHtml(false).build();
268301
}
269302

303+
private static HtmlRenderer sanitizeUrlsRenderer() {
304+
return HtmlRenderer.builder().sanitizeUrls(true).urlSanitizer(new DefaultUrlSanitizer()).build();
305+
}
306+
307+
private static HtmlRenderer rawUrlsRenderer() {
308+
return HtmlRenderer.builder().sanitizeUrls(false).build();
309+
}
310+
270311
private static HtmlRenderer htmlEscapingRenderer() {
271312
return HtmlRenderer.builder().escapeHtml(true).build();
272313
}

0 commit comments

Comments
 (0)