Skip to content

Commit

Permalink
Override URL encoding when serializing results to HTML
Browse files Browse the repository at this point in the history
  • Loading branch information
spassarop committed Jun 17, 2021
1 parent c35826f commit be6a42b
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 0 deletions.
13 changes: 13 additions & 0 deletions src/main/java/org/owasp/validator/html/scan/ASHTMLSerializer.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,16 @@
import org.apache.xml.serialize.HTMLdtd;
import org.apache.xml.serialize.OutputFormat;
import org.owasp.validator.html.InternalPolicy;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.io.Writer;

@SuppressWarnings("deprecation")
public class ASHTMLSerializer extends org.apache.xml.serialize.HTMLSerializer {

private static final Logger logger = LoggerFactory.getLogger(ASHTMLSerializer.class);
private boolean encodeAllPossibleEntities;

public ASHTMLSerializer(Writer w, OutputFormat format, InternalPolicy policy) {
Expand Down Expand Up @@ -67,4 +70,14 @@ public void endElementIO(String namespaceURI, String localName,
_printer.flush();
}

/**
 * Overrides the serializer's URL encoding so that URI attribute values are written
 * through {@code printEscaped} when results are serialized to HTML.
 *
 * <p>The escaped value is emitted as a side effect of {@code printEscaped(uri)}; this
 * method itself always returns an empty string (presumably so the caller does not
 * print the value a second time; confirm against the superclass implementation).
 *
 * @param uri the URI value to escape and write
 * @return always the empty string
 */
@Override
protected String escapeURI(String uri) {
    try {
        printEscaped(uri);
    } catch (IOException e) {
        // Best-effort: serialization continues, but the exception is handed to the
        // logger so the cause and stack trace are not lost.
        logger.error("URI escaping failed for value: {}", uri, e);
    }
    return "";
}
}
10 changes: 10 additions & 0 deletions src/test/java/org/owasp/validator/html/test/AntiSamyTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -1490,5 +1490,15 @@ public void testGithubIssue81() throws ScanException, PolicyException {
assertThat(as.scan("<p style=\"color: red\">Some Text</p>", policy, AntiSamy.DOM).getCleanHTML(), not(containsString("!important")));
assertThat(as.scan("<p style=\"color: red\">Some Text</p>", policy, AntiSamy.SAX).getCleanHTML(), not(containsString("!important")));
}

@Test
public void entityReferenceEncodedInHtmlAttribute() throws ScanException, PolicyException {
    // Regression check: the "&" in the href was left unencoded and "#00058" was not
    // being interpreted as ":", so the regexp-based validations passed while a browser
    // would still load "&:" together. The cleaned HTML must carry the ampersand as "&amp;".
    final String maliciousLink =
        "<p><a href=\"javascript&#00058x=1,%61%6c%65%72%74%28%22%62%6f%6f%6d%22%29\">xss</a></p>";
    final String encodedReference = "javascript&amp;#00058";

    final String domOutput = as.scan(maliciousLink, policy, AntiSamy.DOM).getCleanHTML();
    assertThat(domOutput, containsString(encodedReference));

    final String saxOutput = as.scan(maliciousLink, policy, AntiSamy.SAX).getCleanHTML();
    assertThat(saxOutput, containsString(encodedReference));
}
}

0 comments on commit be6a42b

Please sign in to comment.