Skip to content

Commit

Permalink
Merge pull request #80 from Shopify/escape-once-1.3
Browse files Browse the repository at this point in the history
Add escape_html_once and escape_html_once_as_safe_html
  • Loading branch information
byroot authored Jun 7, 2022
2 parents 2abc768 + aa779db commit e93cf76
Show file tree
Hide file tree
Showing 7 changed files with 117 additions and 8 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# Unreleased

- Add `EscapeUtils.escape_html_once` and `EscapeUtils.escape_html_once_as_html_safe` as faster implementations of the Rails `escape_once` helper.

# 1.2.2

- Update EscapeUtils.escape_javascript to match Rails `escape_javascript`
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ or per-call by passing `false` as the second parameter to `escape_html` like `Es

For more information check out: http://www.owasp.org/index.php/XSS_(Cross_Site_Scripting)_Prevention_Cheat_Sheet#RULE_.231_-_HTML_Escape_Before_Inserting_Untrusted_Data_into_HTML_Element_Content

To avoid double-escaping HTML entities, use `EscapeUtils.escape_html_once`.

#### Unescaping

``` ruby
Expand Down
25 changes: 25 additions & 0 deletions benchmark/html_escape_once.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# encoding: utf-8

require 'rubygems'
require 'bundler/setup'
require 'benchmark/ips'

require 'escape_utils'
require 'active_support/core_ext/string/output_safety'

url = "https://en.wikipedia.org/wiki/Succession_to_the_British_throne"
html = `curl -s #{url}`
html = html.force_encoding('utf-8') if html.respond_to?(:force_encoding)
puts "Escaping #{html.bytesize} bytes of html from #{url}"

Benchmark.ips do |x|
x.report "EscapeUtils.escape_html_once" do
EscapeUtils.escape_html_once(html)
end

x.report "ActionView escape_once" do # Rails expose it as ERB::Util.html_escape_once
ERB::Util.html_escape_once(html)
end

x.compare!(order: :baseline)
end
30 changes: 26 additions & 4 deletions ext/escape_utils/escape_utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ static VALUE new_html_safe_string(const char *ptr, size_t len)
return rb_str_new_with_class(rb_html_safe_string_template_object, ptr, len);
}

static VALUE rb_eu_escape_html_as_html_safe(VALUE self, VALUE str)
static VALUE rb_eu_escape_html_as_html_safe0(VALUE self, VALUE str, int escape_once)
{
VALUE result;
int secure = g_html_secure;
Expand All @@ -110,7 +110,7 @@ static VALUE rb_eu_escape_html_as_html_safe(VALUE self, VALUE str)
Check_Type(str, T_STRING);
check_utf8_encoding(str);

if (houdini_escape_html0(&buf, (const uint8_t *)RSTRING_PTR(str), RSTRING_LEN(str), secure)) {
if (houdini_escape_html0(&buf, (const uint8_t *)RSTRING_PTR(str), RSTRING_LEN(str), secure, escape_once)) {
result = new_html_safe_string(buf.ptr, buf.size);
gh_buf_free(&buf);
} else {
Expand All @@ -123,7 +123,17 @@ static VALUE rb_eu_escape_html_as_html_safe(VALUE self, VALUE str)
return result;
}

static VALUE rb_eu_escape_html(int argc, VALUE *argv, VALUE self)
static VALUE rb_eu_escape_html_as_html_safe(VALUE self, VALUE str)
{
return rb_eu_escape_html_as_html_safe0(self, str, 0);
}

static VALUE rb_eu_escape_html_once_as_html_safe(VALUE self, VALUE str)
{
return rb_eu_escape_html_as_html_safe0(self, str, 1);
}

static VALUE rb_eu_escape_html0(int argc, VALUE *argv, VALUE self, int escape_once)
{
VALUE str, rb_secure;
gh_buf buf = GH_BUF_INIT;
Expand All @@ -138,7 +148,7 @@ static VALUE rb_eu_escape_html(int argc, VALUE *argv, VALUE self)
Check_Type(str, T_STRING);
check_utf8_encoding(str);

if (houdini_escape_html0(&buf, (const uint8_t *)RSTRING_PTR(str), RSTRING_LEN(str), secure)) {
if (houdini_escape_html0(&buf, (const uint8_t *)RSTRING_PTR(str), RSTRING_LEN(str), secure, escape_once)) {
VALUE result = eu_new_str(buf.ptr, buf.size);
gh_buf_free(&buf);
return result;
Expand All @@ -147,6 +157,16 @@ static VALUE rb_eu_escape_html(int argc, VALUE *argv, VALUE self)
return str;
}

static VALUE rb_eu_escape_html(int argc, VALUE *argv, VALUE self)
{
return rb_eu_escape_html0(argc, argv, self, 0);
}

static VALUE rb_eu_escape_html_once(int argc, VALUE *argv, VALUE self)
{
return rb_eu_escape_html0(argc, argv, self, 1);
}

static VALUE rb_eu_unescape_html(VALUE self, VALUE str)
{
return rb_eu__generic(str, &houdini_unescape_html);
Expand Down Expand Up @@ -232,7 +252,9 @@ void Init_escape_utils()

rb_mEscapeUtils = rb_define_module("EscapeUtils");
rb_define_method(rb_mEscapeUtils, "escape_html_as_html_safe", rb_eu_escape_html_as_html_safe, 1);
rb_define_method(rb_mEscapeUtils, "escape_html_once_as_html_safe", rb_eu_escape_html_once_as_html_safe, 1);
rb_define_method(rb_mEscapeUtils, "escape_html", rb_eu_escape_html, -1);
rb_define_method(rb_mEscapeUtils, "escape_html_once", rb_eu_escape_html_once, -1);
rb_define_method(rb_mEscapeUtils, "unescape_html", rb_eu_unescape_html, 1);
rb_define_method(rb_mEscapeUtils, "escape_xml", rb_eu_escape_xml, 1);
rb_define_method(rb_mEscapeUtils, "escape_javascript", rb_eu_escape_js, 1);
Expand Down
4 changes: 3 additions & 1 deletion ext/escape_utils/houdini.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,13 @@ extern "C" {
# define _isdigit(c) ((c) >= '0' && (c) <= '9')
#endif

#define _isasciialpha(c) (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z'))

#define HOUDINI_ESCAPED_SIZE(x) (((x) * 12) / 10)
#define HOUDINI_UNESCAPED_SIZE(x) (x)

extern int houdini_escape_html(gh_buf *ob, const uint8_t *src, size_t size);
extern int houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure);
extern int houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure, int escape_once);
extern int houdini_unescape_html(gh_buf *ob, const uint8_t *src, size_t size);
extern int houdini_escape_xml(gh_buf *ob, const uint8_t *src, size_t size);
extern int houdini_escape_uri(gh_buf *ob, const uint8_t *src, size_t size);
Expand Down
38 changes: 35 additions & 3 deletions ext/escape_utils/houdini_html_e.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,15 +44,47 @@ static const char *HTML_ESCAPES[] = {
"&gt;"
};

static int
is_entity(const uint8_t *src, size_t size)
{
size_t i = 0;

if (size == 0 || src[0] != '&')
return false;

if (size > 16)
size = 16;

if (size >= 4 && src[1] == '#') {
if (_isdigit(src[2])) {
for (i = 3; i < size && _isdigit(src[i]); ++i);
}
else if ((src[2] == 'x' || src[2] == 'X') && _isxdigit(src[3])) {
for (i = 4; i < size && _isxdigit(src[i]); ++i);
}
else return false;
}
else {
for (i = 1; i < size && _isasciialpha(src[i]); ++i);
if (i == 1) return false;
}

return i < size && src[i] == ';';
}

int
houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure)
houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure, int escape_once)
{
size_t i = 0, org, esc = 0;

while (i < size) {
org = i;
while (i < size && (esc = HTML_ESCAPE_TABLE[src[i]]) == 0)
while (i < size) {
esc = HTML_ESCAPE_TABLE[src[i]];
if (unlikely(esc != 0) && (!escape_once || !is_entity(src + i, size - i)))
break;
i++;
}

if (i > org) {
if (unlikely(org == 0)) {
Expand Down Expand Up @@ -85,6 +117,6 @@ houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure)
int
houdini_escape_html(gh_buf *ob, const uint8_t *src, size_t size)
{
return houdini_escape_html0(ob, src, size, 1);
return houdini_escape_html0(ob, src, size, 1, 0);
}

24 changes: 24 additions & 0 deletions test/html/escape_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,30 @@ def test_returns_original_if_not_escaped
assert_equal str.object_id, EscapeUtils.escape_html(str).object_id
end

def test_escape_html_once
{
'&<' => '&amp;&lt;',
'&amp;&lt;&x;' => '&amp;&lt;&x;',
'&amp' => '&amp;amp',
'&!;' => '&amp;!;',
'&#0;' => '&#0;',
'&#10;' => '&#10;',
'&#10' => '&amp;#10',
'&#10000000000;' => '&#10000000000;',
'&#x0;' => '&#x0;',
'&#xf0;' => '&#xf0;',
'&#xf0' => '&amp;#xf0',
'&#x;' => '&amp;#x;',
'&#xfoo;' => '&amp;#xfoo;',
'&#;' => '&amp;#;',
'&#foo;' => '&amp;#foo;',
'foo&amp;bar' => 'foo&amp;bar',
}.each do |(input, output)|
assert_equal output, EscapeUtils.escape_html_once(input)
assert_equal output, EscapeUtils.escape_html_once_as_html_safe(input)
end
end

def test_html_safe_escape_default_works
str = EscapeUtils.escape_html_as_html_safe('foobar')
assert_equal 'foobar', str
Expand Down

0 comments on commit e93cf76

Please sign in to comment.