Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add escape_html_once and escape_html_once_as_safe_html #80

Merged
merged 1 commit into from
Jun 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# Unreleased

- Add `EscapeUtils.escape_html_once` and `EscapeUtils.rb_eu_escape_html_once_as_html_safe` as faster implementations of Rails `escape_once` helper.

# 1.2.2

- Update EscapeUtils.escape_javascript to match Rails `escape_javascript`
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ or per-call by passing `false` as the second parameter to `escape_html` like `Es

For more information check out: http://www.owasp.org/index.php/XSS_(Cross_Site_Scripting)_Prevention_Cheat_Sheet#RULE_.231_-_HTML_Escape_Before_Inserting_Untrusted_Data_into_HTML_Element_Content

To avoid double-escaping HTML entities, use `EscapeUtils.escape_html_once`.

#### Unescaping

``` ruby
Expand Down
25 changes: 25 additions & 0 deletions benchmark/html_escape_once.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# encoding: utf-8

require 'rubygems'
require 'bundler/setup'
require 'benchmark/ips'

require 'escape_utils'
require 'active_support/core_ext/string/output_safety'

url = "https://en.wikipedia.org/wiki/Succession_to_the_British_throne"
html = `curl -s #{url}`
html = html.force_encoding('utf-8') if html.respond_to?(:force_encoding)
puts "Escaping #{html.bytesize} bytes of html from #{url}"

Benchmark.ips do |x|
x.report "EscapeUtils.escape_html_once" do
EscapeUtils.escape_html_once(html)
end

x.report "ActionView escape_once" do # Rails expose it as ERB::Util.html_escape_once
ERB::Util.html_escape_once(html)
end

x.compare!(order: :baseline)
end
30 changes: 26 additions & 4 deletions ext/escape_utils/escape_utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ static VALUE new_html_safe_string(const char *ptr, size_t len)
return rb_str_new_with_class(rb_html_safe_string_template_object, ptr, len);
}

static VALUE rb_eu_escape_html_as_html_safe(VALUE self, VALUE str)
static VALUE rb_eu_escape_html_as_html_safe0(VALUE self, VALUE str, int escape_once)
{
VALUE result;
int secure = g_html_secure;
Expand All @@ -110,7 +110,7 @@ static VALUE rb_eu_escape_html_as_html_safe(VALUE self, VALUE str)
Check_Type(str, T_STRING);
check_utf8_encoding(str);

if (houdini_escape_html0(&buf, (const uint8_t *)RSTRING_PTR(str), RSTRING_LEN(str), secure)) {
if (houdini_escape_html0(&buf, (const uint8_t *)RSTRING_PTR(str), RSTRING_LEN(str), secure, escape_once)) {
result = new_html_safe_string(buf.ptr, buf.size);
gh_buf_free(&buf);
} else {
Expand All @@ -123,7 +123,17 @@ static VALUE rb_eu_escape_html_as_html_safe(VALUE self, VALUE str)
return result;
}

static VALUE rb_eu_escape_html(int argc, VALUE *argv, VALUE self)
static VALUE rb_eu_escape_html_as_html_safe(VALUE self, VALUE str)
{
return rb_eu_escape_html_as_html_safe0(self, str, 0);
}

static VALUE rb_eu_escape_html_once_as_html_safe(VALUE self, VALUE str)
{
return rb_eu_escape_html_as_html_safe0(self, str, 1);
}

static VALUE rb_eu_escape_html0(int argc, VALUE *argv, VALUE self, int escape_once)
{
VALUE str, rb_secure;
gh_buf buf = GH_BUF_INIT;
Expand All @@ -138,7 +148,7 @@ static VALUE rb_eu_escape_html(int argc, VALUE *argv, VALUE self)
Check_Type(str, T_STRING);
check_utf8_encoding(str);

if (houdini_escape_html0(&buf, (const uint8_t *)RSTRING_PTR(str), RSTRING_LEN(str), secure)) {
if (houdini_escape_html0(&buf, (const uint8_t *)RSTRING_PTR(str), RSTRING_LEN(str), secure, escape_once)) {
VALUE result = eu_new_str(buf.ptr, buf.size);
gh_buf_free(&buf);
return result;
Expand All @@ -147,6 +157,16 @@ static VALUE rb_eu_escape_html(int argc, VALUE *argv, VALUE self)
return str;
}

static VALUE rb_eu_escape_html(int argc, VALUE *argv, VALUE self)
{
return rb_eu_escape_html0(argc, argv, self, 0);
}

static VALUE rb_eu_escape_html_once(int argc, VALUE *argv, VALUE self)
{
return rb_eu_escape_html0(argc, argv, self, 1);
}

static VALUE rb_eu_unescape_html(VALUE self, VALUE str)
{
return rb_eu__generic(str, &houdini_unescape_html);
Expand Down Expand Up @@ -232,7 +252,9 @@ void Init_escape_utils()

rb_mEscapeUtils = rb_define_module("EscapeUtils");
rb_define_method(rb_mEscapeUtils, "escape_html_as_html_safe", rb_eu_escape_html_as_html_safe, 1);
rb_define_method(rb_mEscapeUtils, "escape_html_once_as_html_safe", rb_eu_escape_html_once_as_html_safe, 1);
rb_define_method(rb_mEscapeUtils, "escape_html", rb_eu_escape_html, -1);
rb_define_method(rb_mEscapeUtils, "escape_html_once", rb_eu_escape_html_once, -1);
rb_define_method(rb_mEscapeUtils, "unescape_html", rb_eu_unescape_html, 1);
rb_define_method(rb_mEscapeUtils, "escape_xml", rb_eu_escape_xml, 1);
rb_define_method(rb_mEscapeUtils, "escape_javascript", rb_eu_escape_js, 1);
Expand Down
4 changes: 3 additions & 1 deletion ext/escape_utils/houdini.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,13 @@ extern "C" {
# define _isdigit(c) ((c) >= '0' && (c) <= '9')
#endif

#define _isasciialpha(c) (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z'))

#define HOUDINI_ESCAPED_SIZE(x) (((x) * 12) / 10)
#define HOUDINI_UNESCAPED_SIZE(x) (x)

extern int houdini_escape_html(gh_buf *ob, const uint8_t *src, size_t size);
extern int houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure);
extern int houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure, int escape_once);
extern int houdini_unescape_html(gh_buf *ob, const uint8_t *src, size_t size);
extern int houdini_escape_xml(gh_buf *ob, const uint8_t *src, size_t size);
extern int houdini_escape_uri(gh_buf *ob, const uint8_t *src, size_t size);
Expand Down
38 changes: 35 additions & 3 deletions ext/escape_utils/houdini_html_e.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,15 +44,47 @@ static const char *HTML_ESCAPES[] = {
"&gt;"
};

static int
is_entity(const uint8_t *src, size_t size)
{
size_t i = 0;

if (size == 0 || src[0] != '&')
return false;

if (size > 16)
size = 16;

if (size >= 4 && src[1] == '#') {
if (_isdigit(src[2])) {
for (i = 3; i < size && _isdigit(src[i]); ++i);
}
else if ((src[2] == 'x' || src[2] == 'X') && _isxdigit(src[3])) {
for (i = 4; i < size && _isxdigit(src[i]); ++i);
}
else return false;
}
else {
for (i = 1; i < size && _isasciialpha(src[i]); ++i);
if (i == 1) return false;
}

return i < size && src[i] == ';';
}

int
houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure)
houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure, int escape_once)
{
size_t i = 0, org, esc = 0;

while (i < size) {
org = i;
while (i < size && (esc = HTML_ESCAPE_TABLE[src[i]]) == 0)
while (i < size) {
esc = HTML_ESCAPE_TABLE[src[i]];
if (unlikely(esc != 0) && (!escape_once || !is_entity(src + i, size - i)))
break;
i++;
}

if (i > org) {
if (unlikely(org == 0)) {
Expand Down Expand Up @@ -85,6 +117,6 @@ houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure)
int
houdini_escape_html(gh_buf *ob, const uint8_t *src, size_t size)
{
return houdini_escape_html0(ob, src, size, 1);
return houdini_escape_html0(ob, src, size, 1, 0);
}

24 changes: 24 additions & 0 deletions test/html/escape_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,30 @@ def test_returns_original_if_not_escaped
assert_equal str.object_id, EscapeUtils.escape_html(str).object_id
end

def test_escape_html_once
{
'&<' => '&amp;&lt;',
'&amp;&lt;&x;' => '&amp;&lt;&x;',
'&amp' => '&amp;amp',
'&!;' => '&amp;!;',
'&#0;' => '&#0;',
'&#10;' => '&#10;',
'&#10' => '&amp;#10',
'&#10000000000;' => '&#10000000000;',
'&#x0;' => '&#x0;',
'&#xf0;' => '&#xf0;',
'&#xf0' => '&amp;#xf0',
'&#x;' => '&amp;#x;',
'&#xfoo;' => '&amp;#xfoo;',
'&#;' => '&amp;#;',
'&#foo;' => '&amp;#foo;',
'foo&amp;bar' => 'foo&amp;bar',
}.each do |(input, output)|
assert_equal output, EscapeUtils.escape_html_once(input)
assert_equal output, EscapeUtils.escape_html_once_as_html_safe(input)
end
end

def test_html_safe_escape_default_works
str = EscapeUtils.escape_html_as_html_safe('foobar')
assert_equal 'foobar', str
Expand Down