Skip to content

Commit

Permalink
Deprecate HTML escaping and unescaping
Browse files Browse the repository at this point in the history
Ref: ruby/ruby@1b7109e
Ref: ruby/ruby@217ad66

The native CGI methods are faster than `EscapeUtils`, so there is no point in maintaining this code.
  • Loading branch information
byroot committed Jun 7, 2022
1 parent e93cf76 commit 0da0c0a
Show file tree
Hide file tree
Showing 18 changed files with 123 additions and 485 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
# Unreleased

- Add `EscapeUtils.escape_html_once` and `EscapeUtils.escape_html_once_as_html_safe` as faster implementations of the Rails `escape_once` helper.
- Deprecate `escape_html` and `escape_html_as_html_safe` given that Ruby 2.5 optimized `CGI.escapeHTML` to be twice as fast as the `EscapeUtils` implementation.
- Deprecate `unescape_html` given that Ruby 2.5 optimized `CGI.unescapeHTML` to be only 40% slower than the `EscapeUtils` implementation.
- Deprecate `escape_html_as_html_safe` as well.
- Deprecate `EscapeUtils.html_secure`; there's no reason to escape slashes (`/`) in 2022.

# 1.2.2

Expand Down
26 changes: 4 additions & 22 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ For character encoding, the output string's encoding is copied from the input st

It has monkey-patches for Rack::Utils, CGI, URI, ERB::Util, Haml and ActionView, so you can drop this in and have your app start escaping fast as balls in no time.

It supports HTML, URL, URI and Javascript escaping/unescaping.
It supports URL, URI and Javascript escaping/unescaping.

## Installing

Expand All @@ -29,34 +29,16 @@ utf8_string = non_utf8_string.encode('UTF-8')

### HTML

#### Escaping

``` ruby
html = `curl -s http://maps.google.com`
escaped_html = EscapeUtils.escape_html(html)
```

By default, escape_utils will escape `/` characters with `&#47;`, but you can disable that by setting `EscapeUtils.html_secure = false`
or per-call by passing `false` as the second parameter to `escape_html` like `EscapeUtils.escape_html(html, false)`

For more information check out: http://www.owasp.org/index.php/XSS_(Cross_Site_Scripting)_Prevention_Cheat_Sheet#RULE_.231_-_HTML_Escape_Before_Inserting_Untrusted_Data_into_HTML_Element_Content
As of `escape_utils 1.3.0`, regular HTML escaping methods are deprecated. Ruby 2.5 introduced C implementations of `CGI.escapeHTML` and `CGI.unescapeHTML`, which are, respectively, faster than and almost as fast as their `EscapeUtils` counterparts. Use those instead.

To avoid double-escaping HTML entities, use `EscapeUtils.escape_html_once`.

#### Unescaping

``` ruby
html = `curl -s http://maps.google.com`
escaped_html = EscapeUtils.escape_html(html)
html = EscapeUtils.unescape_html(escaped_html)
```

#### Monkey Patches

Historically, the `HTML` monkey patches changed the return value for `ActiveSupport::SafeBuffer` instances, so they are retained for that purpose only; they should be considered deprecated as well.

``` ruby
require 'escape_utils/html/erb' # to patch ERB::Util
require 'escape_utils/html/cgi' # to patch CGI
require 'escape_utils/html/haml' # to patch Haml::Helpers
```

### URL
Expand Down
68 changes: 0 additions & 68 deletions benchmark/html_escape.rb

This file was deleted.

35 changes: 0 additions & 35 deletions benchmark/html_unescape.rb

This file was deleted.

127 changes: 7 additions & 120 deletions ext/escape_utils/escape_utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,6 @@

static VALUE rb_eEncodingCompatibilityError;

/**
 * Build a new Ruby String from the `len` bytes starting at `str`,
 * tagged with the UTF-8 encoding.
 */
static VALUE
eu_new_str(const char *str, size_t len)
{
	return rb_utf8_str_new(str, len);
}

static void check_utf8_encoding(VALUE str)
{
static rb_encoding *_cached[3] = {NULL, NULL, NULL};
Expand All @@ -34,41 +29,6 @@ static void check_utf8_encoding(VALUE str)

typedef int (*houdini_cb)(gh_buf *, const uint8_t *, size_t);

static VALUE rb_mEscapeUtils;
static ID ID_at_html_safe, ID_new;

/**
* html_secure instance variable
*/
static int g_html_secure = 1;

/**
 * Setter registered as the singleton method `html_secure=`.
 *
 * Records the truthiness of `val` in the file-level g_html_secure flag and
 * stores `val` itself in the receiver's @html_secure instance variable.
 * Returns `val` unchanged.
 */
static VALUE
rb_eu_set_html_secure(VALUE self, VALUE val)
{
	/* Keep a plain C int so the escaping paths need not consult the ivar. */
	g_html_secure = RTEST(val) ? 1 : 0;

	/* Mirror the raw Ruby value on the receiver. */
	rb_ivar_set(self, rb_intern("@html_secure"), val);

	return val;
}

/**
* html_safe_string_class instance variable
*/
static VALUE rb_html_safe_string_class;
static VALUE rb_html_safe_string_template_object;

/**
 * Setter registered as the singleton method `html_safe_string_class=`.
 *
 * `val` must be a Class that is String itself or one of its subclasses.
 * An instance created with no arguments is frozen and kept as the template
 * object handed to rb_str_new_with_class() when building html-safe results.
 *
 * Raises TypeError (via Check_Type) when `val` is not a Class, and
 * ArgumentError when it is not String or a subclass of String.
 * Returns `val`.
 */
static VALUE rb_eu_set_html_safe_string_class(VALUE self, VALUE val)
{
	VALUE template_object;

	Check_Type(val, T_CLASS);

	/*
	 * Module#<= yields true for "same class or subclass", false when `val`
	 * is an ancestor of String (e.g. Object), and nil when the two are
	 * unrelated. Only true is acceptable, so test truthiness rather than
	 * comparing against nil, which let ancestors of String slip through.
	 */
	if (!RTEST(rb_funcall(val, rb_intern("<="), 1, rb_cString))) {
		rb_raise(rb_eArgError, "%s must be a descendent of String", rb_class2name(val));
	}

	/*
	 * Instantiate before touching the globals: if `new` raises, the
	 * previously configured class/template pair stays consistent.
	 */
	template_object = rb_class_new_instance(0, NULL, val);
	OBJ_FREEZE(template_object);

	rb_html_safe_string_class = val;
	rb_html_safe_string_template_object = template_object;
	rb_ivar_set(self, rb_intern("@html_safe_string_class"), val);
	return val;
}

/**
* Generic template
*/
Expand All @@ -78,13 +38,13 @@ rb_eu__generic(VALUE str, houdini_cb do_escape)
gh_buf buf = GH_BUF_INIT;

if (NIL_P(str))
return eu_new_str("", 0);
return rb_utf8_str_new("", 0);

Check_Type(str, T_STRING);
check_utf8_encoding(str);

if (do_escape(&buf, (const uint8_t *)RSTRING_PTR(str), RSTRING_LEN(str))) {
VALUE result = eu_new_str(buf.ptr, buf.size);
VALUE result = rb_utf8_str_new(buf.ptr, buf.size);
gh_buf_free(&buf);
return result;
}
Expand All @@ -96,82 +56,22 @@ rb_eu__generic(VALUE str, houdini_cb do_escape)
/**
* HTML methods
*/
/**
 * Create a string from `len` bytes at `ptr` by passing the configured
 * html-safe template object to rb_str_new_with_class().
 */
static VALUE
new_html_safe_string(const char *ptr, size_t len)
{
	VALUE template_object = rb_html_safe_string_template_object;

	return rb_str_new_with_class(template_object, ptr, len);
}

static VALUE rb_eu_escape_html_as_html_safe0(VALUE self, VALUE str, int escape_once)
static VALUE rb_eu_escape_html_once(VALUE self, VALUE str)
{
VALUE result;
int secure = g_html_secure;
gh_buf buf = GH_BUF_INIT;

Check_Type(str, T_STRING);
check_utf8_encoding(str);

if (houdini_escape_html0(&buf, (const uint8_t *)RSTRING_PTR(str), RSTRING_LEN(str), secure, escape_once)) {
result = new_html_safe_string(buf.ptr, buf.size);
gh_buf_free(&buf);
} else {
result = new_html_safe_string(RSTRING_PTR(str), RSTRING_LEN(str));
}

rb_ivar_set(result, ID_at_html_safe, Qtrue);
rb_enc_associate(result, rb_enc_get(str));

return result;
}

/**
 * Ruby-facing `escape_html_as_html_safe`: delegates to
 * rb_eu_escape_html_as_html_safe0() with the escape_once flag cleared.
 */
static VALUE
rb_eu_escape_html_as_html_safe(VALUE self, VALUE str)
{
	const int escape_once = 0;

	return rb_eu_escape_html_as_html_safe0(self, str, escape_once);
}

/**
 * Ruby-facing `escape_html_once_as_html_safe`: delegates to
 * rb_eu_escape_html_as_html_safe0() with the escape_once flag set.
 */
static VALUE
rb_eu_escape_html_once_as_html_safe(VALUE self, VALUE str)
{
	const int escape_once = 1;

	return rb_eu_escape_html_as_html_safe0(self, str, escape_once);
}

static VALUE rb_eu_escape_html0(int argc, VALUE *argv, VALUE self, int escape_once)
{
VALUE str, rb_secure;
gh_buf buf = GH_BUF_INIT;
int secure = g_html_secure;

if (rb_scan_args(argc, argv, "11", &str, &rb_secure) == 2) {
if (rb_secure == Qfalse) {
secure = 0;
}
}

Check_Type(str, T_STRING);
check_utf8_encoding(str);

if (houdini_escape_html0(&buf, (const uint8_t *)RSTRING_PTR(str), RSTRING_LEN(str), secure, escape_once)) {
VALUE result = eu_new_str(buf.ptr, buf.size);
if (houdini_escape_html_once(&buf, (const uint8_t *)RSTRING_PTR(str), RSTRING_LEN(str))) {
VALUE result = rb_utf8_str_new(buf.ptr, buf.size);
gh_buf_free(&buf);
return result;
}

return str;
}

/**
 * Ruby-facing `escape_html` (variadic): forwards its arguments to
 * rb_eu_escape_html0() with the escape_once flag cleared.
 */
static VALUE
rb_eu_escape_html(int argc, VALUE *argv, VALUE self)
{
	const int escape_once = 0;

	return rb_eu_escape_html0(argc, argv, self, escape_once);
}

/**
 * Ruby-facing `escape_html_once` (variadic): forwards its arguments to
 * rb_eu_escape_html0() with the escape_once flag set.
 */
static VALUE
rb_eu_escape_html_once(int argc, VALUE *argv, VALUE self)
{
	const int escape_once = 1;

	return rb_eu_escape_html0(argc, argv, self, escape_once);
}

/**
 * Ruby-facing `unescape_html`: runs `str` through the shared
 * rb_eu__generic() template with houdini_unescape_html as the callback.
 */
static VALUE
rb_eu_unescape_html(VALUE self, VALUE str)
{
	return rb_eu__generic(str, houdini_unescape_html);
}


/**
* XML methods
Expand Down Expand Up @@ -236,7 +136,6 @@ static VALUE rb_eu_unescape_uri_component(VALUE self, VALUE str)
return rb_eu__generic(str, &houdini_unescape_uri_component);
}


/**
* Ruby Extension initializer
*/
Expand All @@ -245,17 +144,8 @@ void Init_escape_utils()
{
rb_eEncodingCompatibilityError = rb_const_get(rb_cEncoding, rb_intern("CompatibilityError"));

ID_new = rb_intern("new");
ID_at_html_safe = rb_intern("@html_safe");
rb_global_variable(&rb_html_safe_string_class);
rb_global_variable(&rb_html_safe_string_template_object);

rb_mEscapeUtils = rb_define_module("EscapeUtils");
rb_define_method(rb_mEscapeUtils, "escape_html_as_html_safe", rb_eu_escape_html_as_html_safe, 1);
rb_define_method(rb_mEscapeUtils, "escape_html_once_as_html_safe", rb_eu_escape_html_once_as_html_safe, 1);
rb_define_method(rb_mEscapeUtils, "escape_html", rb_eu_escape_html, -1);
rb_define_method(rb_mEscapeUtils, "escape_html_once", rb_eu_escape_html_once, -1);
rb_define_method(rb_mEscapeUtils, "unescape_html", rb_eu_unescape_html, 1);
VALUE rb_mEscapeUtils = rb_define_module("EscapeUtils");
rb_define_method(rb_mEscapeUtils, "escape_html_once", rb_eu_escape_html_once, 1);
rb_define_method(rb_mEscapeUtils, "escape_xml", rb_eu_escape_xml, 1);
rb_define_method(rb_mEscapeUtils, "escape_javascript", rb_eu_escape_js, 1);
rb_define_method(rb_mEscapeUtils, "unescape_javascript", rb_eu_unescape_js, 1);
Expand All @@ -265,8 +155,5 @@ void Init_escape_utils()
rb_define_method(rb_mEscapeUtils, "unescape_uri", rb_eu_unescape_uri, 1);
rb_define_method(rb_mEscapeUtils, "escape_uri_component", rb_eu_escape_uri_component, 1);
rb_define_method(rb_mEscapeUtils, "unescape_uri_component", rb_eu_unescape_uri_component, 1);

rb_define_singleton_method(rb_mEscapeUtils, "html_secure=", rb_eu_set_html_secure, 1);
rb_define_singleton_method(rb_mEscapeUtils, "html_safe_string_class=", rb_eu_set_html_safe_string_class, 1);
}

4 changes: 1 addition & 3 deletions ext/escape_utils/houdini.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,7 @@ extern "C" {
#define HOUDINI_ESCAPED_SIZE(x) (((x) * 12) / 10)
#define HOUDINI_UNESCAPED_SIZE(x) (x)

extern int houdini_escape_html(gh_buf *ob, const uint8_t *src, size_t size);
extern int houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure, int escape_once);
extern int houdini_unescape_html(gh_buf *ob, const uint8_t *src, size_t size);
extern int houdini_escape_html_once(gh_buf *ob, const uint8_t *src, size_t size);
extern int houdini_escape_xml(gh_buf *ob, const uint8_t *src, size_t size);
extern int houdini_escape_uri(gh_buf *ob, const uint8_t *src, size_t size);
extern int houdini_escape_uri_component(gh_buf *ob, const uint8_t *src, size_t size);
Expand Down
Loading

0 comments on commit 0da0c0a

Please sign in to comment.