Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for :all protocols #55

Merged
merged 3 commits into from
Jun 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,7 @@
"[ruby]": {
"editor.defaultFormatter": "Shopify.ruby-lsp"
},
"[markdown]": {
"editor.defaultFormatter": "esbenp.prettier-vscode"
},
}
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ attributes: {

# URL handling protocols to allow in specific attributes. By default, no
# protocols are allowed. Use :relative in place of a protocol if you want
# to allow relative URLs sans protocol. To allow any protocol, pass the bare
# symbol :all (not wrapped in an array) in place of the protocol list.
protocols: {
"a" => { "href" => ["http", "https", "mailto", :relative] },
"img" => { "href" => ["http", "https"] },
Expand Down
33 changes: 20 additions & 13 deletions ext/selma/src/sanitizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -211,20 +211,23 @@ impl SelmaSanitizer {
}
Some(protocol_list) => protocol_list.push(allowed_protocol.to_string()),
}
} else if allowed_protocol.is_kind_of(class::symbol())
&& allowed_protocol.inspect() == ":relative"
{
match protocol_list {
None => {
protocol_sanitizers.insert(
attr_name.to_string(),
vec!["#".to_string(), "/".to_string()],
);
}
Some(protocol_list) => {
protocol_list.push("#".to_string());
protocol_list.push("/".to_string());
} else if allowed_protocol.is_kind_of(class::symbol()) {
let protocol_config = allowed_protocol.inspect();
if protocol_config == ":relative" {
match protocol_list {
None => {
protocol_sanitizers.insert(
attr_name.to_string(),
vec!["#".to_string(), "/".to_string()],
);
}
Some(protocol_list) => {
protocol_list.push("#".to_string());
protocol_list.push("/".to_string());
}
}
} else if protocol_config == ":all" {
protocol_sanitizers.insert(attr_name.to_string(), vec!["all".to_string()]);
}
}
}
Expand Down Expand Up @@ -388,6 +391,10 @@ impl SelmaSanitizer {
}

fn has_allowed_protocol(protocols_allowed: &[String], attr_val: &String) -> bool {
if protocols_allowed.contains(&"all".to_string()) {
return true;
}

// FIXME: is there a more idiomatic way to do this?
let mut pos: usize = 0;
let mut chars = attr_val.chars();
Expand Down
7 changes: 6 additions & 1 deletion lib/selma/sanitizer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,12 @@ def allow_class(element, *klass)
end

# Registers the URL protocols permitted for +attr+ on +element+.
#
# +protos+ may be an Array of protocols or a single bare value; a bare value
# is wrapped in a one-element Array before being passed to
# +set_allowed_protocols+. `:all` is only accepted in the bare form.
#
# Raises ArgumentError when `:all` appears inside an Array.
def allow_protocol(element, attr, protos)
  if protos.is_a?(Array)
    raise ArgumentError, "`:all` must be passed outside of an array" if protos.include?(:all)
  else
    # Normalize a bare value (e.g. "https", :relative or :all) to a list.
    # This must happen only on the non-Array path: wrapping before the check
    # above would make a bare :all indistinguishable from [:all].
    protos = [protos]
  end

  set_allowed_protocols(element, attr, protos)
end

Expand Down
2 changes: 1 addition & 1 deletion lib/selma/sanitizer/config/default.rb
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ module Config

# URL handling protocols to allow in specific attributes. By default, no
# protocols are allowed. Use :relative in place of a protocol if you want
# to allow relative URLs sans protocol. To allow any protocol, pass the bare
# symbol :all (not wrapped in an array) in place of the protocol list.
protocols: {},

# An Array of element names whose contents will be removed. The contents
Expand Down
2 changes: 1 addition & 1 deletion lib/selma/version.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# frozen_string_literal: true

module Selma
  # Current gem version. Defined exactly once; a second assignment to the
  # same constant would trigger an "already initialized constant" warning.
  VERSION = "0.3.0"
end
13 changes: 13 additions & 0 deletions test/selma_maliciousness_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -154,4 +154,17 @@ def test_that_it_raises_on_handle_text_returning_non_string
Selma::Rewriter.new(sanitizer: nil, handlers: [GarbageTextOptions.new]).rewrite(frag)
end
end

# A sanitizer configured with :all wrapped in an array is expected to make
# the rewrite raise ArgumentError.
def test_sanitizer_expects_all_as_symbol
  config = {
    elements: ["a"],
    attributes: { "a" => ["href"] },
    protocols: { "a" => { "href" => [:all] } },
  }
  markup = "<a href='https://google.com'>wow!</a>"
  sanitizer = Selma::Sanitizer.new(config)

  assert_raises(ArgumentError) do
    Selma::Rewriter.new(sanitizer: sanitizer).rewrite(markup)
  end
end
end
18 changes: 18 additions & 0 deletions test/selma_sanitizer_elements_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,24 @@ def test_should_allow_relative_urls_containing_colons_when_the_colon_is_part_of_
assert_equal("<a>Footnote 1</a>", Selma::Rewriter.new(sanitizer: sanitizer).rewrite(input))
end

# With the href protocols set to the bare :all symbol, every link below
# (relative, http, https, ftp and ssh) must be returned unchanged.
def test_should_allow_all_protocols_if_asked
  config = {
    elements: ["a"],
    attributes: { "a" => ["href"] },
    protocols: { "a" => { "href" => :all } },
  }
  sanitizer = Selma::Sanitizer.new(config)

  html = <<~HTML
    <a href="/foo/bar">Link</a>
    <a href="http://wow.com/foo/bar">Link</a>
    <a href="https://wow.com/foo/bar">Link</a>
    <a href="ftp://wow.com/foo/bar">Link</a>
    <a href="ssh://127.0.0.1">Link</a>
  HTML

  rewriter = Selma::Rewriter.new(sanitizer: sanitizer)

  assert_equal(html, rewriter.rewrite(html))
end

def test_should_remove_the_contents_of_filtered_nodes_when_remove_contents_is_true
sanitizer = Selma::Sanitizer.new({ remove_contents: true })

Expand Down