Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revert sanitization reordering (restore old behavior) #63

Merged
merged 1 commit into from
Jul 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 6 additions & 16 deletions ext/selma/src/rewriter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,7 @@ impl SelmaRewriter {
}
};

let handlers = &binding.handlers;
let handlers: &Vec<Handler> = &binding.handlers;

match Self::perform_handler_rewrite(
self,
Expand Down Expand Up @@ -372,6 +372,9 @@ impl SelmaRewriter {
// TODO: this should ideally be done ahead of time on `initialize`, not on every `#rewrite` call
let mut element_content_handlers: Vec<(Cow<Selector>, ElementContentHandlers)> = vec![];

// have sanitization happen first
element_content_handlers.extend(sanitizer_element_content_handlers);

handlers.iter().for_each(|handler| {
let element_stack: Rc<RefCell<Vec<String>>> = Rc::new(RefCell::new(vec![]));

Expand Down Expand Up @@ -451,25 +454,12 @@ impl SelmaRewriter {
}));
});

let rewritten_html = Self::run_rewrite(
Self::run_rewrite(
self,
sanitizer_document_content_handlers,
element_content_handlers,
html.as_bytes(),
);

// sanitization must happen separately, because text chunks
// could potentially have rewritten the html. ideally we'd
// be able to sanitize around the `process_text_handlers` call
match rewritten_html {
Ok(rewritten_html) => Self::run_rewrite(
self,
vec![],
sanitizer_element_content_handlers,
rewritten_html.as_slice(),
),
Err(err) => Err(err),
}
)
}

fn run_rewrite(
Expand Down
2 changes: 1 addition & 1 deletion lib/selma/version.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# frozen_string_literal: true

module Selma
VERSION = "0.4.1"
VERSION = "0.4.2"
end
4 changes: 2 additions & 2 deletions test/selma_rewriter_match_element_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ def test_that_it_works_with_sanitizer
frag = "<malarky><strong><junk>Wow!</junk></strong></malarky>"
modified_doc = Selma::Rewriter.new(sanitizer: sanitizer, handlers: [Handler.new]).rewrite(frag)

# note that sanitization occurs *after* rewriting
assert_equal("<strong>Wow!</strong>", modified_doc)
# note that sanitization does not effect rewriting
assert_equal("<strong class=\"boldy\">Wow!</strong>", modified_doc)
end

class FirstRewrite
Expand Down
6 changes: 3 additions & 3 deletions test/selma_rewriter_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def handle_text_chunk(text)
end
end

def test_rewritten_text_chunk_is_still_sanitized
def test_rewritten_text_chunk_is_not_sanitized
initial_html = "<p>Hey there, @gjtorikian is here.</p>"

sanitizer_config = Selma::Sanitizer.new({
Expand All @@ -107,7 +107,7 @@ def test_rewritten_text_chunk_is_still_sanitized
rewriter = Selma::Rewriter.new(sanitizer: sanitizer_config, handlers: [ElementRewriter.new])
result = rewriter.rewrite(initial_html)

# `class` is sanitized out
assert_equal("<p>Hey there, <a href=\"https://yetto.app/gjtorikian\">@gjtorikian</a> is here.</p>", result)
# `class` is not sanitized out
assert_equal("<p>Hey there, <a href=\"https://yetto.app/gjtorikian\" class=\"user-mention\">@gjtorikian</a> is here.</p>", result)
end
end