diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml index b9b6a927..6cbca5f7 100644 --- a/.github/FUNDING.yml +++ b/.github/FUNDING.yml @@ -1,4 +1,12 @@ +# These are supported funding model platforms + github: gjtorikian -patreon: gjtorikian -open_collective: garen-torikian -issuehunt: gjtorikian +# patreon: gjtorikian +# open_collective: garen-torikian +#ko_fi: # Replace with a single Ko-fi username +#tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel +#community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry +#liberapay: # Replace with a single Liberapay username +# issuehunt: gjtorikian +#otechie: # Replace with a single Otechie username +#custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 00000000..be466a31 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,20 @@ +version: 2 +updates: + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: daily + time: "09:00" + timezone: "Etc/UTC" + open-pull-requests-limit: 10 + + - package-ecosystem: "bundler" + directory: "/" + schedule: + interval: daily + time: "09:00" + timezone: "Etc/UTC" + open-pull-requests-limit: 10 + allow: + - dependency-name: "*" + dependency-type: "production" diff --git a/.github/workflows/automerge.yml b/.github/workflows/automerge.yml new file mode 100644 index 00000000..4b61205e --- /dev/null +++ b/.github/workflows/automerge.yml @@ -0,0 +1,34 @@ +name: PR auto-{approve,merge} + +on: + pull_request_target: + +permissions: + pull-requests: write + contents: write + +jobs: + dependabot: + name: Dependabot + runs-on: ubuntu-latest + + if: ${{ github.actor == 'dependabot[bot]' }} + steps: + - name: Fetch Dependabot metadata + id: dependabot-metadata + uses: dependabot/fetch-metadata@v1 + with: + github-token: "${{ secrets.GITHUB_TOKEN }}" + + - name: Approve Dependabot PR + if: 
${{steps.dependabot-metadata.outputs.update-type != 'version-update:semver-major'}} + run: gh pr review --approve "$PR_URL" + env: + PR_URL: ${{github.event.pull_request.html_url}} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + - name: Merge Dependabot PR + run: gh pr merge --auto --squash "$PR_URL" + env: + PR_URL: ${{ github.event.pull_request.html_url }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 00000000..2a5663ae --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,23 @@ +name: Linting + +on: + pull_request: + paths: + - "**/*.rb" + +permissions: + contents: read + +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: ruby/setup-ruby@v1 + with: + ruby-version: 3.1.0 + rubygems: latest + bundler-cache: true + - run: bundle install + - name: Rubocop + run: bundle exec rake rubocop diff --git a/.github/workflows/tag_and_release.yml b/.github/workflows/tag_and_release.yml new file mode 100644 index 00000000..a1bec14b --- /dev/null +++ b/.github/workflows/tag_and_release.yml @@ -0,0 +1,70 @@ +name: Tag and Release + +on: + workflow_dispatch: + push: + branches: + - main + paths: + - "lib/html_pipeline/version.rb" + +jobs: + release: + env: + GEM_NAME: html-pipeline + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GEM_HOST_API_KEY: ${{ secrets.RUBYGEMS_API_BOT_KEY }} + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + + - name: Set up Ruby 3.1 + uses: ruby/setup-ruby@v1 + with: + ruby-version: 3.1 + bundler-cache: true + + - name: Configure Git + run: | + git config --local user.email "actions@github.com" + git config --local user.name "Actions Auto Build" + + - name: Get current version + id: version-label + run: | + VERSION=$(grep VERSION lib/html_pipeline/version.rb | head -n 1 | cut -d'"' -f2) + echo "version=${VERSION}" >> $GITHUB_OUTPUT + + - name: Create tag + run: | + git tag -a v${{ 
steps.version-label.outputs.version }} -m "Release v${{ steps.version-label.outputs.version }}" + git push origin --tags + + - name: Generate CHANGELOG.md + id: changelog + run: script/generate_changelog + + - name: Commit & Push Changelog + run: | + git config --local user.email "actions@github.com" + git config --local user.name "Actions Auto Build" + git add -f CHANGELOG.md + git commit -m "docs: update changelog" || true + git push + + - name: Publish release + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + gh release create v${{ steps.version-label.outputs.version }} --generate-notes + + - name: Publish to RubyGems + run: | + mkdir -p $HOME/.gem + touch $HOME/.gem/credentials + chmod 0600 $HOME/.gem/credentials + printf -- "---\n:rubygems_api_key: ${GEM_HOST_API_KEY}\n" > $HOME/.gem/credentials + bundle exec rake package + for gem in pkg/html-pipeline-${{ steps.version-label.outputs.version }}*.gem ; do + gem push "$gem" --host https://rubygems.org + done diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 00000000..ba94ccbf --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,33 @@ +name: Tests + +on: + pull_request: + +permissions: + contents: read + +jobs: + test: + runs-on: ubuntu-latest + + strategy: + fail-fast: true + matrix: + ruby-version: + - 3.1.0 + + steps: + - uses: actions/checkout@v3 + + - name: Set up Ruby ${{ matrix.ruby-version }} + uses: ruby/setup-ruby@v1 + with: + ruby-version: ${{ matrix.ruby-version }} + rubygems: latest + bundler-cache: true + + - name: Install dependencies + run: bundle install + + - name: Run tests + run: bundle exec rake test diff --git a/.rubocop.yml b/.rubocop.yml new file mode 100644 index 00000000..ff7134f7 --- /dev/null +++ b/.rubocop.yml @@ -0,0 +1,17 @@ +inherit_gem: + rubocop-standard: + - config/default.yml + - config/minitest.yml + +inherit_mode: + merge: + - Exclude + +AllCops: + Exclude: + - test/progit/**/* + - "pkg/**/*" + - "ext/**/*" + - 
"vendor/**/*" + - "tmp/**/*" + - "test/progit/**/*" diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index ff321ccd..00000000 --- a/.travis.yml +++ /dev/null @@ -1,43 +0,0 @@ -language: ruby -cache: bundler -bundler_args: --path ../../vendor/bundle - -addons: - apt: - sources: - - libicu-dev - - kalakris-cmake - packages: - - cmake - -script: bundle exec rake - -gemfile: - - gemfiles/rails_6.gemfile - - gemfiles/rails_5.gemfile - - gemfiles/rails_4.gemfile - - gemfiles/rails_3.gemfile - -rvm: - - 2.4.6 - - 2.3.8 - - 2.5.7 - - ruby-head - -matrix: - fast_finish: true - allow_failures: - - rvm: ruby-head - exclude: - - gemfile: gemfiles/rails_6.gemfile - rvm: 2.4.6 - - gemfile: gemfiles/rails_6.gemfile - rvm: 2.3.8 - - gemfile: gemfiles/rails_4.gemfile - rvm: 2.5.7 - - gemfile: gemfiles/rails_4.gemfile - rvm: 2.4.6 - - gemfile: gemfiles/rails_3.gemfile - rvm: 2.5.7 - - gemfile: gemfiles/rails_3.gemfile - rvm: 2.4.6 diff --git a/Appraisals b/Appraisals deleted file mode 100644 index 6037fd68..00000000 --- a/Appraisals +++ /dev/null @@ -1,19 +0,0 @@ -# frozen_string_literal: true - -appraise 'rails-3' do - gem 'rack', '< 2' - gem 'rails', '3.2.22.2' -end - -appraise 'rails-4' do - gem 'rack', '< 2' - gem 'rails', '~> 4.2.6' -end - -appraise 'rails-5' do - gem 'rails', '~> 5.0.0' -end - -appraise 'rails-6' do - gem 'rails', '~> 6.0.0' -end diff --git a/CHANGELOG.md b/CHANGELOG.md index 4c4ba82f..bdf6cc85 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -56,10 +56,7 @@ **Merged pull requests:** -- Replace whitelist with more neutral language [\#339](https://github.com/gjtorikian/html-pipeline/pull/339) ([tancnle](https://github.com/tancnle)) -- allows progress tags to be used [\#338](https://github.com/gjtorikian/html-pipeline/pull/338) ([pedrozath](https://github.com/pedrozath)) -- Updated English [\#337](https://github.com/gjtorikian/html-pipeline/pull/337) ([BhuvnendraPratapSingh](https://github.com/BhuvnendraPratapSingh)) -- Make AutolinkFilter 
configurable [\#335](https://github.com/gjtorikian/html-pipeline/pull/335) ([mnishiguchi](https://github.com/mnishiguchi)) + * Freeze all elements in HTML::Pipeline::SanitizationFilter [#299](https://github.com/jch/html-pipeline/pull/299) ## [v2.14.0](https://github.com/gjtorikian/html-pipeline/tree/v2.14.0) (2020-08-11) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md deleted file mode 100644 index 5f66206d..00000000 --- a/CONTRIBUTING.md +++ /dev/null @@ -1,60 +0,0 @@ -# Contributing - -Thanks for using and improving `HTML::Pipeline`! - -- [Submitting a New Issue](#submitting-a-new-issue) -- [Sending a Pull Request](#sending-a-pull-request) - -## Submitting a New Issue - -If there's an idea you'd like to propose, or a design change, feel free to file a new issue. - -If you have an implementation question or believe you've found a bug, please provide as many details as possible: - -- Input document -- Output HTML document -- the exact `HTML::Pipeline` code you are using -- output of the following from your project - -``` -ruby -v -bundle exec nokogiri -v -``` - -## Sending a Pull Request - -[Pull requests][pr] are always welcome! - -Check out [the project's issues list][issues] for ideas on what could be improved. - -Before sending, please add tests and ensure the test suite passes. 
- -### Running the Tests - -To run the full suite: - - `bundle exec rake` - -To run a specific test file: - - `bundle exec ruby -Itest test/html/pipeline_test.rb` - -To run a specific test: - - `bundle exec ruby -Itest test/html/pipeline/markdown_filter_test.rb -n test_disabling_gfm` - -To run the full suite with all [supported rubies][travisyaml] in bash: - -```bash -rubies=(ree-1.8.7-2011.03 1.9.2-p290 1.9.3-p429 2.0.0-p247) -for r in ${rubies[*]} -do - rbenv local $r # switch to your version manager of choice - bundle install - bundle exec rake -done -``` - -[issues]: https://github.com/jch/html-pipeline/issues -[pr]: https://help.github.com/articles/using-pull-requests -[travisyaml]: https://github.com/jch/html-pipeline/blob/master/.travis.yml diff --git a/Gemfile b/Gemfile index adc923bd..e18f5638 100644 --- a/Gemfile +++ b/Gemfile @@ -1,26 +1,40 @@ # frozen_string_literal: true -source 'https://rubygems.org' +source "https://rubygems.org" # Specify your gem's dependencies in html-pipeline.gemspec gemspec +gem "awesome_print" + +gem "rubocop" +gem "rubocop-standard" + +gem "github_changelog_generator", "~> 1.16" + +gem "sorbet-runtime" + +group :development, :test do + gem "amazing_print" + gem "debug" +end + group :development do - gem 'appraisal' - gem 'bundler' - gem 'rake' + gem "tapioca", require: false + gem "sorbet" + gem "bundler" + gem "rake" end group :test do - gem 'commonmarker', '~> 0.16', require: false - gem 'email_reply_parser', '~> 0.5', require: false - gem 'gemoji', '~> 2.0', require: false - gem 'minitest' - gem 'RedCloth', '~> 4.2.9', require: false - gem 'rinku', '~> 1.7', require: false - gem 'sanitize', '~> 4.6', require: false - - gem 'escape_utils', '~> 1.0', require: false - gem 'rouge', '~> 3.1', require: false - gem 'minitest-focus', '~> 1.1' + gem "commonmarker", "~> 1.0.0.pre4", require: false + gem "gemoji", "~> 3.0", require: false + gem "gemojione", "~> 4.3", require: false + gem "minitest" + + gem "minitest-bisect", "~> 1.6" 
+ + gem "nokogiri", "~> 1.13" + + gem "minitest-focus", "~> 1.1" end diff --git a/LICENSE b/LICENSE.txt similarity index 89% rename from LICENSE rename to LICENSE.txt index c972fc6b..574b42c1 100644 --- a/LICENSE +++ b/LICENSE.txt @@ -1,4 +1,4 @@ -Copyright (c) 2012 GitHub Inc. and Jerry Cheung +Copyright (c) 2023 Garen Torikian MIT License @@ -19,4 +19,4 @@ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. \ No newline at end of file +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/README.md b/README.md index e852842b..f1d30ef3 100644 --- a/README.md +++ b/README.md @@ -1,23 +1,23 @@ -# HTML::Pipeline [![Build Status](https://travis-ci.org/jch/html-pipeline.svg?branch=master)](https://travis-ci.org/jch/html-pipeline) +# HTMLPipeline -HTML processing filters and utilities. This module includes a small -framework for defining DOM based content filters and applying them to user +> **Note** +> This README refers to the behavior in the new 3.0.0.pre1 gem. + +HTML processing filters and utilities. This module is a small +framework for defining CSS-based content filters and applying them to user provided content. -[This project was started at GitHub](https://github.com/blog/1311-html-pipeline-chainable-content-filters). While GitHub still uses a similar design and pattern for rendering content, this gem should be considered standalone and independent from GitHub. +[Although this project was started at GitHub](https://github.com/blog/1311-html-pipeline-chainable-content-filters), they no longer do. This gem must be considered standalone and independent from GitHub. 
- [Installation](#installation) - [Usage](#usage) - - [Examples](#examples) + - [More Examples](#more-examples) - [Filters](#filters) - [Dependencies](#dependencies) - [Documentation](#documentation) -- [Extending](#extending) - - [3rd Party Extensions](#3rd-party-extensions) - [Instrumenting](#instrumenting) -- [Contributing](#contributing) - - [Contributors](#contributors) - - [Releasing A New Version](#releasing-a-new-version) +- [Third Party Extensions](#third-party-extensions) +- [FAQ](#faq) ## Installation @@ -42,220 +42,216 @@ $ gem install html-pipeline ## Usage This library provides a handful of chainable HTML filters to transform user -content into markup. A filter takes an HTML string or -`Nokogiri::HTML::DocumentFragment`, optionally manipulates it, and then -outputs the result. +content into HTML markup. Each filter does some work, and then hands off the +results to the next filter. A pipeline has several kinds of filters available to use: -For example, to transform Markdown source into Markdown HTML: +- Multiple `TextFilter`s, which operate on a UTF-8 string +- A `ConvertFilter` filter, which turns text into HTML (e.g., Commonmark/Asciidoc -> HTML) +- A `SanitizationFilter`, which removes dangerous/unwanted HTML elements and attributes +- Multiple `NodeFilter`s, which operate on a UTF-8 HTML document -```ruby -require 'html/pipeline' +You can assemble each sequence into a single pipeline, or choose to call each filter individually. -filter = HTML::Pipeline::MarkdownFilter.new("Hi **world**!") -filter.call -``` +As an example, suppose we want to transform Commonmark source text into Markdown HTML. With the content, we also want to: + +- change every instance of `$NAME` to "Johnny" +- strip undesired HTML +- linkify @mention -Filters can be combined into a pipeline which causes each filter to hand its -output to the next filter's input. 
So if you wanted to have content be -filtered through Markdown and be syntax highlighted, you can create the -following pipeline: +We can construct a pipeline to do all that like this: ```ruby -pipeline = HTML::Pipeline.new [ - HTML::Pipeline::MarkdownFilter, - HTML::Pipeline::SyntaxHighlightFilter -] -result = pipeline.call <<-CODE -This is *great*: +require 'html_pipeline' - some_code(:first) +class HelloJohnnyFilter < HTMLPipelineFilter + def call + text.gsub("$NAME", "Johnny") + end +end -CODE -result[:output].to_s +pipeline = HTMLPipeline.new( + text_filters: [HelloJohnnyFilter.new] + convert_filter: HTMLPipeline::ConvertFilter::MarkdownFilter.new), + # note: next line is not needed as sanitization occurs by default; + # see below for more info + sanitization_config: HTMLPipeline::SanitizationFilter::DEFAULT_CONFIG, + node_filters: [HTMLPipeline::NodeFilter::MentionFilter.new] +) +pipeline.call(user_supplied_text) # recommended: can call pipeline over and over ``` -Prints: - -```html -

This is great:

+Filters can be custom ones you create (like `HelloJohnnyFilter`), and `HTMLPipeline` additionally provides several helpful ones (detailed below). If you only need a single filter, you can call one individually, too: -
some_code(:first)
-
+```ruby +filter = HTMLPipeline::ConvertFilter::MarkdownFilter.new(text) +filter.call ``` -To generate CSS for HTML formatted code, use the [Rouge CSS Theme](https://github.com/rouge-ruby/rouge#css-options) `#css` method. `rouge` is a dependency of the `SyntaxHighlightFilter`. +Filters combine into a sequential pipeline, and each filter hands its +output to the next filter's input. Text filters are +processed first, then the convert filter, sanitization filter, and finally, the node filters. -Some filters take an optional **context** and/or **result** hash. These are +Some filters take optional `context` and/or `result` hash(es). These are used to pass around arguments and metadata between filters in a pipeline. For -example, if you don't want to use GitHub formatted Markdown, you can pass an -option in the context hash: +example, if you want to disable footnotes in the `MarkdownFilter`, you can pass an option in the context hash: ```ruby -filter = HTML::Pipeline::MarkdownFilter.new("Hi **world**!", :gfm => false) +context = { markdown: extensions: { footnotes: false } } +filter = HTMLPipeline::ConvertFilter::MarkdownFilter.new("Hi **world**!", context: context) filter.call ``` -### Examples +Please refer to the documentation for each filter to understand what configuration options are available. + +### More Examples -We define different pipelines for different parts of our app. Here are a few +Different pipelines can be defined for different parts of an app. Here are a few paraphrased snippets to get you started: ```ruby # The context hash is how you pass options between different filters. # See individual filter source for explanation of options. context = { - :asset_root => "http://your-domain.com/where/your/images/live/icons", - :base_url => "http://your-domain.com" + asset_root: "http://your-domain.com/where/your/images/live/icons", + base_url: "http://your-domain.com" } -# Pipeline providing sanitization and image hijacking but no mention -# related features. 
-SimplePipeline = Pipeline.new [ - SanitizationFilter, - TableOfContentsFilter, # add 'name' anchors to all headers and generate toc list - CamoFilter, - ImageMaxWidthFilter, - SyntaxHighlightFilter, - EmojiFilter, - AutolinkFilter -], context - # Pipeline used for user provided content on the web -MarkdownPipeline = Pipeline.new [ - MarkdownFilter, - SanitizationFilter, - CamoFilter, - ImageMaxWidthFilter, - HttpsFilter, - MentionFilter, - EmojiFilter, - SyntaxHighlightFilter -], context.merge(:gfm => true) # enable github formatted markdown - - -# Define a pipeline based on another pipeline's filters -NonGFMMarkdownPipeline = Pipeline.new(MarkdownPipeline.filters, - context.merge(:gfm => false)) +MarkdownPipeline = HTMLPipeline.new ( + text_filters: [HTMLPipeline::TextFilter::ImageMaxWidthFilter.new], + convert_filter: [HTMLPipeline::ConvertFilter::MarkdownFilter.new], + node_filters: [ + HTMLPipeline::NodeFilter::HttpsFilter.new,HTMLPipeline::NodeFilter::MentionFilter.new, + ], context: context) # Pipelines aren't limited to the web. You can use them for email # processing also. -HtmlEmailPipeline = Pipeline.new [ - PlainTextInputFilter, - ImageMaxWidthFilter -], {} - -# Just emoji. -EmojiPipeline = Pipeline.new [ - PlainTextInputFilter, - EmojiFilter -], context +HtmlEmailPipeline = HTMLPipeline.new( + text_filters: [ + PlainTextInputFilter.new, + ImageMaxWidthFilter.new + ], {}) ``` ## Filters -* `MentionFilter` - replace `@user` mentions with links -* `TeamMentionFilter` - replace `@org/team` mentions with links -* `AbsoluteSourceFilter` - replace relative image urls with fully qualified versions -* `AutolinkFilter` - auto_linking urls in HTML -* `CamoFilter` - replace http image urls with [camo-fied](https://github.com/atmos/camo) https versions -* `EmailReplyFilter` - util filter for working with emails -* `EmojiFilter` - everyone loves [emoji](http://www.emoji-cheat-sheet.com/)! 
-* `HttpsFilter` - HTML Filter for replacing http github urls with https versions. -* `ImageMaxWidthFilter` - link to full size image for large images -* `MarkdownFilter` - convert markdown to html -* `PlainTextInputFilter` - html escape text and wrap the result in a div -* `SanitizationFilter` - allow sanitize user markup -* `SyntaxHighlightFilter` - code syntax highlighter -* `TextileFilter` - convert textile to html -* `TableOfContentsFilter` - anchor headings with name attributes and generate Table of Contents html unordered list linking headings +### TextFilters -## Dependencies +`TextFilter`s must define a method named `call` which is called on the text. `@text`, `@config`, and `@result` are available to use, and any changes made to these ivars are passed on to the next filter. + +- `ImageFilter` - converts image `url` into `<img>` tag +- `PlainTextInputFilter` - html escape text and wrap the result in a `<div>` -Filter gem dependencies are not bundled; you must bundle the filter's gem -dependencies. The below list details filters with dependencies. For example, -`SyntaxHighlightFilter` uses [rouge](https://github.com/jneen/rouge) -to detect and highlight languages. For example, to use the `SyntaxHighlightFilter`, -add the following to your Gemfile: +### ConvertFilter + +The `ConvertFilter` takes text and turns it into HTML. `@text`, `@config`, and `@result` are available to use. `ConvertFilter` must define a method named `call`, taking one argument, `text`. `call` must return a string representing the new HTML document. + +- `MarkdownFilter` - creates HTML from text using [Commonmarker](https://www.github.com/gjtorikian/commonmarker) + +### Sanitization + +Because the web can be a scary place, HTML is automatically sanitized after the `ConvertFilter` runs and before the `NodeFilter`s are processed. This is to prevent malicious or unexpected input from entering the pipeline. + +The sanitization process takes a hash configuration of settings. See the [Selma](https://www.github.com/gjtorikian/selma) documentation for more information on how to configure these settings. + +A default sanitization config is provided by this library (`HTMLPipeline::SanitizationFilter::DEFAULT_CONFIG`). 
A sample custom sanitization allowlist might look like this: ```ruby -gem 'rouge' +ALLOWLIST = { + elements: ["p", "pre", "code"] +} + +pipeline = HTMLPipeline.new \ + text_filters: [ + HTMLPipeline::MarkdownFilter, + ], + convert_filter: [HTMLPipeline::ConvertFilter::MarkdownFilter.new], + sanitization_config: ALLOWLIST + +result = pipeline.call <<-CODE +This is *great*: + + some_code(:first) + +CODE +result[:output].to_s ``` -* `AutolinkFilter` - `rinku` -* `EmailReplyFilter` - `escape_utils`, `email_reply_parser` -* `EmojiFilter` - `gemoji` -* `MarkdownFilter` - `commonmarker` -* `PlainTextInputFilter` - `escape_utils` -* `SanitizationFilter` - `sanitize` -* `SyntaxHighlightFilter` - `rouge` -* `TableOfContentsFilter` - `escape_utils` -* `TextileFilter` - `RedCloth` +This would print: -_Note:_ See [Gemfile](/Gemfile) `:test` block for version requirements. +```html +

This is great:

+
some_code(:first)
+
+``` -## Documentation +Sanitization can be disabled if and only if `nil` is explicitly passed as +the config: -Full reference documentation can be [found here](http://rubydoc.info/gems/html-pipeline/frames). +```ruby +pipeline = HTMLPipeline.new \ + text_filters: [ + HTMLPipeline::MarkdownFilter, + ], + convert_filter: [HTMLPipeline::ConvertFilter::MarkdownFilter.new], + sanitization_config: nil +``` + +For more examples of customizing the sanitization process to include the tags you want, check out [the tests](test/sanitization_filter_test.rb). -## Extending -To write a custom filter, you need a class with a `call` method that inherits -from `HTML::Pipeline::Filter`. +### NodeFilters -For example this filter adds a base url to images that are root relative: +`NodeFilter`s can operate either on HTML elements or text nodes using CSS selectors. Each `NodeFilter` must define a method named `selector` which provides an instance of `Selma::Selector`. If elements are being manipulated, `handle_element` must be defined, taking one argument, `element`; if text nodes are being manipulated, `handle_text_chunk` must be defined, taking one argument, `text_chunk`. `@config` and `@result` are available to use, and any changes made to these ivars are passed on to the next filter. + +`NodeFilter` also has an optional method, `after_initialize`, which is run after the filter initializes. This can be useful in setting up a custom state for `result` to take advantage of. + +Here's an example `NodeFilter` that adds a base url to images that are root relative: ```ruby require 'uri' -class RootRelativeFilter < HTML::Pipeline::Filter +class RootRelativeFilter < HTMLPipeline::NodeFilter - def call - doc.search("img").each do |img| - next if img['src'].nil? - src = img['src'].strip - if src.start_with? 
'/' - img["src"] = URI.join(context[:base_url], src).to_s - end - end - doc + SELECTOR = Selma::Selector.new(match_element: "img") + + def selector + SELECTOR end + def handle_element(img) + next if img['src'].nil? + src = img['src'].strip + if src.start_with? '/' + img["src"] = URI.join(context[:base_url], src).to_s + end + end end ``` -Now this filter can be used in a pipeline: +For more information on how to write effective `NodeFilter`s, refer to the provided filters, and see the underlying lib, [Selma](https://www.github.com/gjtorikian/selma) for more information. -```ruby -Pipeline.new [ RootRelativeFilter ], { :base_url => 'http://somehost.com' } -``` +- `AbsoluteSourceFilter` - replace relative image urls with fully qualified versions +- `EmojiFilter` - converts `::` to [emoji](http://www.emoji-cheat-sheet.com/)! +- `HttpsFilter` - Replacing http urls with https versions +- `ImageMaxWidthFilter` - link to full size image for large images +- `MentionFilter` - replace `@user` mentions with links +- `SanitizationFilter` - allow sanitize user markup +- `TableOfContentsFilter` - anchor headings with name attributes and generate Table of Contents html unordered list linking headings +- `TeamMentionFilter` - replace `@org/team` mentions with links -### 3rd Party Extensions +## Dependencies -If you have an idea for a filter, propose it as -[an issue](https://github.com/jch/html-pipeline/issues) first. This allows us discuss -whether the filter is a common enough use case to belong in this gem, or should be -built as an external gem. +Since filters can be customized to your heart's content, gem dependencies are _not_ bundled; this project doesn't know which of the default filters you might use, and as such, you must bundle each filter's gem +dependencies yourself. -Here are some extensions people have built: +> **Note** +> See the [Gemfile](/Gemfile) `:test` group for any version requirements. 
-* [html-pipeline-asciidoc_filter](https://github.com/asciidoctor/html-pipeline-asciidoc_filter) -* [jekyll-html-pipeline](https://github.com/gjtorikian/jekyll-html-pipeline) -* [nanoc-html-pipeline](https://github.com/burnto/nanoc-html-pipeline) -* [html-pipeline-bitly](https://github.com/dewski/html-pipeline-bitly) -* [html-pipeline-cite](https://github.com/lifted-studios/html-pipeline-cite) -* [tilt-html-pipeline](https://github.com/bradgessler/tilt-html-pipeline) -* [html-pipeline-wiki-link'](https://github.com/lifted-studios/html-pipeline-wiki-link) - WikiMedia-style wiki links -* [task_list](https://github.com/github/task_list) - GitHub flavor Markdown Task List -* [html-pipeline-nico_link](https://github.com/rutan/html-pipeline-nico_link) - An HTML::Pipeline filter for [niconico](http://www.nicovideo.jp) description links -* [html-pipeline-gitlab](https://gitlab.com/gitlab-org/html-pipeline-gitlab) - This gem implements various filters for html-pipeline used by GitLab -* [html-pipeline-youtube](https://github.com/st0012/html-pipeline-youtube) - An HTML::Pipeline filter for YouTube links -* [html-pipeline-flickr](https://github.com/st0012/html-pipeline-flickr) - An HTML::Pipeline filter for Flickr links -* [html-pipeline-vimeo](https://github.com/dlackty/html-pipeline-vimeo) - An HTML::Pipeline filter for Vimeo links -* [html-pipeline-hashtag](https://github.com/mr-dxdy/html-pipeline-hashtag) - An HTML::Pipeline filter for hashtags -* [html-pipeline-linkify_github](https://github.com/jollygoodcode/html-pipeline-linkify_github) - An HTML::Pipeline filter to autolink GitHub urls -* [html-pipeline-redcarpet_filter](https://github.com/bmikol/html-pipeline-redcarpet_filter) - Render Markdown source text into Markdown HTML using Redcarpet -* [html-pipeline-typogruby_filter](https://github.com/bmikol/html-pipeline-typogruby_filter) - Add Typogruby text filters to your HTML::Pipeline -* [korgi](https://github.com/jodeci/korgi) - HTML::Pipeline filters for links to 
Rails resources +When developing a custom filter, call `HTMLPipeline.require_dependency` at the start to ensure that the local machine has the necessary dependency. You can also use `HTMLPipeline.require_dependencies` to provide a list of dependencies to check. +## Documentation + +Full reference documentation can be [found here](http://rubydoc.info/gems/html-pipeline/frames). ## Instrumenting @@ -263,107 +259,102 @@ Filters and Pipelines can be set up to be instrumented when called. The pipeline must be setup with an [ActiveSupport::Notifications](http://api.rubyonrails.org/classes/ActiveSupport/Notifications.html) compatible service object and a name. New pipeline objects will default to the -`HTML::Pipeline.default_instrumentation_service` object. +`HTMLPipeline.default_instrumentation_service` object. -``` ruby +```ruby # the AS::Notifications-compatible service object service = ActiveSupport::Notifications # instrument a specific pipeline -pipeline = HTML::Pipeline.new [MarkdownFilter], context +pipeline = HTMLPipeline.new [MarkdownFilter], context pipeline.setup_instrumentation "MarkdownPipeline", service # or set default instrumentation service for all new pipelines -HTML::Pipeline.default_instrumentation_service = service -pipeline = HTML::Pipeline.new [MarkdownFilter], context +HTMLPipeline.default_instrumentation_service = service +pipeline = HTMLPipeline.new [MarkdownFilter], context pipeline.setup_instrumentation "MarkdownPipeline" ``` Filters are instrumented when they are run through the pipeline. A -`call_filter.html_pipeline` event is published once the filter finishes. The -`payload` should include the `filter` name. Each filter will trigger its own +`call_filter.html_pipeline` event is published once any filter finishes; `call_text_filters` +and `call_node_filters` are published when all of the text and node filters are finished, respectively. +The `payload` should include the `filter` name. Each filter will trigger its own instrumentation call. 
-``` ruby +```ruby service.subscribe "call_filter.html_pipeline" do |event, start, ending, transaction_id, payload| payload[:pipeline] #=> "MarkdownPipeline", set with `setup_instrumentation` payload[:filter] #=> "MarkdownFilter" payload[:context] #=> context Hash payload[:result] #=> instance of result class - payload[:result][:output] #=> output HTML String or Nokogiri::DocumentFragment + payload[:result][:output] #=> output HTML String end ``` The full pipeline is also instrumented: -``` ruby -service.subscribe "call_pipeline.html_pipeline" do |event, start, ending, transaction_id, payload| +```ruby +service.subscribe "call_text_filters.html_pipeline" do |event, start, ending, transaction_id, payload| payload[:pipeline] #=> "MarkdownPipeline", set with `setup_instrumentation` payload[:filters] #=> ["MarkdownFilter"] - payload[:doc] #=> HTML String or Nokogiri::DocumentFragment + payload[:doc] #=> HTML String payload[:context] #=> context Hash payload[:result] #=> instance of result class - payload[:result][:output] #=> output HTML String or Nokogiri::DocumentFragment + payload[:result][:output] #=> output HTML String end ``` +## Third Party Extensions + +If you have an idea for a filter, propose it as +[an issue](https://github.com/gjtorikian/html-pipeline/issues) first. This allows us to discuss +whether the filter is a common enough use case to belong in this gem, or should be +built as an external gem. 
+ +Here are some extensions people have built: + +- [html-pipeline-asciidoc_filter](https://github.com/asciidoctor/html-pipeline-asciidoc_filter) +- [jekyll-html-pipeline](https://github.com/gjtorikian/jekyll-html-pipeline) +- [nanoc-html-pipeline](https://github.com/burnto/nanoc-html-pipeline) +- [html-pipeline-bitly](https://github.com/dewski/html-pipeline-bitly) +- [html-pipeline-cite](https://github.com/lifted-studios/html-pipeline-cite) +- [tilt-html-pipeline](https://github.com/bradgessler/tilt-html-pipeline) +- [html-pipeline-wiki-link](https://github.com/lifted-studios/html-pipeline-wiki-link) - WikiMedia-style wiki links +- [task_list](https://github.com/github/task_list) - GitHub Flavored Markdown Task List +- [html-pipeline-nico_link](https://github.com/rutan/html-pipeline-nico_link) - An HTMLPipeline filter for [niconico](http://www.nicovideo.jp) description links +- [html-pipeline-gitlab](https://gitlab.com/gitlab-org/html-pipeline-gitlab) - This gem implements various filters for html-pipeline used by GitLab +- [html-pipeline-youtube](https://github.com/st0012/html-pipeline-youtube) - An HTMLPipeline filter for YouTube links +- [html-pipeline-flickr](https://github.com/st0012/html-pipeline-flickr) - An HTMLPipeline filter for Flickr links +- [html-pipeline-vimeo](https://github.com/dlackty/html-pipeline-vimeo) - An HTMLPipeline filter for Vimeo links +- [html-pipeline-hashtag](https://github.com/mr-dxdy/html-pipeline-hashtag) - An HTMLPipeline filter for hashtags +- [html-pipeline-linkify_github](https://github.com/jollygoodcode/html-pipeline-linkify_github) - An HTMLPipeline filter to autolink GitHub URLs +- [html-pipeline-redcarpet_filter](https://github.com/bmikol/html-pipeline-redcarpet_filter) - Render Markdown source text into Markdown HTML using Redcarpet +- [html-pipeline-typogruby_filter](https://github.com/bmikol/html-pipeline-typogruby_filter) - Add Typogruby text filters to your HTMLPipeline +- [korgi](https://github.com/jodeci/korgi) - 
HTMLPipeline filters for links to Rails resources + ## FAQ ### 1. Why doesn't my pipeline work when there's no root element in the document? To make a pipeline work on a plain text document, put the `PlainTextInputFilter` -at the beginning of your pipeline. This will wrap the content in a `div` so the -filters have a root element to work with. If you're passing in an HTML fragment, +at the end of your `text_filters` config. This will wrap the content in a `div` so the filters have a root element to work with. If you're passing in an HTML fragment, but it doesn't have a root element, you can wrap the content in a `div` -yourself. For example: - -```ruby -EmojiPipeline = Pipeline.new [ - PlainTextInputFilter, # <- Wraps input in a div and escapes html tags - EmojiFilter -], context - -plain_text = "Gutentag! :wave:" -EmojiPipeline.call(plain_text) - -html_fragment = "This is outside of an html element, but this isn't. :+1:" -EmojiPipeline.call("
#{html_fragment}
") # <- Wrap your own html fragments to avoid escaping -``` +yourself. ### 2. How do I customize an allowlist for `SanitizationFilter`s? -`SanitizationFilter::ALLOWLIST` is the default allowlist used if no `:allowlist` -argument is given in the context. The default is a good starting template for +`HTMLPipeline::SanitizationFilter::ALLOWLIST` is the default allowlist used if no `sanitization_config` +argument is given. The default is a good starting template for you to add additional elements. You can either modify the constant's value, or -re-define your own constant and pass that in via the context. - -## Contributing +re-define your own config and pass that in, such as: -Please review the [Contributing Guide](https://github.com/jch/html-pipeline/blob/master/CONTRIBUTING.md). - -1. [Fork it](https://help.github.com/articles/fork-a-repo) -2. Create your feature branch (`git checkout -b my-new-feature`) -3. Commit your changes (`git commit -am 'Added some feature'`) -4. Push to the branch (`git push origin my-new-feature`) -5. Create new [Pull Request](https://help.github.com/articles/using-pull-requests) - -To see what has changed in recent versions, see the [CHANGELOG](https://github.com/jch/html-pipeline/blob/master/CHANGELOG.md). +```ruby +config = HTMLPipeline::SanitizerFilter::DEFAULT_CONFIG.dup +config[:elements] << "iframe" # sure, whatever you want +``` ### Contributors -Thanks to all of [these contributors](https://github.com/jch/html-pipeline/graphs/contributors). - -Project is a member of the [OSS Manifesto](http://ossmanifesto.org/). - -The current maintainer is @gjtorikian - -### Releasing A New Version - -This section is for gem maintainers to cut a new version of the gem. +Thanks to all of [these contributors](https://github.com/gjtorikian/html-pipeline/graphs/contributors). 
-* create a new branch named `release-x.y.z` where `x.y.z` follows [semver](http://semver.org) -* update lib/html/pipeline/version.rb to next version number X.X.X -* update CHANGELOG.md. Prepare a draft with `script/changelog` -* push branch and create a new pull request -* after tests are green, merge to master -* on the master branch, run `script/release` +This project is a member of the [OSS Manifesto](http://ossmanifesto.org/). diff --git a/Rakefile b/Rakefile index f26f9adc..a86d00c4 100755 --- a/Rakefile +++ b/Rakefile @@ -1,17 +1,24 @@ #!/usr/bin/env rake # frozen_string_literal: true -require 'rubygems' -require 'bundler/setup' - -require 'bundler/gem_tasks' -require 'rake/testtask' +require "bundler/gem_tasks" +require "rubygems/package_task" +require "rake/testtask" Rake::TestTask.new do |t| - t.libs << 'test' - t.test_files = FileList['test/**/*_test.rb'] + t.libs << "test" + t.test_files = FileList["test/**/*_test.rb"] t.verbose = true t.warning = false end task default: :test + +require "rubocop/rake_task" + +RuboCop::RakeTask.new(:rubocop) + +GEMSPEC = Bundler.load_gemspec("html-pipeline.gemspec") +gem_path = Gem::PackageTask.new(GEMSPEC).define +desc "Package the ruby gem" +task "package" => [gem_path] diff --git a/UPGRADING.md b/UPGRADING.md new file mode 100644 index 00000000..f0565678 --- /dev/null +++ b/UPGRADING.md @@ -0,0 +1,35 @@ +# Upgrade Guide + +## From v2 to v3 + +HTMLPipeline v3 is a massive improvement over this still much loved (and woefully under-maintained) project. This section will attempt to list all of the breaking changes between the two versions and provide suggestions on how to upgrade. + +### Changed namespace + +This project is now under a module called `HTMLPipeline`, not `HTML::Pipeline`. 
+ +### Removed filters + +The following filters were removed: + +- `AutolinkFilter`: this is handled by [Commonmarker](https://www.github.com/gjtorikian/commonmarker) and can be disabled/enabled through the `MarkdownFilter`'s `context` hash +- `SyntaxHighlightFilter`: this is handled by [Commonmarker](https://www.github.com/gjtorikian/commonmarker) and can be disabled/enabled through the `MarkdownFilter`'s `context` hash +- `SanitizationFilter`: this is handled by [Selma](https://www.github.com/gjtorikian/selma); configuration can be done through the `sanitization_config` hash + +- `EmailReplyFilter` +- `CamoFilter` +- `TextFilter` + +### Changed API + +The new way to call this project is as follows: + +```ruby +HTMLPipeline.new( + text_filters: [], # array of instantiated (`.new`ed) `HTMLPipeline::TextFilter` + convert_filter:, # a filter that runs to turn text into HTML + sanitization_config: {}, # an allowlist of elements/attributes/protocols to keep + node_filters: []) # array of instantiated (`.new`ed) `HTMLPipeline::NodeFilter` +``` + +Please refer to the README for more information on constructing filters. In most cases, the underlying filter needs only a few changes, primarily to make use of [Selma](https://www.github.com/gjtorikian/selma) rather than Nokogiri. diff --git a/bin/html-pipeline b/bin/html-pipeline deleted file mode 100755 index f262ac6b..00000000 --- a/bin/html-pipeline +++ /dev/null @@ -1,78 +0,0 @@ -#!/usr/bin/env ruby -require 'html/pipeline' - -require 'optparse' - -# Accept "help", too -.map! { |a| a == 'help' ? 
'--help' : a } - -onParser.new do |opts| - opts.banner = <<-HELP.gsub(/^ /, '') - Usage: html-pipeline [-h] [-f] - html-pipeline [FILTER [FILTER [...]]] < file.md - cat file.md | html-pipeline [FILTER [FILTER [...]]] - HELP - - opts.separator 'Options:' - - opts.on('-f', '--filters', 'List the available filters') do - filters = HTML::Pipeline.constants.grep(/\w+Filter$/) - .map { |f| f.to_s.gsub(/Filter$/, '') } - - # Text filter doesn't work, no call method - filters -= ['Text'] - - abort <<-HELP.gsub(/^ /, '') - Available filters: - #{filters.join("\n ")} - HELP - end -end.parse! - -# Default to a GitHub-ish pipeline -if ARGV.empty? - - filters = [ - HTML::Pipeline::MarkdownFilter, - HTML::Pipeline::SanitizationFilter, - HTML::Pipeline::ImageMaxWidthFilter, - HTML::Pipeline::EmojiFilter, - HTML::Pipeline::AutolinkFilter, - HTML::Pipeline::TableOfContentsFilter - ] - - # Add syntax highlighting if rouge is present - begin - require 'rouge' - filters << HTML::Pipeline::SyntaxHighlightFilter - rescue LoadError - end - -else - - def filter_named(name) - case name - when 'Text' - raise NameError # Text filter doesn't work, no call method - end - - HTML::Pipeline.const_get("#{name}Filter") - rescue NameError => e - abort "Unknown filter '#{name}'. List filters with the -f option." - end - - filters = [] - until ARGV.empty? 
- name = ARGV.shift - filters << filter_named(name) - end - -end - -context = { - asset_root: '/assets', - base_url: '/', - gfm: true -} - -puts HTML::Pipeline.new(filters, context).call(ARGF.read)[:output] diff --git a/gemfiles/rails_3.gemfile b/gemfiles/rails_3.gemfile deleted file mode 100644 index d3497431..00000000 --- a/gemfiles/rails_3.gemfile +++ /dev/null @@ -1,27 +0,0 @@ -# This file was generated by Appraisal - -source "https://rubygems.org" - -gem "rack", "< 2" -gem "rails", "3.2.22.2" - -group :development do - gem "appraisal" - gem "bundler" - gem "rake" -end - -group :test do - gem "commonmarker", "~> 0.16", require: false - gem "email_reply_parser", "~> 0.5", require: false - gem "gemoji", "~> 2.0", require: false - gem "minitest" - gem "RedCloth", "~> 4.2.9", require: false - gem "rinku", "~> 1.7", require: false - gem "sanitize", "~> 4.6", require: false - gem "escape_utils", "~> 1.0", require: false - gem "rouge", "~> 3.1", require: false - gem "minitest-focus", "~> 1.1" -end - -gemspec path: "../" diff --git a/gemfiles/rails_4.gemfile b/gemfiles/rails_4.gemfile deleted file mode 100644 index a00d20ad..00000000 --- a/gemfiles/rails_4.gemfile +++ /dev/null @@ -1,27 +0,0 @@ -# This file was generated by Appraisal - -source "https://rubygems.org" - -gem "rack", "< 2" -gem "rails", "~> 4.2.6" - -group :development do - gem "appraisal" - gem "bundler" - gem "rake" -end - -group :test do - gem "commonmarker", "~> 0.16", require: false - gem "email_reply_parser", "~> 0.5", require: false - gem "gemoji", "~> 2.0", require: false - gem "minitest" - gem "RedCloth", "~> 4.2.9", require: false - gem "rinku", "~> 1.7", require: false - gem "sanitize", "~> 4.6", require: false - gem "escape_utils", "~> 1.0", require: false - gem "rouge", "~> 3.1", require: false - gem "minitest-focus", "~> 1.1" -end - -gemspec path: "../" diff --git a/gemfiles/rails_5.gemfile b/gemfiles/rails_5.gemfile deleted file mode 100644 index 1f64a4f9..00000000 --- 
a/gemfiles/rails_5.gemfile +++ /dev/null @@ -1,26 +0,0 @@ -# This file was generated by Appraisal - -source "https://rubygems.org" - -gem "rails", "~> 5.0.0" - -group :development do - gem "appraisal" - gem "bundler" - gem "rake" -end - -group :test do - gem "commonmarker", "~> 0.16", require: false - gem "email_reply_parser", "~> 0.5", require: false - gem "gemoji", "~> 2.0", require: false - gem "minitest" - gem "RedCloth", "~> 4.2.9", require: false - gem "rinku", "~> 1.7", require: false - gem "sanitize", "~> 4.6", require: false - gem "escape_utils", "~> 1.0", require: false - gem "rouge", "~> 3.1", require: false - gem "minitest-focus", "~> 1.1" -end - -gemspec path: "../" diff --git a/gemfiles/rails_6.gemfile b/gemfiles/rails_6.gemfile deleted file mode 100644 index 134e8425..00000000 --- a/gemfiles/rails_6.gemfile +++ /dev/null @@ -1,26 +0,0 @@ -# This file was generated by Appraisal - -source "https://rubygems.org" - -gem "rails", "~> 6.0.0" - -group :development do - gem "appraisal" - gem "bundler" - gem "rake" -end - -group :test do - gem "commonmarker", "~> 0.16", require: false - gem "email_reply_parser", "~> 0.5", require: false - gem "gemoji", "~> 2.0", require: false - gem "minitest" - gem "RedCloth", "~> 4.2.9", require: false - gem "rinku", "~> 1.7", require: false - gem "sanitize", "~> 4.6", require: false - gem "escape_utils", "~> 1.0", require: false - gem "rouge", "~> 3.1", require: false - gem "minitest-focus", "~> 1.1" -end - -gemspec path: "../" diff --git a/html-pipeline.gemspec b/html-pipeline.gemspec index ec88cfe2..1a7fd0ed 100644 --- a/html-pipeline.gemspec +++ b/html-pipeline.gemspec @@ -1,29 +1,39 @@ # frozen_string_literal: true -require File.expand_path('../lib/html/pipeline/version', __FILE__) +$LOAD_PATH.push(File.expand_path("lib", __dir__)) +require "html_pipeline/version" Gem::Specification.new do |gem| - gem.name = 'html-pipeline' - gem.version = HTML::Pipeline::VERSION - gem.license = 'MIT' - gem.authors = ['Ryan Tomayko', 
'Jerry Cheung', 'Garen J. Torikian'] - gem.email = ['ryan@github.com', 'jerry@github.com', 'gjtorikian@gmail.com'] - gem.description = 'GitHub HTML processing filters and utilities' - gem.summary = 'Helpers for processing content through a chain of filters' - gem.homepage = 'https://github.com/jch/html-pipeline' + gem.name = "html-pipeline" + gem.version = HTMLPipeline::VERSION + gem.license = "MIT" + gem.authors = ["Garen J. Torikian"] + gem.email = ["gjtorikian@gmail.com"] + gem.description = "HTML processing filters and utilities" + gem.summary = "Helpers for processing content through a chain of filters" + gem.homepage = "https://github.com/gjtorikian/html-pipeline" - gem.files = `git ls-files -z`.split("\x0").reject { |f| f =~ %r{^(test|gemfiles|script)/} } - gem.require_paths = ['lib'] + gem.files = %x(git ls-files -z).split("\x0").reject { |f| f =~ %r{^(test|gemfiles|script)/} } + gem.require_paths = ["lib"] - gem.add_dependency 'activesupport', '>= 2' - gem.add_dependency 'nokogiri', '>= 1.4' + gem.required_ruby_version = "~> 3.1" + # https://github.com/rubygems/rubygems/pull/5852#issuecomment-1231118509 + gem.required_rubygems_version = ">= 3.3.22" - gem.post_install_message = < "https://github.com/sponsors/gjtorikian/", + "rubygems_mfa_required" => "true", + } + + gem.add_dependency("selma", "~> 0.0.1") + gem.add_dependency("zeitwerk", "~> 2.5") + + gem.post_install_message = <<~MSG + ------------------------------------------------- + Thank you for installing html-pipeline! + You must bundle filter gem dependencies. 
+ See the html-pipeline README.md for more details: + https://github.com/gjtorikian/html-pipeline#dependencies + ------------------------------------------------- + MSG end diff --git a/lib/html-pipeline.rb b/lib/html-pipeline.rb new file mode 100644 index 00000000..d72c0c65 --- /dev/null +++ b/lib/html-pipeline.rb @@ -0,0 +1,3 @@ +# frozen_string_literal: true + +require_relative "html_pipeline" diff --git a/lib/html/pipeline.rb b/lib/html/pipeline.rb deleted file mode 100644 index 8f5cf8db..00000000 --- a/lib/html/pipeline.rb +++ /dev/null @@ -1,210 +0,0 @@ -# frozen_string_literal: true - -require 'nokogiri' -require 'active_support/xml_mini/nokogiri' # convert Documents to hashes - -module HTML - # GitHub HTML processing filters and utilities. This module includes a small - # framework for defining DOM based content filters and applying them to user - # provided content. - # - # See HTML::Pipeline::Filter for information on building filters. - # - # Construct a Pipeline for running multiple HTML filters. A pipeline is created once - # with one to many filters, and it then can be `call`ed many times over the course - # of its lifetime with input. - # - # filters - Array of Filter objects. Each must respond to call(doc, - # context) and return the modified DocumentFragment or a - # String containing HTML markup. Filters are performed in the - # order provided. - # default_context - The default context hash. Values specified here will be merged - # into values from the each individual pipeline run. Can NOT be - # nil. Default: empty Hash. - # result_class - The default Class of the result object for individual - # calls. Default: Hash. Protip: Pass in a Struct to get - # some semblance of type safety. 
- class Pipeline - autoload :VERSION, 'html/pipeline/version' - autoload :Filter, 'html/pipeline/filter' - autoload :AbsoluteSourceFilter, 'html/pipeline/absolute_source_filter' - autoload :BodyContent, 'html/pipeline/body_content' - autoload :AutolinkFilter, 'html/pipeline/autolink_filter' - autoload :CamoFilter, 'html/pipeline/camo_filter' - autoload :EmailReplyFilter, 'html/pipeline/email_reply_filter' - autoload :EmojiFilter, 'html/pipeline/emoji_filter' - autoload :HttpsFilter, 'html/pipeline/https_filter' - autoload :ImageFilter, 'html/pipeline/image_filter' - autoload :ImageMaxWidthFilter, 'html/pipeline/image_max_width_filter' - autoload :MarkdownFilter, 'html/pipeline/markdown_filter' - autoload :MentionFilter, 'html/pipeline/@mention_filter' - autoload :TeamMentionFilter, 'html/pipeline/@team_mention_filter' - autoload :PlainTextInputFilter, 'html/pipeline/plain_text_input_filter' - autoload :SanitizationFilter, 'html/pipeline/sanitization_filter' - autoload :SyntaxHighlightFilter, 'html/pipeline/syntax_highlight_filter' - autoload :TextileFilter, 'html/pipeline/textile_filter' - autoload :TableOfContentsFilter, 'html/pipeline/toc_filter' - autoload :TextFilter, 'html/pipeline/text_filter' - - class MissingDependencyError < RuntimeError; end - def self.require_dependency(name, requirer) - require name - rescue LoadError => e - raise MissingDependencyError, - "Missing dependency '#{name}' for #{requirer}. See README.md for details.\n#{e.class.name}: #{e}" - end - - # Our DOM implementation. - DocumentFragment = Nokogiri::HTML::DocumentFragment - - # Parse a String into a DocumentFragment object. When a DocumentFragment is - # provided, return it verbatim. - def self.parse(document_or_html) - document_or_html ||= '' - if document_or_html.is_a?(String) - DocumentFragment.parse(document_or_html) - else - document_or_html - end - end - - # Public: Returns an Array of Filter objects for this Pipeline. 
- attr_reader :filters - - # Public: Instrumentation service for the pipeline. - # Set an ActiveSupport::Notifications compatible object to enable. - attr_accessor :instrumentation_service - - # Public: String name for this Pipeline. Defaults to Class name. - attr_writer :instrumentation_name - def instrumentation_name - return @instrumentation_name if defined?(@instrumentation_name) - @instrumentation_name = self.class.name - end - - class << self - # Public: Default instrumentation service for new pipeline objects. - attr_accessor :default_instrumentation_service - end - - def initialize(filters, default_context = {}, result_class = nil) - raise ArgumentError, 'default_context cannot be nil' if default_context.nil? - @filters = filters.flatten.freeze - @default_context = default_context.freeze - @result_class = result_class || Hash - @instrumentation_service = self.class.default_instrumentation_service - end - - # Apply all filters in the pipeline to the given HTML. - # - # html - A String containing HTML or a DocumentFragment object. - # context - The context hash passed to each filter. See the Filter docs - # for more info on possible values. This object MUST NOT be modified - # in place by filters. Use the Result for passing state back. - # result - The result Hash passed to each filter for modification. This - # is where Filters store extracted information from the content. - # - # Returns the result Hash after being filtered by this Pipeline. Contains an - # :output key with the DocumentFragment or String HTML markup based on the - # output of the last filter in the pipeline. 
- def call(html, context = {}, result = nil) - context = @default_context.merge(context) - context = context.freeze - result ||= @result_class.new - payload = default_payload filters: @filters.map(&:name), - context: context, result: result - instrument 'call_pipeline.html_pipeline', payload do - result[:output] = - @filters.inject(html) do |doc, filter| - perform_filter(filter, doc, context, result) - end - end - result - end - - # Internal: Applies a specific filter to the supplied doc. - # - # The filter is instrumented. - # - # Returns the result of the filter. - def perform_filter(filter, doc, context, result) - payload = default_payload filter: filter.name, - context: context, result: result - instrument 'call_filter.html_pipeline', payload do - filter.call(doc, context, result) - end - end - - # Like call but guarantee the value returned is a DocumentFragment. - # Pipelines may return a DocumentFragment or a String. Callers that need a - # DocumentFragment should use this method. - def to_document(input, context = {}, result = nil) - result = call(input, context, result) - HTML::Pipeline.parse(result[:output]) - end - - # Like call but guarantee the value returned is a string of HTML markup. - def to_html(input, context = {}, result = nil) - result = call(input, context, result = nil) - output = result[:output] - if output.respond_to?(:to_html) - output.to_html - else - output.to_s - end - end - - # Public: setup instrumentation for this pipeline. - # - # Returns nothing. - def setup_instrumentation(name = nil, service = nil) - self.instrumentation_name = name - self.instrumentation_service = - service || self.class.default_instrumentation_service - end - - # Internal: if the `instrumentation_service` object is set, instruments the - # block, otherwise the block is ran without instrumentation. - # - # Returns the result of the provided block. 
- def instrument(event, payload = nil) - payload ||= default_payload - return yield(payload) unless instrumentation_service - instrumentation_service.instrument event, payload do |payload| - yield payload - end - end - - # Internal: Default payload for instrumentation. - # - # Accepts a Hash of additional payload data to be merged. - # - # Returns a Hash. - def default_payload(payload = {}) - { pipeline: instrumentation_name }.merge(payload) - end - end -end - -# XXX nokogiri monkey patches for 1.8 -unless ''.respond_to?(:force_encoding) - class Nokogiri::XML::Node - # Work around an issue with utf-8 encoded data being erroneously converted to - # ... some other shit when replacing text nodes. See 'utf-8 output 2' in - # user_content_test.rb for details. - def replace_with_encoding_fix(replacement) - if replacement.respond_to?(:to_str) - replacement = document.fragment("
#{replacement}
").children.first.children - end - replace_without_encoding_fix(replacement) - end - - alias replace_without_encoding_fix replace - alias replace replace_with_encoding_fix - - def swap(replacement) - replace(replacement) - self - end - end -end diff --git a/lib/html/pipeline/@team_mention_filter.rb b/lib/html/pipeline/@team_mention_filter.rb deleted file mode 100644 index 562ef344..00000000 --- a/lib/html/pipeline/@team_mention_filter.rb +++ /dev/null @@ -1,99 +0,0 @@ -# frozen_string_literal: true - -require 'set' - -module HTML - class Pipeline - # HTML filter that replaces @org/team mentions with links. Mentions within - #
, , , '
-    assert_equal body, filter(body).to_html
-  end
-
-  def test_not_replacing_mentions_in_links
-    body = '

@kneath okay

' - assert_equal body, filter(body).to_html - end - - def test_entity_encoding_and_whatnot - body = "

@kneath what's up

" - link = '@kneath' - assert_equal "

#{link} what's up

", filter(body, '/').to_html - end - - def test_html_injection - body = '

@kneath <script>alert(0)</script>

' - link = '@kneath' - assert_equal "

#{link} <script>alert(0)</script>

", - filter(body, '/').to_html - end - - def test_links_to_nothing_when_no_info_url_given - body = '

How do I @mention someone?

' - assert_equal '

How do I @mention someone?

', - filter(body, '/').to_html - end - - def test_links_to_more_info_when_info_url_given - body = '

How do I @mention someone?

' - link = '@mention' - assert_equal "

How do I #{link} someone?

", - filter(body, '/', 'https://github.com/blog/821').to_html - end - - def test_base_url_slash - body = '

Hi, @jch!

' - link = '@jch' - assert_equal "

Hi, #{link}!

", - filter(body, '/').to_html - end - - def test_base_url_under_custom_route - body = '

Hi, @jch!

' - link = '@jch' - assert_equal "

Hi, #{link}!

", - filter(body, '/userprofile').to_html - end - - def test_base_url_slash_with_tilde - body = '

Hi, @jch!

' - link = '@jch' - assert_equal "

Hi, #{link}!

", - filter(body, '/~').to_html - end - - MarkdownPipeline = - HTML::Pipeline.new [ - HTML::Pipeline::MarkdownFilter, - HTML::Pipeline::MentionFilter - ] - - def mentioned_usernames - result = {} - MarkdownPipeline.call(@body, {}, result) - result[:mentioned_usernames] - end - - def test_matches_usernames_in_body - @body = '@test how are you?' - assert_equal %w[test], mentioned_usernames - end - - def test_matches_usernames_with_dashes - @body = 'hi @some-user' - assert_equal %w[some-user], mentioned_usernames - end - - def test_matches_usernames_followed_by_a_single_dot - @body = 'okay @some-user.' - assert_equal %w[some-user], mentioned_usernames - end - - def test_matches_usernames_followed_by_multiple_dots - @body = 'okay @some-user...' - assert_equal %w[some-user], mentioned_usernames - end - - def test_does_not_match_email_addresses - @body = 'aman@tmm1.net' - assert_equal [], mentioned_usernames - end - - def test_does_not_match_domain_name_looking_things - @body = 'we need a @github.com email' - assert_equal [], mentioned_usernames - end - - def test_does_not_match_organization_team_mentions - @body = 'we need to @github/enterprise know' - assert_equal [], mentioned_usernames - end - - def test_matches_colon_suffixed_names - @body = '@tmm1: what do you think?' 
- assert_equal %w[tmm1], mentioned_usernames - end - - def test_matches_list_of_names - @body = '@defunkt @atmos @kneath' - assert_equal %w[defunkt atmos kneath], mentioned_usernames - end - - def test_matches_list_of_names_with_commas - @body = '/cc @defunkt, @atmos, @kneath' - assert_equal %w[defunkt atmos kneath], mentioned_usernames - end - - def test_matches_inside_brackets - @body = '(@mislav) and [@rtomayko]' - assert_equal %w[mislav rtomayko], mentioned_usernames - end - - def test_doesnt_ignore_invalid_users - @body = '@defunkt @mojombo and @somedude' - assert_equal %w[defunkt mojombo somedude], mentioned_usernames - end - - def test_returns_distinct_set - @body = '/cc @defunkt, @atmos, @kneath, @defunkt, @defunkt' - assert_equal %w[defunkt atmos kneath], mentioned_usernames - end - - def test_does_not_match_inline_code_block_with_multiple_code_blocks - @body = "something\n\n`/cc @defunkt @atmos @kneath` `/cc @atmos/atmos`" - assert_equal %w[], mentioned_usernames - end - - def test_mention_at_end_of_parenthetical_sentence - @body = "(We're talking 'bout @ymendel.)" - assert_equal %w[ymendel], mentioned_usernames - end - - def test_username_pattern_can_be_customized - body = '

@_abc: test.

' - doc = Nokogiri::HTML::DocumentFragment.parse(body) - - res = filter(doc, '/', nil, /(_[a-z]{3})/) - - link = '@_abc' - assert_equal "

#{link}: test.

", - res.to_html - end - - def test_filter_does_not_create_a_new_object_for_default_username_pattern - body = '
@test
' - doc = Nokogiri::HTML::DocumentFragment.parse(body) - - filter(doc.clone, '/', nil) - pattern_count = HTML::Pipeline::MentionFilter::MentionPatterns.length - filter(doc.clone, '/', nil) - - assert_equal pattern_count, HTML::Pipeline::MentionFilter::MentionPatterns.length - filter(doc.clone, '/', nil, /test/) - assert_equal pattern_count + 1, HTML::Pipeline::MentionFilter::MentionPatterns.length - end - - def test_mention_link_filter - filter = HTML::Pipeline::MentionFilter.new nil - expected = "@hubot" - assert_equal expected, filter.mention_link_filter('@hubot') - end -end diff --git a/test/html/pipeline/plain_text_input_filter_test.rb b/test/html/pipeline/plain_text_input_filter_test.rb deleted file mode 100644 index c5a8c148..00000000 --- a/test/html/pipeline/plain_text_input_filter_test.rb +++ /dev/null @@ -1,24 +0,0 @@ -# frozen_string_literal: true - -require 'test_helper' - -class HTML::Pipeline::PlainTextInputFilterTest < Minitest::Test - PlainTextInputFilter = HTML::Pipeline::PlainTextInputFilter - - def test_fails_when_given_a_documentfragment - body = '

heyo

' - doc = Nokogiri::HTML::DocumentFragment.parse(body) - assert_raises(TypeError) { PlainTextInputFilter.call(doc, {}) } - end - - def test_wraps_input_in_a_div_element - doc = PlainTextInputFilter.call('howdy pahtner', {}) - assert_equal '
howdy pahtner
', doc.to_s - end - - def test_html_escapes_plain_text_input - doc = PlainTextInputFilter.call('See: ', {}) - assert_equal '
See: <http://example.org>
', - doc.to_s - end -end diff --git a/test/html/pipeline/require_helper_test.rb b/test/html/pipeline/require_helper_test.rb deleted file mode 100644 index e4aeafbc..00000000 --- a/test/html/pipeline/require_helper_test.rb +++ /dev/null @@ -1,29 +0,0 @@ -# frozen_string_literal: true - -require 'test_helper' - -class HTML::Pipeline::RequireHelperTest < Minitest::Test - def test_works_with_existing - HTML::Pipeline.require_dependency('rake', 'SomeClass') - end - - def test_raises_mising_dependency_error - assert_raises HTML::Pipeline::MissingDependencyError do - HTML::Pipeline.require_dependency('non-existant', 'SomeClass') - end - end - - def test_raises_error_including_message - error = assert_raises(HTML::Pipeline::MissingDependencyError) do - HTML::Pipeline.require_dependency('non-existant', 'SomeClass') - end - assert_includes(error.message, "Missing dependency 'non-existant' for SomeClass. See README.md for details.") - end - - def test_raises_error_includes_underlying_message - error = assert_raises HTML::Pipeline::MissingDependencyError do - HTML::Pipeline.require_dependency('non-existant', 'SomeClass') - end - assert_includes(error.message, 'LoadError: cannot load such file') - end -end diff --git a/test/html/pipeline/sanitization_filter_test.rb b/test/html/pipeline/sanitization_filter_test.rb deleted file mode 100644 index 52d35ea2..00000000 --- a/test/html/pipeline/sanitization_filter_test.rb +++ /dev/null @@ -1,183 +0,0 @@ -# frozen_string_literal: true - -require 'test_helper' - -class HTML::Pipeline::SanitizationFilterTest < Minitest::Test - SanitizationFilter = HTML::Pipeline::SanitizationFilter - - def test_removing_script_tags - orig = %(

) - html = SanitizationFilter.call(orig).to_s - refute_match /script/, html - end - - def test_removing_style_tags - orig = %(

) - html = SanitizationFilter.call(orig).to_s - refute_match /style/, html - end - - def test_removing_style_attributes - orig = %(

YO DAWG

) - html = SanitizationFilter.call(orig).to_s - refute_match /font-size/, html - refute_match /style/, html - end - - def test_removing_script_event_handler_attributes - orig = %(YO DAWG) - html = SanitizationFilter.call(orig).to_s - refute_match /javscript/, html - refute_match /onclick/, html - end - - def test_sanitizes_li_elements_not_contained_in_ul_or_ol - stuff = "a\n
  • b
  • \nc" - html = SanitizationFilter.call(stuff).to_s - assert_equal "a\nb\nc", html - end - - def test_does_not_sanitize_li_elements_contained_in_ul_or_ol - stuff = "a\n
    • b
    \nc" - assert_equal stuff, SanitizationFilter.call(stuff).to_s - end - - def test_github_specific_protocols_are_not_removed - stuff = 'Spill this yo and so on' - assert_equal stuff, SanitizationFilter.call(stuff).to_s - end - - def test_unknown_schemes_are_removed - stuff = 'Wat is this' - html = SanitizationFilter.call(stuff).to_s - assert_equal 'Wat is this', html - end - - def test_allowlisted_longdesc_schemes_are_allowed - stuff = '' - html = SanitizationFilter.call(stuff).to_s - assert_equal '', html - end - - def test_weird_longdesc_schemes_are_removed - stuff = '' - html = SanitizationFilter.call(stuff).to_s - assert_equal '', html - end - - def test_standard_schemes_are_removed_if_not_specified_in_anchor_schemes - stuff = 'No href for you' - filter = SanitizationFilter.new(stuff, anchor_schemes: []) - html = filter.call.to_s - assert_equal 'No href for you', html - end - - def test_custom_anchor_schemes_are_not_removed - stuff = 'Wat is this' - filter = SanitizationFilter.new(stuff, anchor_schemes: ['something-weird']) - html = filter.call.to_s - assert_equal stuff, html - end - - def test_anchor_schemes_are_merged_with_other_anchor_restrictions - stuff = 'Wat is this' - allowlist = { - elements: ['a'], - attributes: { 'a' => %w[href ping] }, - protocols: { 'a' => { 'ping' => ['http'] } } - } - filter = SanitizationFilter.new(stuff, allowlist: allowlist, anchor_schemes: ['something-weird']) - html = filter.call.to_s - assert_equal 'Wat is this', html - end - - def test_uses_anchor_schemes_from_allowlist_when_not_separately_specified - stuff = 'Wat is this' - allowlist = { - elements: ['a'], - attributes: { 'a' => ['href'] }, - protocols: { 'a' => { 'href' => ['something-weird'] } } - } - filter = SanitizationFilter.new(stuff, allowlist: allowlist) - html = filter.call.to_s - assert_equal stuff, html - end - - def test_allowlist_contains_default_anchor_schemes - assert_equal SanitizationFilter::ALLOWLIST[:protocols]['a']['href'], ['http', 'https', 
'mailto', 'xmpp', :relative, 'github-windows', 'github-mac', 'irc', 'ircs'] - end - - def test_allowlist_from_full_constant - stuff = 'Wat is this' - filter = SanitizationFilter.new(stuff, allowlist: SanitizationFilter::FULL) - html = filter.call.to_s - assert_equal 'Wat is this', html - end - - def test_exports_default_anchor_schemes - assert_equal SanitizationFilter::ANCHOR_SCHEMES, ['http', 'https', 'mailto', 'xmpp', :relative, 'github-windows', 'github-mac', 'irc', 'ircs'] - end - - def test_script_contents_are_removed - orig = '' - assert_equal '', SanitizationFilter.call(orig).to_s - end - - def test_table_rows_and_cells_removed_if_not_in_table - orig = %(FooBar) - assert_equal 'FooBar', SanitizationFilter.call(orig).to_s - end - - def test_table_sections_removed_if_not_in_table - orig = %(Foo) - assert_equal 'Foo', SanitizationFilter.call(orig).to_s - end - - def test_table_sections_are_not_removed - orig = %( - - - -
    Column 1
    Sum
    1
    ) - assert_equal orig, SanitizationFilter.call(orig).to_s - end - - def test_summary_tag_are_not_removed - orig = %(Foo) - assert_equal orig, SanitizationFilter.call(orig).to_s - end - - def test_details_tag_and_open_attribute_are_not_removed - orig = %(
    Foo
    ) - assert_equal orig, SanitizationFilter.call(orig).to_s - end - - def test_nested_details_tag_are_not_removed - orig = <<-NESTED -
    - Foo -
    - Bar - Baz -
    - Qux -
    - NESTED - assert_equal orig, SanitizationFilter.call(orig).to_s - end - - def test_deprecated_whitelist_context - orig = %(

    ) - context = { whitelist: ['table'] } - - assert_equal ['table'], SanitizationFilter.new(orig, context).allowlist - end - - def test_deprecation_warning_whitelist - orig = %(

    ) - _stdout, stderror = capture_io do - SanitizationFilter.new(orig).whitelist - end - assert_match "[DEPRECATION] 'whitelist' is deprecated. Please use 'allowlist' instead.", stderror - end -end diff --git a/test/html/pipeline/syntax_highlight_filter_test.rb b/test/html/pipeline/syntax_highlight_filter_test.rb deleted file mode 100644 index 9d4a50a8..00000000 --- a/test/html/pipeline/syntax_highlight_filter_test.rb +++ /dev/null @@ -1,65 +0,0 @@ -# frozen_string_literal: true - -require 'test_helper' -require 'escape_utils' - -SyntaxHighlightFilter = HTML::Pipeline::SyntaxHighlightFilter - -class HTML::Pipeline::SyntaxHighlightFilterTest < Minitest::Test - def test_highlight_default - filter = SyntaxHighlightFilter.new \ - '
    hello
    ', highlight: 'coffeescript' - - doc = filter.call - assert !doc.css('.highlight').empty? - assert !doc.css('.highlight-coffeescript').empty? - end - - def test_highlight_default_will_not_override - filter = SyntaxHighlightFilter.new \ - "
    hello
    ", highlight: 'coffeescript' - - doc = filter.call - assert doc.css('.highlight-coffeescript').empty? - assert !doc.css('.highlight-c').empty? - end - - def test_highlight_does_not_remove_pre_tag - filter = SyntaxHighlightFilter.new \ - "
    hello
    ", highlight: 'coffeescript' - - doc = filter.call - - assert !doc.css('pre').empty? - end - - def test_highlight_allows_optional_scope - filter = SyntaxHighlightFilter.new \ - "
    hello
    ", highlight: 'coffeescript', scope: 'test-scope' - - doc = filter.call - - assert !doc.css('pre.test-scope').empty? - end - - def test_highlight_keeps_the_pre_tags_lang - filter = SyntaxHighlightFilter.new \ - "
    hello
    ", highlight: 'coffeescript' - - doc = filter.call - - assert !doc.css('pre[lang=c]').empty? - end - - def test_highlight_handles_nested_pre_tags - inner_code = "
    console.log('i am nested!')
    " - escaped = CGI.escape_html(inner_code) - html = "
    #{escaped}
    " - filter = SyntaxHighlightFilter.new html, highlight: 'html' - - doc = filter.call - - assert_equal 2, doc.css('span[class=nt]').length - assert_equal CGI.unescape_html(escaped), doc.inner_text - end -end diff --git a/test/html/pipeline/team_mention_filter_test.rb b/test/html/pipeline/team_mention_filter_test.rb deleted file mode 100644 index 127c2c19..00000000 --- a/test/html/pipeline/team_mention_filter_test.rb +++ /dev/null @@ -1,202 +0,0 @@ -# frozen_string_literal: true - -require 'test_helper' - -class HTML::Pipeline::TeamMentionFilterTest < Minitest::Test - def filter(html, base_url = '/', team_pattern = nil) - HTML::Pipeline::TeamMentionFilter.call(html, base_url: base_url, team_pattern: team_pattern) - end - - def test_filtering_plain_text - body = '

    @github/team: check it out.

    ' - res = filter(body, '/') - - link = '@github/team' - assert_equal "

    #{link}: check it out.

    ", - res.to_html - end - - def test_filtering_a_documentfragment - body = '

    @github/team: check it out.

    ' - doc = Nokogiri::HTML::DocumentFragment.parse(body) - - res = filter(doc, '/') - assert_same doc, res - - link = '@github/team' - assert_equal "

    #{link}: check it out.

    ", - res.to_html - end - - def test_not_replacing_mentions_in_pre_tags - body = '
    @github/team: okay
    ' - assert_equal body, filter(body).to_html - end - - def test_not_replacing_mentions_in_code_tags - body = '

    @github/team: okay

    ' - assert_equal body, filter(body).to_html - end - - def test_not_replacing_mentions_in_style_tags - body = '' - assert_equal body, filter(body).to_html - end - - def test_not_replacing_mentions_in_links - body = '

    @github/team okay

    ' - assert_equal body, filter(body).to_html - end - - def test_entity_encoding_and_whatnot - body = "

    @github/team what's up

    " - link = '@github/team' - assert_equal "

    #{link} what's up

    ", filter(body, '/').to_html - end - - def test_html_injection - body = '

    @github/team <script>alert(0)</script>

    ' - link = '@github/team' - assert_equal "

    #{link} <script>alert(0)</script>

    ", - filter(body, '/').to_html - end - - def test_links_to_nothing_with_user_mention - body = '

    Hi, @kneath

    ' - assert_equal '

    Hi, @kneath

    ', - filter(body, '/').to_html - end - - def test_base_url_slash - body = '

    Hi, @github/team!

    ' - link = '@github/team' - assert_equal "

    Hi, #{link}!

    ", - filter(body, '/').to_html - end - - def test_base_url_under_custom_route - body = '

    Hi, @org/team!

    ' - link = '@org/team' - assert_equal "

    Hi, #{link}!

    ", - filter(body, 'www.github.com').to_html - end - - def test_base_url_slash_with_tilde - body = '

    Hi, @github/team!

    ' - link = '@github/team' - assert_equal "

    Hi, #{link}!

    ", - filter(body, '/~').to_html - end - - def test_multiple_team_mentions - body = '

    Hi, @github/whale and @github/donut!

    ' - link_whale = '@github/whale' - link_donut = '@github/donut' - assert_equal "

    Hi, #{link_whale} and #{link_donut}!

    ", - filter(body).to_html - end - - MarkdownPipeline = - HTML::Pipeline.new [ - HTML::Pipeline::MarkdownFilter, - HTML::Pipeline::TeamMentionFilter - ] - - def mentioned_teams - result = {} - MarkdownPipeline.call(@body, {}, result) - result[:mentioned_teams] - end - - def test_matches_teams_in_body - @body = '@test/team how are you?' - assert_equal %w[team], mentioned_teams - end - - def test_matches_orgs_with_dashes - @body = 'hi @some-org/team' - assert_equal %w[team], mentioned_teams - end - - def test_matches_teams_with_dashes - @body = 'hi @github/some-team' - assert_equal %w[some-team], mentioned_teams - end - - def test_matches_teams_followed_by_a_single_dot - @body = 'okay @github/team.' - assert_equal %w[team], mentioned_teams - end - - def test_matches_teams_followed_by_multiple_dots - @body = 'okay @github/team...' - assert_equal %w[team], mentioned_teams - end - - def test_does_not_match_email_addresses - @body = 'aman@tmm1.net' - assert_equal [], mentioned_teams - end - - def test_does_not_match_domain_name_looking_things - @body = 'we need a @github.com email' - assert_equal [], mentioned_teams - end - - def test_does_not_match_user_mentions - @body = 'we need to @enterprise know' - assert_equal [], mentioned_teams - end - - def test_matches_colon_suffixed_team_names - @body = '@github/team: what do you think?' 
- assert_equal %w[team], mentioned_teams - end - - def test_matches_list_of_teams - @body = '@github/whale @github/donut @github/green' - assert_equal %w[whale donut green], mentioned_teams - end - - def test_matches_list_of_teams_with_commas - @body = '/cc @github/whale, @github/donut, @github/green' - assert_equal %w[whale donut green], mentioned_teams - end - - def test_matches_inside_brackets - @body = '(@github/whale) and [@github/donut]' - assert_equal %w[whale donut], mentioned_teams - end - - def test_returns_distinct_set - @body = '/cc @github/whale, @github/donut, @github/whale, @github/whale' - assert_equal %w[whale donut], mentioned_teams - end - - def test_does_not_match_inline_code_block_with_multiple_code_blocks - @body = "something\n\n`/cc @github/whale @github/donut @github/green` `/cc @donut/donut`" - assert_equal %w[], mentioned_teams - end - - def test_mention_at_end_of_parenthetical_sentence - @body = "(We're talking 'bout @some-org/some-team.)" - assert_equal %w[some-team], mentioned_teams - end - - def test_team_pattern_can_be_customized - body = '

    @_abc/XYZ: test

    ' - doc = Nokogiri::HTML::DocumentFragment.parse(body) - - res = filter(doc, '/', /@(_[a-z]{3})\/([A-Z]{3})/) - - link = '@_abc/XYZ' - assert_equal "

    #{link}: test

    ", - res.to_html - end - - def test_mention_link_filter - filter = HTML::Pipeline::TeamMentionFilter.new nil - expected = "@bot/hubot" - assert_equal expected, filter.mention_link_filter('@bot/hubot') - end -end diff --git a/test/html/pipeline/toc_filter_test.rb b/test/html/pipeline/toc_filter_test.rb deleted file mode 100644 index b79ea238..00000000 --- a/test/html/pipeline/toc_filter_test.rb +++ /dev/null @@ -1,141 +0,0 @@ -# frozen_string_literal: true - -require 'test_helper' - -class HTML::Pipeline::TableOfContentsFilterTest < Minitest::Test - TocFilter = HTML::Pipeline::TableOfContentsFilter - - TocPipeline = - HTML::Pipeline.new [ - HTML::Pipeline::TableOfContentsFilter - ] - - def toc - result = {} - TocPipeline.call(@orig, {}, result) - result[:toc] - end - - def test_anchors_are_added_properly - orig = %(

    Ice cube

    Will swarm on any motherfucker in a blue uniform

    ) - assert_includes TocFilter.call(orig).to_s, 'Ice cube

    Will swarm on any motherfucker in a blue uniform

    ) - assert_includes toc, %(
    ) - - assert_equal expected, rendered_toc - end - end -end diff --git a/test/html/pipeline_test.rb b/test/html/pipeline_test.rb deleted file mode 100644 index 28be8bbb..00000000 --- a/test/html/pipeline_test.rb +++ /dev/null @@ -1,76 +0,0 @@ -# frozen_string_literal: true - -require 'test_helper' -require 'helpers/mocked_instrumentation_service' - -class HTML::PipelineTest < Minitest::Test - Pipeline = HTML::Pipeline - class TestFilter - def self.call(input, _context, _result) - input.reverse - end - end - - def setup - @context = {} - @result_class = Hash - @pipeline = Pipeline.new [TestFilter], @context, @result_class - end - - def test_filter_instrumentation - service = MockedInstrumentationService.new - events = service.subscribe 'call_filter.html_pipeline' - @pipeline.instrumentation_service = service - filter(body = 'hello') - event, payload, res = events.pop - assert event, 'event expected' - assert_equal 'call_filter.html_pipeline', event - assert_equal TestFilter.name, payload[:filter] - assert_equal @pipeline.class.name, payload[:pipeline] - assert_equal body.reverse, payload[:result][:output] - end - - def test_pipeline_instrumentation - service = MockedInstrumentationService.new - events = service.subscribe 'call_pipeline.html_pipeline' - @pipeline.instrumentation_service = service - filter(body = 'hello') - event, payload, res = events.pop - assert event, 'event expected' - assert_equal 'call_pipeline.html_pipeline', event - assert_equal @pipeline.filters.map(&:name), payload[:filters] - assert_equal @pipeline.class.name, payload[:pipeline] - assert_equal body.reverse, payload[:result][:output] - end - - def test_default_instrumentation_service - service = 'default' - Pipeline.default_instrumentation_service = service - pipeline = Pipeline.new [], @context, @result_class - assert_equal service, pipeline.instrumentation_service - ensure - Pipeline.default_instrumentation_service = nil - end - - def test_setup_instrumentation - assert_nil 
@pipeline.instrumentation_service - - service = MockedInstrumentationService.new - events = service.subscribe 'call_pipeline.html_pipeline' - @pipeline.setup_instrumentation name = 'foo', service - - assert_equal service, @pipeline.instrumentation_service - assert_equal name, @pipeline.instrumentation_name - - filter(body = 'foo') - - event, payload, res = events.pop - assert event, 'expected event' - assert_equal name, payload[:pipeline] - assert_equal body.reverse, payload[:result][:output] - end - - def filter(input) - @pipeline.call(input) - end -end diff --git a/test/html_pipeline/convert_filter/markdown_filter_test.rb b/test/html_pipeline/convert_filter/markdown_filter_test.rb new file mode 100644 index 00000000..e656e080 --- /dev/null +++ b/test/html_pipeline/convert_filter/markdown_filter_test.rb @@ -0,0 +1,139 @@ +# frozen_string_literal: true + +require "test_helper" + +MarkdownFilter = HTMLPipeline::ConvertFilter::MarkdownFilter + +class HTMLPipeline + class MarkdownFilterTest < Minitest::Test + def setup + @haiku = + "Pointing at the moon\n" \ + "Reminded of simple things\n" \ + "Moments matter most" + @links = + "See http://example.org/ for more info" + @code = + "```\n" \ + "def hello()" \ + " 'world'" \ + "end" \ + "```" + @header = <<~DOC + # Words + + Some words + + ## Words + + More words? 
+ DOC + end + + def test_fails_when_given_a_non_string + assert_raises(TypeError) { MarkdownFilter.call(23, context: {}) } + end + + def test_gfm_enabled_by_default + doc = MarkdownFilter.call(@haiku) + + assert_equal(2, Nokogiri.parse(doc).search("br").size) + end + + def test_disabling_hardbreaks + doc = MarkdownFilter.call(@haiku, context: { markdown: { render: { hardbreaks: false } } }) + + assert_equal(0, Nokogiri.parse(doc).search("br").size) + end + + def test_fenced_code_blocks + doc = MarkdownFilter.call(@code) + + assert_equal(1, Nokogiri.parse(doc).search("pre").size) + end + + def test_fenced_code_blocks_with_language + doc = MarkdownFilter.call(@code.sub("```", "``` ruby")) + + assert_equal(1, Nokogiri.parse(doc).search("pre").size) + assert_equal("ruby", Nokogiri.parse(doc).search("pre").first["lang"]) + end + + def test_standard_extensions + iframe = "" + iframe_escaped = "<iframe src='http://www.google.com'></iframe>" + doc = MarkdownFilter.call(iframe, context: { markdown: { render: { unsafe_: true } } }) + + assert_equal(doc, iframe_escaped) + end + + def test_changing_extensions + iframe = "" + doc = MarkdownFilter.call(iframe, context: { markdown: { extension: { tagfilter: false }, render: { unsafe_: true } } }) + + assert_equal(doc, iframe) + end + + def test_without_tagfilter + options = { render: { unsafe_: true }, extension: { tagfilter: false } } + script = "" + results = MarkdownFilter.call(script, context: { markdown: options }) + + assert_equal(results, script) + end + end +end + +class GFMTest < Minitest::Test + def setup + @gfm = MarkdownFilter + @context = { markdown: { render: { unsafe_: true }, plugins: { syntax_highlighter: nil } } } + end + + def test_not_touch_single_underscores_inside_words + assert_equal("

    foo_bar

    ", + @gfm.call("foo_bar", context: @context)) + end + + def test_not_touch_underscores_in_code_blocks + assert_equal("
    foo_bar_baz\n
    ", + @gfm.call(" foo_bar_baz", context: @context)) + end + + def test_not_touch_underscores_in_pre_blocks + assert_equal("
    \nfoo_bar_baz\n
    ", + @gfm.call("
    \nfoo_bar_baz\n
    ", context: @context)) + end + + def test_not_touch_two_or_more_underscores_inside_words + assert_equal("

    foo_bar_baz

    ", + @gfm.call("foo_bar_baz", context: @context)) + end + + def test_turn_newlines_into_br_tags_in_simple_cases + assert_equal("

    foo
    \nbar

    ", + @gfm.call("foo \nbar", context: @context)) + end + + def test_convert_newlines_in_all_groups + assert_equal("

    apple
    \npear
    \norange

    \n" \ + "

    ruby
    \npython
    \nerlang

    ", + @gfm.call("apple \npear \norange \n\nruby \npython \nerlang", context: @context)) + end + + def test_convert_newlines_in_even_long_groups + assert_equal("

    apple
    \npear
    \norange
    \nbanana

    \n" \ + "

    ruby
    \npython
    \nerlang

    ", + @gfm.call("apple \npear \norange \nbanana \n\nruby \npython \nerlang", context: @context)) + end + + def test_not_convert_newlines_in_lists + options = Commonmarker::Config.merged_with_defaults({}) + options[:extension].delete(:header_ids) + + assert_equal("

    foo

    \n

    bar

    ", + @gfm.call("# foo\n# bar", context: { markdown: options })) + assert_equal("
      \n
    • foo
    • \n
    • bar
    • \n
    ", + @gfm.call("* foo\n* bar", context: { markdown: options })) + end +end diff --git a/test/html_pipeline/node_filter/absolute_source_filter_test.rb b/test/html_pipeline/node_filter/absolute_source_filter_test.rb new file mode 100644 index 00000000..0cd36a10 --- /dev/null +++ b/test/html_pipeline/node_filter/absolute_source_filter_test.rb @@ -0,0 +1,64 @@ +# frozen_string_literal: true + +require "test_helper" + +AbsoluteSourceFilter = HTMLPipeline::NodeFilter::AbsoluteSourceFilter +class HTMLPipeline + class NodeFilter + class AbsoluteSourceFilterTest < Minitest::Test + def setup + @image_base_url = "http://assets.example.com" + @image_subpage_url = "http://blog.example.com/a/post" + @options = { + image_base_url: @image_base_url, + image_subpage_url: @image_subpage_url, + } + end + + def test_rewrites_root_urls + orig = %(

    ) + + assert_equal("

    ", + AbsoluteSourceFilter.call(orig, context: @options).to_s) + end + + def test_rewrites_relative_urls + orig = %(

    ) + + assert_equal("

    ", + AbsoluteSourceFilter.call(orig, context: @options).to_s) + end + + def test_does_not_rewrite_absolute_urls + orig = %(

    ) + result = AbsoluteSourceFilter.call(orig, context: @options).to_s + + refute_match(/@image_base_url/, result) + refute_match(/@image_subpage_url/, result) + end + + def test_fails_when_context_is_missing + assert_raises(RuntimeError) do + AbsoluteSourceFilter.call('', context: {}) + end + assert_raises(RuntimeError) do + AbsoluteSourceFilter.call('', context: {}) + end + end + + def test_tells_you_where_context_is_required + exception = assert_raises(RuntimeError) do + AbsoluteSourceFilter.call('', context: {}) + end + + assert_match("HTMLPipeline::NodeFilter::AbsoluteSourceFilter", exception.message) + + exception = assert_raises(RuntimeError) do + AbsoluteSourceFilter.call('', context: {}) + end + + assert_match("HTMLPipeline::NodeFilter::AbsoluteSourceFilter", exception.message) + end + end + end +end diff --git a/test/html_pipeline/node_filter/emoji_filter_test.rb b/test/html_pipeline/node_filter/emoji_filter_test.rb new file mode 100644 index 00000000..7fcfc2d7 --- /dev/null +++ b/test/html_pipeline/node_filter/emoji_filter_test.rb @@ -0,0 +1,124 @@ +# frozen_string_literal: true + +require "test_helper" + +EmojiFilterFilter = HTMLPipeline::NodeFilter::EmojiFilter +class HTMLPipeline + class EmojiFilterTest < Minitest::Test + def setup + @emoji_filter = HTMLPipeline::NodeFilter::EmojiFilter + end + + def test_emojify + orig = "

    :shipit:

    " + result = @emoji_filter.call(orig, context: { asset_root: "https://foo.com" }) + + assert_match("https://foo.com/emoji/shipit.png", result) + end + + def test_uri_encoding + result = @emoji_filter.call("

    :+1:

    ", context: { asset_root: "https://foo.com" }) + + assert_match("https://foo.com/emoji/unicode/1f44d.png", result) + end + + def test_required_context_validation + exception = assert_raises(ArgumentError) do + @emoji_filter.call("", context: {}) + end + + assert_match(/:asset_root/, exception.message) + end + + def test_custom_asset_path + result = @emoji_filter.call("

    :+1:

    ", context: { asset_path: ":file_name", asset_root: "https://foo.com" }) + + assert_match("https://foo.com/unicode/1f44d.png", result) + end + + def test_not_emojify_in_code_tags + body = ":shipit:" + result = @emoji_filter.call(body, context: { asset_root: "https://foo.com" }) + + assert_equal(body, result) + end + + def test_not_emojify_in_tt_tags + body = ":shipit:" + result = @emoji_filter.call(body, context: { asset_root: "https://foo.com" }) + + assert_equal(body, result) + end + + def test_not_emojify_in_pre_tags + body = "
    :shipit:
    " + result = @emoji_filter.call(body, context: { asset_root: "https://foo.com" }) + + assert_equal(body, result) + end + + def test_not_emojify_in_custom_single_tag_foo + body = ":shipit:" + result = @emoji_filter.call(body, context: { asset_root: "https://foo.com", ignored_ancestor_tags: ["foo"] }) + + assert_equal(body, result) + end + + def test_not_emojify_in_custom_multiple_tags_foo_and_bar + body = ":shipit:" + result = @emoji_filter.call(body, context: { asset_root: "https://foo.com", ignored_ancestor_tags: ["foo", "bar"] }) + + assert_equal(body, result) + end + + def test_img_tag_attributes + body = "

    :shipit:

    " + result = @emoji_filter.call(body, context: { asset_root: "https://foo.com" }) + + assert_match(%(:shipit:), result) + end + + def test_img_tag_attributes_can_be_customized + body = "

    :shipit:

    " + result = @emoji_filter.call(body, context: { asset_root: "https://foo.com", img_attrs: Hash("draggable" => "false", "height" => nil, "width" => nil, "align" => nil) }) + + assert_match(%(:shipit:), result) + end + + def test_img_attrs_value_can_accept_proclike_object + remove_colons = ->(name) { name.delete(":") } + body = "

    :shipit:

    " + result = @emoji_filter.call(body, context: { asset_root: "https://foo.com", img_attrs: Hash("title" => remove_colons) }) + + assert_match(%(:shipit:), result) + end + + def test_img_attrs_can_accept_symbolized_keys + body = "

    :shipit:

    " + result = @emoji_filter.call(body, context: { asset_root: "https://foo.com", img_attrs: Hash(draggable: false, height: nil, width: nil, align: nil) }) + + assert_match(%(:shipit:), result) + end + + def test_works_with_gemoji + require "gemojione" + + HTMLPipeline::NodeFilter::EmojiFilter.stub(:gemoji_loaded?, false) do + body = ":flag_ar:" + result = HTMLPipeline::NodeFilter::EmojiFilter.call(body, context: { asset_root: "https://foo.com" }) + + assert_equal(%(:flag_ar:), result) + end + end + + def test_gemoji_can_accept_symbolized_keys + require "gemojione" + HTMLPipeline::NodeFilter::EmojiFilter.stub(:gemoji_loaded?, false) do + body = ":flag_ar:" + result = HTMLPipeline::NodeFilter::EmojiFilter.call(body, context: { asset_root: "https://coolwebsite.com", img_attrs: Hash(draggable: false, height: nil, width: nil, align: nil) }) + + assert_equal(%(:flag_ar:), result) + end + end + end +end diff --git a/test/html_pipeline/node_filter/https_filter_test.rb b/test/html_pipeline/node_filter/https_filter_test.rb new file mode 100644 index 00000000..98eb25b9 --- /dev/null +++ b/test/html_pipeline/node_filter/https_filter_test.rb @@ -0,0 +1,33 @@ +# frozen_string_literal: true + +require "test_helper" + +HttpsFilter = HTMLPipeline::NodeFilter::HttpsFilter + +class HTMLPipeline + class HttpsFilterTest < Minitest::Test + def setup + @options = { base_url: "http://github.com" } + end + + def test_http + assert_equal(%(github.com), + HttpsFilter.call(%(github.com), context: @options)) + end + + def test_https + assert_equal(%(github.com), + HttpsFilter.call(%(github.com), context: @options)) + end + + def test_subdomain + assert_equal(%(github.com), + HttpsFilter.call(%(github.com), context: @options)) + end + + def test_other + assert_equal(%(github.io), + HttpsFilter.call(%(github.io), context: @options)) + end + end +end diff --git a/test/html_pipeline/node_filter/image_max_width_filter_test.rb b/test/html_pipeline/node_filter/image_max_width_filter_test.rb new 
file mode 100644 index 00000000..df7f4f9d --- /dev/null +++ b/test/html_pipeline/node_filter/image_max_width_filter_test.rb @@ -0,0 +1,54 @@ +# frozen_string_literal: true + +require "test_helper" + +class HTMLPipeline + class ImageMaxWidthFilterTest < Minitest::Test + def setup + @filter = HTMLPipeline::NodeFilter::ImageMaxWidthFilter + end + + def test_rewrites_image_style_tags + body = '

    Screenshot:

    ' + res = @filter.call(body) + + assert_equal('

    Screenshot:

    ', + res) + end + + def test_leaves_existing_image_style_tags_alone + body = '

    ' + + res = @filter.call(body) + + assert_equal('

    ', + res) + end + + def test_links_to_image + body = '

    Screenshot:

    ' + + res = @filter.call(body) + + assert_equal('

    Screenshot:

    ', + res) + end + + def test_doesnt_link_to_image_when_already_linked + body = '

    Screenshot:

    ' + + res = @filter.call(body) + + assert_equal('

    Screenshot:

    ', + res) + end + + def test_doesnt_screw_up_inlined_images + body = '

    Screenshot , yes, this is a screenshot indeed.

    ' + + res = @filter.call(body) + + assert_equal('

    Screenshot , yes, this is a screenshot indeed.

    ', res) + end + end +end diff --git a/test/html_pipeline/node_filter/mention_filter_test.rb b/test/html_pipeline/node_filter/mention_filter_test.rb new file mode 100644 index 00000000..bcd9b2d1 --- /dev/null +++ b/test/html_pipeline/node_filter/mention_filter_test.rb @@ -0,0 +1,219 @@ +# frozen_string_literal: true + +require "test_helper" + +class HTMLPipeline + class MentionFilterTest < Minitest::Test + def setup + @filter = HTMLPipeline::NodeFilter::MentionFilter + @context = { base_url: "/", info_url: nil, username_pattern: nil } + + @pipeline = HTMLPipeline.new(convert_filter: + HTMLPipeline::ConvertFilter::MarkdownFilter.new, + node_filters: [ + HTMLPipeline::NodeFilter::MentionFilter.new, + ]) + end + + def mentioned_usernames(body) + result = {} + result = @pipeline.call(body, result: result) + result[:mentioned_usernames] + end + @pipeline = + def test_filtering_plain_text + body = "

    @kneath: check it out.

    " + res = @filter.call(body, context: @context) + + link = '@kneath' + + assert_equal("

    #{link}: check it out.

    ", + res) + end + + def test_not_replacing_mentions_in_pre_tags + body = "
    @kneath: okay
    " + + assert_equal(body, @filter.call(body, context: @context)) + end + + def test_not_replacing_mentions_in_code_tags + body = "

    @kneath: okay

    " + + assert_equal(body, @filter.call(body, context: @context)) + end + + def test_not_replacing_mentions_in_style_tags + body = "" + + assert_equal(body, @filter.call(body, context: @context)) + end + + def test_not_replacing_mentions_in_links + body = "

    @kneath okay

    " + + assert_equal(body, @filter.call(body, context: @context)) + end + + def test_entity_encoding_and_whatnot + body = "

    @kneath what's up

    " + + assert_equal(body, @filter.call(body, context: @context)) + end + + def test_html_injection + body = "

    @kneath <script>alert(0)</script>

    " + link = '@kneath' + + assert_equal("

    #{link} <script>alert(0)</script>

    ", + @filter.call(body, context: @context)) + end + + def test_base_url_slash + body = "

    Hi, @jch!

    " + link = '@jch' + + assert_equal("

    Hi, #{link}!

    ", + @filter.call(body, context: { base_url: "/" })) + end + + def test_base_url_under_custom_route + body = "

    Hi, @jch!

    " + link = '@jch' + + assert_equal("

    Hi, #{link}!

    ", + @filter.call(body, context: @context.merge({ base_url: "/userprofile" }))) + end + + def test_base_url_slash_with_tilde + body = "

    Hi, @jch!

    " + link = '@jch' + + assert_equal("

    Hi, #{link}!

    ", + @filter.call(body, context: @context.merge({ base_url: "/~" }))) + end + + def test_matches_usernames_in_body + body = "@test how are you?" + + assert_equal(["test"], mentioned_usernames(body)) + end + + def test_matches_usernames_with_dashes + body = "hi @some-user" + + assert_equal(["some-user"], mentioned_usernames(body)) + end + + def test_matches_usernames_followed_by_a_single_dot + body = "okay @some-user." + + assert_equal(["some-user"], mentioned_usernames(body)) + end + + def test_matches_usernames_followed_by_multiple_dots + body = "okay @some-user..." + + assert_equal(["some-user"], mentioned_usernames(body)) + end + + def test_does_not_match_email_addresses + body = "aman@tmm1.net" + + assert_empty(mentioned_usernames(body)) + end + + def test_does_not_match_domain_name_looking_things + body = "we need a @github.com email" + + assert_empty(mentioned_usernames(body)) + end + + def test_does_not_match_organization_team_mentions + body = "we need to @github/enterprise know" + + assert_empty(mentioned_usernames(body)) + end + + def test_matches_colon_suffixed_names + body = "@tmm1: what do you think?" 
+ + assert_equal(["tmm1"], mentioned_usernames(body)) + end + + def test_matches_list_of_names + body = "@defunkt @atmos @kneath" + + assert_equal(["defunkt", "atmos", "kneath"], mentioned_usernames(body)) + end + + def test_matches_list_of_names_with_commas + body = "/cc @defunkt, @atmos, @kneath" + + assert_equal(["defunkt", "atmos", "kneath"], mentioned_usernames(body)) + end + + def test_matches_inside_brackets + body = "(@mislav) and [@rtomayko]" + + assert_equal(["mislav", "rtomayko"], mentioned_usernames(body)) + end + + def test_doesnt_ignore_invalid_users + body = "@defunkt @mojombo and @somedude" + + assert_equal(["defunkt", "mojombo", "somedude"], mentioned_usernames(body)) + end + + def test_returns_distinct_set + body = "/cc @defunkt, @atmos, @kneath, @defunkt, @defunkt" + + assert_equal(["defunkt", "atmos", "kneath"], mentioned_usernames(body)) + end + + def test_does_not_match_inline_code_block_with_multiple_code_blocks + body = "something\n\n`/cc @defunkt @atmos @kneath` `/cc @atmos/atmos`" + + assert_empty(mentioned_usernames(body)) + end + + def test_mention_at_end_of_parenthetical_sentence + body = "(We're talking 'bout @ymendel.)" + + assert_equal(["ymendel"], mentioned_usernames(body)) + end + + def test_username_pattern_can_be_customized + body = "

    @_abc: test.

    " + + res = @filter.call(body, context: { base_url: "/", username_pattern: /(_[a-z]{3})/ }) + + link = '@_abc' + + assert_equal("

    #{link}: test.

    ", + res) + end + + def test_filter_does_not_create_a_new_object_for_default_username_pattern + body = "
    @test
    " + + @filter.call(body.dup) + pattern_count = HTMLPipeline::NodeFilter::MentionFilter::MENTION_PATTERNS.length + + @filter.call(body.dup) + + assert_equal(pattern_count, HTMLPipeline::NodeFilter::MentionFilter::MENTION_PATTERNS.length) + + @filter.call(body.clone, context: { username_pattern: /test/ }) + + assert_equal(pattern_count + 1, HTMLPipeline::NodeFilter::MentionFilter::MENTION_PATTERNS.length) + end + + def test_mention_link_filter + result = HTMLPipeline::NodeFilter::MentionFilter.call("

    @hubot

    ") + expected = '

    @hubot

    ' + + assert_equal(expected, result) + end + end +end diff --git a/test/html_pipeline/node_filter/syntax_highlight_filter_test.rb b/test/html_pipeline/node_filter/syntax_highlight_filter_test.rb new file mode 100644 index 00000000..a2b47e0b --- /dev/null +++ b/test/html_pipeline/node_filter/syntax_highlight_filter_test.rb @@ -0,0 +1,61 @@ +# frozen_string_literal: true + +require "test_helper" + +SyntaxHighlightFilter = HTMLPipeline::NodeFilter::SyntaxHighlightFilter + +class HTMLPipeline + class SyntaxHighlightFilterTest < Minitest::Test + def test_highlight_default + result = SyntaxHighlightFilter.call(\ + "
    hello
    ", context: { highlight: "coffeescript" } + ) + + doc = Nokogiri.parse(result) + + refute_empty(doc.css(".highlight")) + refute_empty(doc.css(".highlight-coffeescript")) + end + + def test_highlight_default_will_not_override + result = SyntaxHighlightFilter.call(\ + "
    hello
    ", context: { highlight: "coffeescript" } + ) + + doc = Nokogiri.parse(result) + + assert_empty(doc.css(".highlight-coffeescript")) + refute_empty(doc.css(".highlight-c")) + end + + def test_highlight_does_not_remove_pre_tag + result = SyntaxHighlightFilter.call(\ + "
    hello
    ", context: { highlight: "coffeescript" } + ) + + doc = Nokogiri.parse(result) + + refute_empty(doc.css("pre")) + end + + def test_highlight_allows_optional_scope + result = SyntaxHighlightFilter.call(\ + "
    hello
    ", context: { highlight: "coffeescript", scope: "test-scope" } + ) + + doc = Nokogiri.parse(result) + + refute_empty(doc.css("pre.test-scope")) + end + + def test_highlight_keeps_the_pre_tags_lang + result = SyntaxHighlightFilter.call(\ + "
    hello
    ", context: { highlight: "coffeescript" } + ) + + doc = Nokogiri.parse(result) + + refute_empty(doc.css("pre[lang=c]")) + end + end +end diff --git a/test/html_pipeline/node_filter/table_of_contents_filter_test.rb b/test/html_pipeline/node_filter/table_of_contents_filter_test.rb new file mode 100644 index 00000000..33b62889 --- /dev/null +++ b/test/html_pipeline/node_filter/table_of_contents_filter_test.rb @@ -0,0 +1,186 @@ +# frozen_string_literal: true + +require "test_helper" + +class HTMLPipeline + class NodeFilter + class TableOfContentsFilterTest < Minitest::Test + TocFilter = HTMLPipeline::NodeFilter::TableOfContentsFilter + + TocPipeline = + HTMLPipeline.new(convert_filter: HTMLPipeline::ConvertFilter::MarkdownFilter.new, node_filters: [ + TocFilter.new, + ]) + + def toc_s(content) + result = TocPipeline.call(content, context: {}, result: result) + result[:toc].to_s + end + + def test_anchors_and_list_are_added_properly + orig = %(# Ice cube\n\nWill swarm on any motherfucker in a blue uniform) + result = TocPipeline.call(orig) + + assert_includes(result[:output], "Ice cube) + pipeline = HTMLPipeline.new(convert_filter: HTMLPipeline::ConvertFilter::MarkdownFilter, node_filters: [ + TocFilter.new(context: { anchor_html: "#" }), + ]) + result = pipeline.call(orig) + + assert_equal(expected, result[:output]) + end + + def test_toc_list_added_properly + orig = %(# Ice cube\n\nWill swarm on any motherfucker in a blue uniform) + result = TocPipeline.call(orig) + + assert_equal({ href: "#ice-cube", text: "Ice cube" }, result[:toc].first) + end + + def test_anchors_have_sane_names + orig = %(# Dr. 
Dre\n\n# Ice Cube\n\n# Eazy-E\n\n# MC Ren) + + result = TocPipeline.call(orig)[:output] + + assert_includes(result, '"dr-dre"') + assert_includes(result, '"ice-cube"') + assert_includes(result, '"eazy-e"') + assert_includes(result, '"mc-ren"') + end + + def test_anchors_have_aria_hidden + orig = "# Straight Outta Compton" + result = TocPipeline.call(orig)[:output] + + assert_includes(result, 'aria-hidden="true"') + end + + def test_toc_hrefs_have_sane_values + orig = %(# Dr. Dre\n\n# Ice Cube\n\n# Eazy-E\n\n# MC Ren) + result = TocPipeline.call(orig)[:output] + + assert_includes(result, '"#dr-dre"') + assert_includes(result, '"#ice-cube"') + assert_includes(result, '"#eazy-e"') + assert_includes(result, '"#mc-ren"') + end + + def test_dupe_headers_have_unique_trailing_identifiers + orig = <<~STR + # Straight Outta Compton + + ## Dopeman + + ### Express Yourself + + # Dopeman + STR + + result = TocPipeline.call(orig)[:output] + + assert_includes(result, '"dopeman"') + assert_includes(result, '"dopeman-1"') + end + + def test_dupe_headers_have_unique_toc_anchors + orig = <<~STR + # Straight Outta Compton + + ## Dopeman + + ### Express Yourself + + # Dopeman + STR + + assert_includes(toc_s(orig), '"#dopeman"') + assert_includes(toc_s(orig), '"#dopeman-1"') + end + + def test_all_header_tags_are_found_when_adding_anchors + orig = <<~STR + # "Funky President" by James Brown + ## "It's My Thing" by Marva Whitney + ### "Boogie Back" by Roy Ayers + #### "Feel Good" by Fancy + ##### "Funky Drummer" by James Brown + ###### "Ruthless Villain" by Eazy-E + STR + + result = TocPipeline.call(orig, context: {}, result: result) + + doc = Nokogiri::HTML(result[:output]) + + assert_equal(6, doc.search("a").size) + end + + def test_toc_outputs_escaped_html + orig = %(# <img src="x" onerror="alert(42)">) + + refute_includes(toc_s(orig), %()) + end + + def test_toc_is_complete + orig = <<~STR + # "Funky President" by James Brown + ## "It's My Thing" by Marva Whitney + ### "Boogie Back" 
by Roy Ayers + #### "Feel Good" by Fancy + ##### "Funky Drummer" by James Brown + ###### "Ruthless Villain" by Eazy-E + STR + + result = TocPipeline.call(orig)[:toc] + expected = [{ href: "#funky-president-by-james-brown", text: ""Funky President" by James Brown" }, + { href: "#its-my-thing-by-marva-whitney", text: ""It's My Thing" by Marva Whitney" }, + { href: "#boogie-back-by-roy-ayers", text: ""Boogie Back" by Roy Ayers" }, + { href: "#feel-good-by-fancy", text: ""Feel Good" by Fancy" }, + { href: "#funky-drummer-by-james-brown", text: ""Funky Drummer" by James Brown" }, + { href: "#ruthless-villain-by-eazy-e", text: ""Ruthless Villain" by Eazy-E" },] + + 0..6.times do |i| + assert_equal(expected[i], result[i]) + end + end + + def test_anchors_with_utf8_characters + orig = <<~STR + # 日本語 + + # Русский + STR + + rendered_h1s = Nokogiri::HTML(TocPipeline.call(orig)[:output]).search("h1").map(&:to_s) + + assert_equal("

    \n日本語

    ", + rendered_h1s[0]) + assert_equal("

    \nРусский

    ", + rendered_h1s[1]) + end + + def test_toc_with_utf8_characters + orig = <<~STR + # 日本語 + + # Русский + STR + + result = TocPipeline.call(orig)[:toc] + + expected = [{ href: "#%E6%97%A5%E6%9C%AC%E8%AA%9E", + text: "日本語", }, + { href: "#%D1%80%D1%83%D1%81%D1%81%D0%BA%D0%B8%D0%B9", + text: "Русский", },] + + 2.times do |i| + assert_equal(expected[i], result[i]) + end + end + end + end +end diff --git a/test/html_pipeline/node_filter/team_mention_filter_test.rb b/test/html_pipeline/node_filter/team_mention_filter_test.rb new file mode 100644 index 00000000..4c93dbab --- /dev/null +++ b/test/html_pipeline/node_filter/team_mention_filter_test.rb @@ -0,0 +1,220 @@ +# frozen_string_literal: true + +require "test_helper" + +class HTMLPipeline + class TeamMentionFilterTest < Minitest::Test + def setup + @filter = HTMLPipeline::NodeFilter::TeamMentionFilter + + @pipeline = + HTMLPipeline.new(convert_filter: HTMLPipeline::ConvertFilter::MarkdownFilter.new, + node_filters: [ + HTMLPipeline::NodeFilter::TeamMentionFilter.new, + ]) + end + + def mentioned_teams(body) + result = {} + result = @pipeline.call(body, result: result) + result[:mentioned_teams] + end + + def test_filtering_plain_text + body = "

    @github/team: check it out.

    " + res = @filter.call(body, context: { base_url: "/" }) + + link = '@github/team' + + assert_equal("

    #{link}: check it out.

    ", + res) + end + + def test_not_replacing_mentions_in_pre_tags + body = "
    @github/team: okay
    " + + assert_equal(body, @filter.call(body)) + end + + def test_not_replacing_mentions_in_code_tags + body = "

    @github/team: okay

    " + + assert_equal(body, @filter.call(body)) + end + + def test_not_replacing_mentions_in_style_tags + body = "" + + assert_equal(body, @filter.call(body)) + end + + def test_not_replacing_mentions_in_links + body = "

    @github/team okay

    " + + assert_equal(body, @filter.call(body)) + end + + def test_entity_encoding_and_whatnot + body = "

    @github/team what's up

    " + link = '@github/team' + + assert_equal("

    #{link} what's up

    ", @filter.call(body, context: { base_url: "/" })) + end + + def test_html_injection + body = "

    @github/team <script>alert(0)</script>

    " + link = '@github/team' + + assert_equal("

    #{link} <script>alert(0)</script>

    ", + @filter.call(body, context: { base_url: "/" })) + end + + def test_links_to_nothing_with_user_mention + body = "

    Hi, @kneath

    " + + assert_equal("

    Hi, @kneath

    ", + @filter.call(body, context: { base_url: "/" })) + end + + def test_base_url_slash + body = "

    Hi, @github/team!

    " + link = '@github/team' + + assert_equal("

    Hi, #{link}!

    ", + @filter.call(body, context: { base_url: "/" })) + end + + def test_base_url_under_custom_route + body = "

    Hi, @org/team!

    " + link = '@org/team' + + assert_equal("

    Hi, #{link}!

    ", + @filter.call(body, context: { base_url: "www.github.com" })) + end + + def test_base_url_slash_with_tilde + body = "

    Hi, @github/team!

    " + link = '@github/team' + + assert_equal("

    Hi, #{link}!

    ", + @filter.call(body, context: { base_url: "/~" })) + end + + def test_multiple_team_mentions + body = "

    Hi, @github/whale and @github/donut!

    " + link_whale = '@github/whale' + link_donut = '@github/donut' + + assert_equal("

    Hi, #{link_whale} and #{link_donut}!

    ", + @filter.call(body)) + end + + def test_matches_teams_in_body + body = "@test/team how are you?" + + assert_equal(["team"], mentioned_teams(body)) + end + + def test_matches_orgs_with_dashes + body = "hi @some-org/team" + + assert_equal(["team"], mentioned_teams(body)) + end + + def test_matches_teams_with_dashes + body = "hi @github/some-team" + + assert_equal(["some-team"], mentioned_teams(body)) + end + + def test_matches_teams_followed_by_a_single_dot + body = "okay @github/team." + + assert_equal(["team"], mentioned_teams(body)) + end + + def test_matches_teams_followed_by_multiple_dots + body = "okay @github/team..." + + assert_equal(["team"], mentioned_teams(body)) + end + + def test_does_not_match_email_addresses + body = "aman@tmm1.net" + + assert_empty(mentioned_teams(body)) + end + + def test_does_not_match_domain_name_looking_things + body = "we need a @github.com email" + + assert_empty(mentioned_teams(body)) + end + + def test_does_not_match_user_mentions + body = "we need to @enterprise know" + + assert_empty(mentioned_teams(body)) + end + + def test_matches_colon_suffixed_team_names + body = "@github/team: what do you think?" 
+ + assert_equal(["team"], mentioned_teams(body)) + end + + def test_matches_list_of_teams + body = "@github/whale @github/donut @github/green" + + assert_equal(["whale", "donut", "green"], mentioned_teams(body)) + end + + def test_matches_list_of_teams_with_commas + body = "/cc @github/whale, @github/donut, @github/green" + + assert_equal(["whale", "donut", "green"], mentioned_teams(body)) + end + + def test_matches_inside_brackets + body = "(@github/whale) and [@github/donut]" + + assert_equal(["whale", "donut"], mentioned_teams(body)) + end + + def test_returns_distinct_set + body = "/cc @github/whale, @github/donut, @github/whale, @github/whale" + + assert_equal(["whale", "donut"], mentioned_teams(body)) + end + + def test_does_not_match_inline_code_block_with_multiple_code_blocks + body = "something\n\n`/cc @github/whale @github/donut @github/green` `/cc @donut/donut`" + + assert_empty(mentioned_teams(body)) + end + + def test_mention_at_end_of_parenthetical_sentence + body = "(We're talking 'bout @some-org/some-team.)" + + assert_equal(["some-team"], mentioned_teams(body)) + end + + def test_team_pattern_can_be_customized + body = "

    @_abc/XYZ: test

    " + + res = @filter.call(body, context: { team_pattern: %r{@(_[a-z]{3})/([A-Z]{3})} }) + + link = '@_abc/XYZ' + + assert_equal("

    #{link}: test

    ", + res) + end + + def test_mention_link_filter + result = HTMLPipeline::NodeFilter::TeamMentionFilter.call("

    @bot/hubot

    ") + expected = "

    @bot/hubot

    " + + assert_equal(expected, result) + end + end +end diff --git a/test/html_pipeline/require_helper_test.rb b/test/html_pipeline/require_helper_test.rb new file mode 100644 index 00000000..3c1baeb4 --- /dev/null +++ b/test/html_pipeline/require_helper_test.rb @@ -0,0 +1,54 @@ +# frozen_string_literal: true + +require "test_helper" + +class HTMLPipeline + class RequireHelperTest < Minitest::Test + def test_works_with_existing_dependency + HTMLPipeline.require_dependency("rake", "SomeClass") + end + + def test_works_with_existing_dependencies + HTMLPipeline.require_dependencies(["old_sql", "nokogiri"], "SomeClass") + + assert_predicate(HTMLPipeline, :nokogiri_loaded?) + refute_predicate(HTMLPipeline, :old_sql_loaded?) + end + + def test_raises_mising_dependency_error + assert_raises(HTMLPipeline::MissingDependencyError) do + HTMLPipeline.require_dependency("non-existant", "SomeClass") + end + end + + def test_raises_mising_dependencies_error + assert_raises(HTMLPipeline::MissingDependencyError) do + HTMLPipeline.require_dependencies(["non-existant", "something"], "SomeClass") + end + end + + def test_raises_dependency_error_including_message + error = assert_raises(HTMLPipeline::MissingDependencyError) do + HTMLPipeline.require_dependency("non-existant", "SomeClass") + end + + assert_includes(error.message, "Missing dependency 'non-existant' for SomeClass. See README.md for details.") + end + + def test_raises_dependencies_error_including_message + error = assert_raises(HTMLPipeline::MissingDependencyError) do + HTMLPipeline.require_dependencies(["non-existant", "something"], "SomeClass") + end + + assert_includes(error.message, "Missing all dependencies 'non-existant, something' for SomeClass. 
See README.md for details.") + end + + def test_raises_error_includes_underlying_message + error = assert_raises(HTMLPipeline::MissingDependencyError) do + HTMLPipeline.require_dependency("non-existant", "SomeClass") + end + + assert_includes(error.message, "LoadError: cannot load such file") + end + end +end diff --git a/test/html_pipeline/text_filter/image_filter_test.rb b/test/html_pipeline/text_filter/image_filter_test.rb new file mode 100644 index 00000000..be67c278 --- /dev/null +++ b/test/html_pipeline/text_filter/image_filter_test.rb @@ -0,0 +1,43 @@ +# frozen_string_literal: true + +require "test_helper" + +ImageFilter = HTMLPipeline::TextFilter::ImageFilter + +class HTMLPipeline + class ImageFilterTest < Minitest::Test + def setup + @filter = ImageFilter + end + + def test_jpg + assert_equal(%(), + @filter.call(%(http://example.com/test.jpg))) + end + + def test_jpeg + assert_equal(%(), + @filter.call(%(http://example.com/test.jpeg))) + end + + def test_bmp + assert_equal(%(), + @filter.call(%(http://example.com/test.bmp))) + end + + def test_gif + assert_equal(%(), + @filter.call(%(http://example.com/test.gif))) + end + + def test_png + assert_equal(%(), + @filter.call(%(http://example.com/test.png))) + end + + def test_https_url + assert_equal(%(), + @filter.call(%(https://example.com/test.png))) + end + end +end diff --git a/test/html_pipeline/text_filter/plain_text_input_filter_test.rb b/test/html_pipeline/text_filter/plain_text_input_filter_test.rb new file mode 100644 index 00000000..742afab9 --- /dev/null +++ b/test/html_pipeline/text_filter/plain_text_input_filter_test.rb @@ -0,0 +1,28 @@ +# frozen_string_literal: true + +require "test_helper" + +class HTMLPipeline + class PlainTextInputFilterTest < Minitest::Test + PlainTextInputFilter = HTMLPipeline::TextFilter::PlainTextInputFilter + + def test_fails_when_given_a_documentfragment + body = "

    heyo

    " + doc = Nokogiri::HTML::DocumentFragment.parse(body) + assert_raises(TypeError) { PlainTextInputFilter.call(doc, context: {}) } + end + + def test_wraps_input_in_a_div_element + doc = PlainTextInputFilter.call("howdy pahtner", context: {}) + + assert_equal("
    howdy pahtner
    ", doc.to_s) + end + + def test_html_escapes_plain_text_input + doc = PlainTextInputFilter.call("See: ", context: {}) + + assert_equal("
    See: <http://example.org>
    ", + doc.to_s) + end + end +end diff --git a/test/html_pipeline_test.rb b/test/html_pipeline_test.rb new file mode 100644 index 00000000..e6b2a933 --- /dev/null +++ b/test/html_pipeline_test.rb @@ -0,0 +1,98 @@ +# frozen_string_literal: true + +require "test_helper" +require "helpers/mocked_instrumentation_service" + +class HTMLPipelineTest < Minitest::Test + class TestFilter < HTMLPipeline::TextFilter + class << self + def call(input, context: {}, result: {}) + input.reverse + end + end + end + + def setup + @default_context = {} + @pipeline = HTMLPipeline.new(text_filters: [TestFilter], default_context: @default_context) + end + + def test_filter_instrumentation + service = MockedInstrumentationService.new + events = service.subscribe("call_filter.html_pipeline") + @pipeline.instrumentation_service = service + body = "hello" + @pipeline.call(body) + event, payload, = events.pop + + assert(event, "event expected") + assert_equal("call_filter.html_pipeline", event) + assert_equal(TestFilter.name, payload[:filter]) + assert_equal(@pipeline.class.name, payload[:pipeline]) + assert_equal(body.reverse, payload[:result][:output]) + end + + def test_pipeline_instrumentation + service = MockedInstrumentationService.new + events = service.subscribe("call_text_filters.html_pipeline") + @pipeline.instrumentation_service = service + body = "hello" + @pipeline.call(body) + event, payload, = events.pop + + assert(event, "event expected") + assert_equal("call_text_filters.html_pipeline", event) + assert_equal(@pipeline.text_filters.map(&:name), payload[:text_filters]) + assert_equal(@pipeline.class.name, payload[:pipeline]) + assert_equal(body.reverse, payload[:result][:output]) + end + + def test_default_instrumentation_service + service = "default" + HTMLPipeline.default_instrumentation_service = service + pipeline = HTMLPipeline.new(text_filters: [], default_context: @default_context) + + assert_equal(service, pipeline.instrumentation_service) + ensure + 
HTMLPipeline.default_instrumentation_service = nil + end + + def test_setup_instrumentation + assert_nil(@pipeline.instrumentation_service) + + service = MockedInstrumentationService.new + events = service.subscribe("call_text_filters.html_pipeline") + name = "foo" + @pipeline.setup_instrumentation(name, service: service) + + assert_equal(service, @pipeline.instrumentation_service) + assert_equal(name, @pipeline.instrumentation_name) + + body = "foo" + @pipeline.call(body) + + event, payload, = events.pop + + assert(event, "expected event") + assert_equal(name, payload[:pipeline]) + assert_equal(body.reverse, payload[:result][:output]) + end + + def test_incorrect_text_filters + assert_raises(HTMLPipeline::InvalidFilterError) do + HTMLPipeline.new(text_filters: [HTMLPipeline::NodeFilter::MentionFilter], default_context: @default_context) + end + end + + def test_incorrect_convert_filter + assert_raises(HTMLPipeline::InvalidFilterError) do + HTMLPipeline.new(convert_filter: HTMLPipeline::NodeFilter::ImageMaxWidthFilter, default_context: @default_context) + end + end + + def test_incorrect_node_filters + assert_raises(HTMLPipeline::InvalidFilterError) do + HTMLPipeline.new(node_filters: [HTMLPipeline::ConvertFilter::MarkdownFilter], default_context: @default_context) + end + end +end diff --git a/test/sanitization_filter_test.rb b/test/sanitization_filter_test.rb new file mode 100644 index 00000000..5e26f4ff --- /dev/null +++ b/test/sanitization_filter_test.rb @@ -0,0 +1,262 @@ +# frozen_string_literal: true + +require "test_helper" + +class HTMLPipeline + class SanitizationFilterTest < Minitest::Test + SanitizationFilter = HTMLPipeline::SanitizationFilter + DEFAULT_CONFIG = SanitizationFilter::DEFAULT_CONFIG + + def test_removing_script_tags + orig = %(

    ) + html = SanitizationFilter.call(orig, DEFAULT_CONFIG).to_s + + refute_match(/script/, html) + end + + def test_removing_style_tags + orig = %(

    ) + html = SanitizationFilter.call(orig, DEFAULT_CONFIG).to_s + + refute_match(/style/, html) + end + + def test_removing_style_attributes + orig = %(

    YO DAWG

    ) + html = SanitizationFilter.call(orig, DEFAULT_CONFIG).to_s + + refute_match(/font-size/, html) + refute_match(/style/, html) + end + + def test_removing_script_event_handler_attributes + orig = %(YO DAWG) + html = SanitizationFilter.call(orig, DEFAULT_CONFIG).to_s + + refute_match(/javscript/, html) + refute_match(/onclick/, html) + end + + def test_sanitizes_li_elements_not_contained_in_ul_or_ol + stuff = "a\n
  • b
  • \nc" + html = SanitizationFilter.call(stuff, { elements: {} }).to_s + + assert_equal("a\nb\nc", html) + end + + def test_does_not_sanitize_li_elements_contained_in_ul_or_ol + stuff = "a\n
    • b
    \nc" + + assert_equal(stuff, SanitizationFilter.call(stuff, DEFAULT_CONFIG).to_s) + end + + def test_github_specific_protocols_are_removed + stuff = 'Spill this yo and so on' + + assert_equal("Spill this yo and so on", SanitizationFilter.call(stuff, DEFAULT_CONFIG).to_s) + end + + def test_unknown_schemes_are_removed + stuff = 'Wat is this' + html = SanitizationFilter.call(stuff, DEFAULT_CONFIG).to_s + + assert_equal("Wat is this", html) + end + + def test_allowlisted_longdesc_schemes_are_allowed + stuff = '' + html = SanitizationFilter.call(stuff, DEFAULT_CONFIG).to_s + + assert_equal('', html) + end + + def test_weird_longdesc_schemes_are_removed + stuff = '' + html = SanitizationFilter.call(stuff, DEFAULT_CONFIG).to_s + + assert_equal('', html) + end + + def test_standard_schemes_are_removed_if_not_specified_in_anchor_schemes + config = DEFAULT_CONFIG.merge(protocols: { "a" => { "href" => [] } }) + stuff = 'No href for you' + html = SanitizationFilter.call(stuff, config) + + assert_equal("No href for you", html) + end + + def test_custom_anchor_schemes_are_not_removed + config = DEFAULT_CONFIG.merge(protocols: { "a" => { "href" => ["something-weird"] } }) + stuff = 'Wat is this' + html = SanitizationFilter.call(stuff, config) + + assert_equal(stuff, html) + end + + def test_allow_svg_elements_to_be_added + config = DEFAULT_CONFIG.dup + frag = <<~FRAG + + + + FRAG + + html = SanitizationFilter.call(frag, config) + + assert_equal("\n", html) + + config = { elements: ["svg", "circle"], + attributes: { "svg" => ["width"], + "circle" => ["cx", "cy", "r"], }, } + + result = <<~FRAG + + + + FRAG + + html = SanitizationFilter.call(frag, config) + + assert_equal(result, html) + end + + def test_anchor_schemes_are_merged_with_other_anchor_restrictions + stuff = 'Wat is this' + allowlist = { + elements: ["a"], + attributes: { "a" => ["href"] }, + protocols: { "a" => { "href" => ["something-weird"] } }, + } + html = SanitizationFilter.call(stuff, allowlist) + + 
assert_equal('Wat is this', html) + end + + def test_uses_anchor_schemes_from_allowlist_when_not_separately_specified + stuff = 'Wat is this' + allowlist = { + elements: ["a"], + attributes: { "a" => ["href"] }, + protocols: { "a" => { "href" => ["something-weird"] } }, + } + html = SanitizationFilter.call(stuff, allowlist) + + assert_equal(stuff, html) + end + + def test_allowlist_contains_default_anchor_schemes + assert_equal(["http", "https", "mailto", :relative], SanitizationFilter::DEFAULT_CONFIG[:protocols]["a"]["href"]) + end + + def test_exports_default_anchor_schemes + assert_equal(["http", "https", "mailto", :relative], SanitizationFilter::VALID_PROTOCOLS) + end + + def test_script_contents_are_removed + orig = "" + + assert_equal("", SanitizationFilter.call(orig, DEFAULT_CONFIG).to_s) + end + + def test_table_rows_and_cells_removed_if_not_in_table + orig = %(FooBar) + + assert_equal("FooBar", SanitizationFilter.call(orig, { elements: {} })) + end + + def test_table_sections_removed_if_not_in_table + orig = %(Foo) + + assert_equal("Foo", SanitizationFilter.call(orig, { elements: {} }).to_s) + end + + def test_table_sections_are_not_removed + orig = %( + + + +
    Column 1
    Sum
    1
    ) + + assert_equal(orig, SanitizationFilter.call(orig, DEFAULT_CONFIG).to_s) + end + + def test_summary_tag_are_not_removed + orig = %(Foo) + + assert_equal(orig, SanitizationFilter.call(orig, DEFAULT_CONFIG).to_s) + end + + def test_details_tag_and_open_attribute_are_not_removed + orig = %(
    Foo
    ) + + assert_equal(orig, SanitizationFilter.call(orig, DEFAULT_CONFIG).to_s) + end + + def test_nested_details_tag_are_not_removed + orig = <<-NESTED +
    + Foo +
    + Bar + Baz +
    + Qux +
    + NESTED + assert_equal(orig, SanitizationFilter.call(orig, DEFAULT_CONFIG).to_s) + end + + def test_sanitization_pipeline_can_be_configured + config = { + elements: ["p", "pre", "code"], + } + + pipeline = HTMLPipeline.new(\ + convert_filter: + HTMLPipeline::ConvertFilter::MarkdownFilter.new, + sanitization_config: config, + node_filters: [ + HTMLPipeline::NodeFilter::MentionFilter.new, + ], + ) + + result = pipeline.call(<<~CODE) + This is *great*, @balevine: + + some_code(:first) + CODE + + expected = <<~HTML +

    This is great, @balevine:

    +
    some_code(:first)
    +        
    + HTML + + assert_equal(result[:output].to_s, expected.chomp) + end + + def test_sanitization_pipeline_can_be_removed + pipeline = HTMLPipeline.new(\ + convert_filter: HTMLPipeline::ConvertFilter::MarkdownFilter.new(context: { markdown: { plugins: { syntax_highlighter: nil } } }), + sanitization_config: nil, + node_filters: [ + HTMLPipeline::NodeFilter::MentionFilter.new, + ], + ) + + result = pipeline.call(<<~CODE) + This is *great*, @balevine: + + some_code(:first) + CODE + + expected = <<~HTML +

    This is great, @balevine:

    +
    some_code(:first)
    +        
    + HTML + + assert_equal(result[:output].to_s, expected.chomp) + end + end +end diff --git a/test/test_helper.rb b/test/test_helper.rb index ddb78e80..6e70b91a 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -1,20 +1,17 @@ # frozen_string_literal: true -require 'bundler/setup' -require 'html/pipeline' -require 'minitest/autorun' -require 'minitest/pride' -require 'minitest/focus' +require "bundler/setup" +require "html_pipeline" -require 'active_support/core_ext/string' +require "minitest/autorun" +require "minitest/pride" +require "minitest/focus" + +require "awesome_print" + +require "nokogiri" module TestHelpers - # Asserts that two html fragments are equivalent. Attribute order - # will be ignored. - def assert_equal_html(expected, actual) - assert_equal Nokogiri::HTML::DocumentFragment.parse(expected).to_hash, - Nokogiri::HTML::DocumentFragment.parse(actual).to_hash - end end -Minitest::Test.send(:include, TestHelpers) +Minitest::Test.include(TestHelpers)