Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add the AssetProxyFilter #379

Merged
merged 6 commits into from
Jun 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,3 @@ updates:
time: "09:00"
timezone: "Etc/UTC"
open-pull-requests-limit: 10
allow:
- dependency-name: "*"
dependency-type: "production"
25 changes: 2 additions & 23 deletions .github/workflows/automerge.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,26 +9,5 @@ permissions:

jobs:
dependabot:
name: Dependabot
runs-on: ubuntu-latest

if: ${{ github.actor == 'dependabot[bot]' }}
steps:
- name: Fetch Dependabot metadata
id: dependabot-metadata
uses: dependabot/fetch-metadata@v1
with:
github-token: "${{ secrets.GITHUB_TOKEN }}"

- name: Approve Dependabot PR
if: ${{steps.dependabot-metadata.outputs.update-type != 'version-update:semver-major'}}
run: gh pr review --approve "$PR_URL"
env:
PR_URL: ${{github.event.pull_request.html_url}}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

- name: Merge Dependabot PR
run: gh pr merge --auto --squash "$PR_URL"
env:
PR_URL: ${{ github.event.pull_request.html_url }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
uses: yettoapp/actions/.github/workflows/automerge_dependabot.yml@main
secrets: inherit
22 changes: 22 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
name: CI

on:
pull_request:

permissions:
contents: read

jobs:
test:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v3

- name: Set up Ruby
uses: yettoapp/actions/setup-languages@main
with:
ruby: true

- name: Run tests
run: bundle exec rake test
10 changes: 5 additions & 5 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: ruby/setup-ruby@v1

- name: Set up Ruby
uses: yettoapp/actions/setup-languages@main
with:
ruby-version: 3.1.0
rubygems: latest
bundler-cache: true
- run: bundle install
ruby: true

- name: Rubocop
run: bundle exec rake rubocop
19 changes: 19 additions & 0 deletions .github/workflows/publish.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
name: Release

on:
workflow_dispatch:
push:
branches:
- main
paths:
- "lib/html_pipeline/version.rb"

jobs:
ruby:
uses: yettoapp/actions/.github/workflows/ruby_gem_release.yml@main
secrets:
rubygems_api_key: ${{ secrets.RUBYGEMS_API_BOT_KEY }}
gh_token: ${{ secrets.GITHUB_TOKEN }}
with:
gem_name: html-pipeline
version_filepath: lib/html_pipeline/version.rb
70 changes: 0 additions & 70 deletions .github/workflows/tag_and_release.yml

This file was deleted.

33 changes: 0 additions & 33 deletions .github/workflows/test.yml

This file was deleted.

1 change: 1 addition & 0 deletions .ruby-version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3.2.1
8 changes: 8 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"[markdown]": {
"editor.defaultFormatter": "esbenp.prettier-vscode"
},
"[ruby]": {
"editor.defaultFormatter": "Shopify.ruby-lsp"
}
}
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,7 @@ end
For more information on how to write effective `NodeFilter`s, refer to the provided filters and to the underlying library, [Selma](https://www.github.com/gjtorikian/selma).

- `AbsoluteSourceFilter`: replace relative image urls with fully qualified versions
- `AssetProxyFilter`: replace image links with an encoded link to an asset server
- `EmojiFilter`: converts `:<emoji>:` to [emoji](http://www.emoji-cheat-sheet.com/)
- (Note: the included `MarkdownFilter` will already convert emoji)
- `HttpsFilter`: replace http urls with https versions
Expand Down
2 changes: 1 addition & 1 deletion lib/html_pipeline.rb
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def require_dependencies(names, requirer)
# Defines a `<name>_loaded?` method on this object's class; the generated
# method takes no arguments and returns +value+.
def define_dependency_loaded_method(name, value)
  predicate_name = :"#{name}_loaded?"
  reporter = -> { value }
  self.class.define_method(predicate_name, reporter)
end
end
end
# Public: Returns an Array of Filter objects for this Pipeline.
attr_reader :text_filters, :node_filters

Expand Down
2 changes: 1 addition & 1 deletion lib/html_pipeline/convert_filter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,6 @@ class << self
# Convenience entry point: builds a new instance with the given context and
# result, then runs that instance against +text+ and returns its output.
def call(text, context: {}, result: {})
  instance = new(context: context, result: result)
  instance.call(text)
end
end
end
end
end
2 changes: 1 addition & 1 deletion lib/html_pipeline/filter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ class << self
# Placeholder entry point: always raises NoMethodError.
# NOTE(review): presumably concrete filters override this — confirm.
def call(input, context: {})
  Kernel.raise(NoMethodError)
end
end
end
# Make sure the context has everything we need. Noop: Subclasses can override.
def validate
  nil # intentionally does nothing
end

Expand Down
26 changes: 13 additions & 13 deletions lib/html_pipeline/node_filter/absolute_source_filter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,26 +4,26 @@

class HTMLPipeline
class NodeFilter
# HTML Filter for replacing relative and root relative image URLs with
# fully qualified URLs
#
# This is useful if an image is root relative but should really be going
# through a cdn, or if the content for the page assumes the host is known
# i.e. scraped webpages and some RSS feeds.
#
# Context options:
# :image_base_url - Base URL for image host for root relative src.
# :image_subpage_url - For relative src.
#
# This filter does not write additional information to the context.
# Note: This filter would need to be run before AssetProxyFilter.
class AbsoluteSourceFilter < NodeFilter
SELECTOR = Selma::Selector.new(match_element: "img")

# Returns the shared SELECTOR constant, which is built to match `img` elements.
def selector
SELECTOR
end

# HTML Filter for replacing relative and root relative image URLs with
# fully qualified URLs
#
# This is useful if an image is root relative but should really be going
# through a cdn, or if the content for the page assumes the host is known
# i.e. scraped webpages and some RSS feeds.
#
# Context options:
# :image_base_url - Base URL for image host for root relative src.
# :image_subpage_url - For relative src.
#
# This filter does not write additional information to the context.
# This filter would need to be run before CamoFilter.
def handle_element(element)
src = element["src"]
return if src.nil? || src.empty?
Expand Down
86 changes: 86 additions & 0 deletions lib/html_pipeline/node_filter/asset_proxy_filter.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# frozen_string_literal: true

require "openssl"

class HTMLPipeline
  class NodeFilter
    # Proxy images/assets to another server, such as
    # [cactus/go-camo](https://github.com/cactus/go-camo#).
    # Reduces mixed content warnings as well as hiding the customer's
    # IP address when requesting images.
    # Copies the original img `src` to `data-canonical-src` then replaces the
    # `src` with a new url to the proxy server.
    #
    # Context options read by this filter:
    #   :asset_proxy               - Base URL of the proxy server.
    #   :asset_proxy_secret_key    - Key used to HMAC-sign each proxied URL.
    #   :asset_proxy_domain_regexp - Optional Regexp; images whose host matches
    #                                it are left untouched.
    #
    # Based on https://github.com/gjtorikian/html-pipeline/blob/v2.14.3/lib/html/pipeline/camo_filter.rb
    class AssetProxyFilter < NodeFilter
      SELECTOR = Selma::Selector.new(match_element: "img")

      # Returns the selector used to pick the elements this filter handles
      # (`img` elements).
      def selector
        SELECTOR
      end

      # Rewrites a single `img` element so that its `src` points at the proxy.
      #
      # The element is left untouched when it has no `src`, when the `src`
      # cannot be parsed as a URI, when the `src` has no host (unless it starts
      # with "///"), or when its host is on the allowlist.
      def handle_element(element)
        original_src = element["src"]
        return unless original_src

        begin
          uri = URI.parse(original_src)
        rescue StandardError
          # Best effort: an unparseable src is simply not proxied.
          return
        end

        return if uri.host.nil? && !original_src.start_with?("///")
        return if asset_host_allowed?(uri.host)

        element["src"] = asset_proxy_url(original_src)
        element["data-canonical-src"] = original_src
      end

      # Declares the context keys this filter depends on.
      # NOTE(review): `needs` is defined outside this file; presumably it
      # raises when a key is missing — confirm.
      def validate
        needs(:asset_proxy, :asset_proxy_secret_key)
      end

      # Returns true when +host+ matches context[:asset_proxy_domain_regexp],
      # and false when it does not or when no regexp is configured.
      def asset_host_allowed?(host)
        context[:asset_proxy_domain_regexp]&.match?(host) || false
      end

      class << self
        # This helps setup the context. It's not needed if you're always providing
        # all the necessary keys in the context. One example would be to override
        # this and pull the settings from a set of global application settings.
        #
        # context        - Hash to populate; it is mutated and returned.
        # proxy_settings - Hash with optional :url, :secret_key and :allowlist.
        #
        # An existing context[:asset_proxy_domain_regexp] is never overwritten.
        def transform_context(context, proxy_settings = {})
          context[:asset_proxy] = proxy_settings[:url] if proxy_settings[:url]
          context[:asset_proxy_secret_key] = proxy_settings[:secret_key] if proxy_settings[:secret_key]

          allowlist = determine_allowlist(proxy_settings)
          context[:asset_proxy_domain_regexp] ||= compile_allowlist(allowlist)

          context
        end

        # Builds a case-insensitive Regexp that matches a host against any
        # entry of +domain_list+; a `*` in an entry acts as a wildcard.
        #
        # Returns nil when +domain_list+ is nil or empty.
        def compile_allowlist(domain_list)
          return if domain_list.nil? || domain_list.empty?

          escaped = domain_list.map { |domain| Regexp.escape(domain).gsub("\\*", ".*?") }
          # \A and \z anchor the whole string. ^ and $ only anchor a single
          # line, which would let a multi-line value match on one of its lines.
          Regexp.new("\\A(#{escaped.join("|")})\\z", Regexp::IGNORECASE)
        end

        # Returns the allowlist from +proxy_settings+, or an empty Array when
        # none is given.
        def determine_allowlist(proxy_settings)
          proxy_settings[:allowlist] || []
        end
      end

      # Builds the proxied URL: "<proxy base>/<HMAC of url>/<hex-encoded url>".
      private def asset_proxy_url(url)
        "#{context[:asset_proxy]}/#{asset_url_hash(url)}/#{hexencode(url)}"
      end

      # Hex HMAC-SHA1 digest of +url+, keyed with context[:asset_proxy_secret_key].
      private def asset_url_hash(url)
        OpenSSL::HMAC.hexdigest("sha1", context[:asset_proxy_secret_key], url)
      end

      # Hex-encodes the bytes of +str+.
      private def hexencode(str)
        str.unpack1("H*")
      end
    end
  end
end
2 changes: 1 addition & 1 deletion lib/html_pipeline/node_filter/mention_filter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def mentioned_logins_in(text, username_pattern = USERNAME_PATTERN)
yield match, login
end
end
end
end
# Hash that contains all of the mention patterns used by the pipeline
MENTION_PATTERNS = Hash.new do |hash, key|
hash[key] = %r{
Expand Down
2 changes: 1 addition & 1 deletion lib/html_pipeline/node_filter/team_mention_filter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def mentioned_teams_in(text, team_pattern = TEAM_PATTERN)
yield match, org, team
end
end
end
end

# Default pattern used to extract team names from text. The value can be
# overridden by providing the team_pattern variable in the context. To
Expand Down
2 changes: 1 addition & 1 deletion lib/html_pipeline/sanitization_filter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,6 @@ def call(html, config)
sanitization_config = Selma::Sanitizer.new(config)
Selma::Rewriter.new(sanitizer: sanitization_config).rewrite(html)
end
end
end
end
end
Loading