diff --git a/CHANGELOG.md b/CHANGELOG.md index 2038b02..c822d9b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,7 @@ # Changelog +- Protocol-relative URLs are properly supported for script tags + ## 2.7.0 (2022-02-04) - Allows a more sensible set of default attributes on `` tags. Thanks to [Zade Viggers](https://github.com/zadeviggers). diff --git a/index.js b/index.js index 8b21565..754bf10 100644 --- a/index.js +++ b/index.js @@ -287,7 +287,6 @@ function sanitizeHtml(html, options, _recursing) { delete frame.attribs[a]; return; } - let parsed; // check allowedAttributesMap for the element and attribute and modify the value // as necessary if there are specific values defined. let passedAllowedAttributesMapCheck = false; @@ -335,14 +334,14 @@ function sanitizeHtml(html, options, _recursing) { let allowed = true; try { - const parsed = new URL(value); + const parsed = parseUrl(value); if (options.allowedScriptHostnames || options.allowedScriptDomains) { const allowedHostname = (options.allowedScriptHostnames || []).find(function (hostname) { - return hostname === parsed.hostname; + return hostname === parsed.url.hostname; }); const allowedDomain = (options.allowedScriptDomains || []).find(function(domain) { - return parsed.hostname === domain || parsed.hostname.endsWith(`.${domain}`); + return parsed.url.hostname === domain || parsed.url.hostname.endsWith(`.${domain}`); }); allowed = allowedHostname || allowedDomain; } @@ -359,29 +358,9 @@ function sanitizeHtml(html, options, _recursing) { if (name === 'iframe' && a === 'src') { let allowed = true; try { - // Chrome accepts \ as a substitute for / in the // at the - // start of a URL, so rewrite accordingly to prevent exploit. - // Also drop any whitespace at that point in the URL - value = value.replace(/^(\w+:)?\s*[\\/]\s*[\\/]/, '$1//'); - if (value.startsWith('relative:')) { - // An attempt to exploit our workaround for base URLs being - // mandatory for relative URL validation in the WHATWG - // URL parser, reject it - throw new Error('relative: exploit attempt'); - } - // naughtyHref is in charge of whether protocol relative URLs - // are cool. Here we are concerned just with allowed hostnames and - // whether to allow relative URLs. - // - // Build a placeholder "base URL" against which any reasonable - // relative URL may be parsed successfully - let base = 'relative://relative-site'; - for (let i = 0; (i < 100); i++) { - base += `/${i}`; - } - const parsed = new URL(value, base); - const isRelativeUrl = parsed && parsed.hostname === 'relative-site' && parsed.protocol === 'relative:'; - if (isRelativeUrl) { + const parsed = parseUrl(value); + + if (parsed.isRelativeUrl) { // default value of allowIframeRelativeUrls is true // unless allowedIframeHostnames or allowedIframeDomains specified allowed = has(options, 'allowIframeRelativeUrls') @@ -389,10 +368,10 @@ function sanitizeHtml(html, options, _recursing) { : (!options.allowedIframeHostnames && !options.allowedIframeDomains); } else if (options.allowedIframeHostnames || options.allowedIframeDomains) { const allowedHostname = (options.allowedIframeHostnames || []).find(function (hostname) { - return hostname === parsed.hostname; + return hostname === parsed.url.hostname; }); const allowedDomain = (options.allowedIframeDomains || []).find(function(domain) { - return parsed.hostname === domain || parsed.hostname.endsWith(`.${domain}`); + return parsed.url.hostname === domain || parsed.url.hostname.endsWith(`.${domain}`); }); allowed = allowedHostname || allowedDomain; } @@ -407,7 +386,7 @@ function sanitizeHtml(html, options, _recursing) { } if (a === 'srcset') { try { - parsed = parseSrcset(value); + let parsed = parseSrcset(value); parsed.forEach(function(value) { if (naughtyHref('srcset', value.url)) { value.evil = true; @@ -656,6 +635,33 @@ function sanitizeHtml(html, options, _recursing) { return !options.allowedSchemes || options.allowedSchemes.indexOf(scheme) === -1; } + function parseUrl(value) { + value = value.replace(/^(\w+:)?\s*[\\/]\s*[\\/]/, '$1//'); + if (value.startsWith('relative:')) { + // An attempt to exploit our workaround for base URLs being + // mandatory for relative URL validation in the WHATWG + // URL parser, reject it + throw new Error('relative: exploit attempt'); + } + // naughtyHref is in charge of whether protocol relative URLs + // are cool. Here we are concerned just with allowed hostnames and + // whether to allow relative URLs. + // + // Build a placeholder "base URL" against which any reasonable + // relative URL may be parsed successfully + let base = 'relative://relative-site'; + for (let i = 0; (i < 100); i++) { + base += `/${i}`; + } + + const parsed = new URL(value, base); + + const isRelativeUrl = parsed && parsed.hostname === 'relative-site' && parsed.protocol === 'relative:'; + return { + isRelativeUrl, + url: parsed + }; + } /** * Filters user input css properties by allowlisted regex attributes. * Modifies the abstractSyntaxTree object. diff --git a/test/test.js b/test/test.js index 868b3ef..cba4de7 100644 --- a/test/test.js +++ b/test/test.js @@ -1476,5 +1476,15 @@ describe('sanitizeHtml', function() { }), '' ); }); + it('Should allow protocol-relative URLs for script tag', function() { + assert.equal( + sanitizeHtml('', { + allowedTags: [ 'script' ], + allowedAttributes: { + script: [ 'src' ] + } + }), '' + ); + }); });