From 9dd310a4cd91b81588b3a19898dadf827322353f Mon Sep 17 00:00:00 2001 From: Bill Thornton Date: Wed, 6 Jul 2022 13:20:23 -0400 Subject: [PATCH 1/3] Include normalize-url directly in source --- .eslintignore | 1 + jest.config.js | 5 +- package-lock.json | 5 - package.json | 3 +- src/utils/normalize-url/index.d.ts | 263 ++++++++++++++++++++++++ src/utils/normalize-url/index.js | 246 +++++++++++++++++++++++ src/utils/normalize-url/license | 9 + src/utils/normalize-url/package.json | 52 +++++ src/utils/normalize-url/readme.md | 287 +++++++++++++++++++++++++++ src/utils/url.ts | 2 +- tsconfig.json | 1 + 11 files changed, 862 insertions(+), 12 deletions(-) create mode 100644 src/utils/normalize-url/index.d.ts create mode 100644 src/utils/normalize-url/index.js create mode 100644 src/utils/normalize-url/license create mode 100644 src/utils/normalize-url/package.json create mode 100644 src/utils/normalize-url/readme.md diff --git a/.eslintignore b/.eslintignore index 3c93d30b9..42ac8061e 100644 --- a/.eslintignore +++ b/.eslintignore @@ -2,3 +2,4 @@ dist generated-client lib node_modules +src/utils/normalize-url diff --git a/jest.config.js b/jest.config.js index 3a6e9d7d0..5b32b7af5 100644 --- a/jest.config.js +++ b/jest.config.js @@ -3,17 +3,14 @@ module.exports = { testEnvironment: 'node', runner: 'groups', - // normalize-url exports an ES module so we need to transform js files - // and change the ignore pattern so it is transformed preset: 'ts-jest/presets/js-with-ts', globals: { 'ts-jest': { tsconfig: { allowJs: true } } }, - transformIgnorePatterns: [ '/node_modules/(?!(normalize-url)/)' ], // Coverage options collectCoverageFrom: [ 'src/**' ], - coveragePathIgnorePatterns: [ '__helpers__', 'generated-client' ] + coveragePathIgnorePatterns: [ '__helpers__', 'generated-client', 'normalize-url' ] }; diff --git a/package-lock.json b/package-lock.json index 00d7c8d1e..f09541ac2 100644 --- a/package-lock.json +++ b/package-lock.json @@ -5272,11 +5272,6 @@ "integrity": "sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==", "dev": true }, - "normalize-url": { - "version": "6.1.0", - "resolved": "https://registry.npmjs.org/normalize-url/-/normalize-url-6.1.0.tgz", - "integrity": "sha512-DlL+XwOy3NxAQ8xuC0okPgK46iuVNAK01YN7RueYBqqFeGsBjV9XmCAzAdgt+667bCl5kPh9EqKKDwnaPG1I7A==" - }, "npm-run-path": { "version": "4.0.1", "resolved": "https://registry.npmjs.org/npm-run-path/-/npm-run-path-4.0.1.tgz", diff --git a/package.json b/package.json index 2615250ec..d4ad70c3f 100644 --- a/package.json +++ b/package.json @@ -45,7 +45,6 @@ }, "dependencies": { "axios": "^0.27.2", - "compare-versions": "^4.0.0", - "normalize-url": "^6.1.0" + "compare-versions": "^4.0.0" } } diff --git a/src/utils/normalize-url/index.d.ts b/src/utils/normalize-url/index.d.ts new file mode 100644 index 000000000..caa264c1d --- /dev/null +++ b/src/utils/normalize-url/index.d.ts @@ -0,0 +1,263 @@ +export interface Options { + /** + @default 'http:' + */ + readonly defaultProtocol?: string; + + /** + Prepends `defaultProtocol` to the URL if it's protocol-relative. + + @default true + + @example + ``` + normalizeUrl('//sindresorhus.com:80/'); + //=> 'http://sindresorhus.com' + + normalizeUrl('//sindresorhus.com:80/', {normalizeProtocol: false}); + //=> '//sindresorhus.com' + ``` + */ + readonly normalizeProtocol?: boolean; + + /** + Normalizes `https:` URLs to `http:`. + + @default false + + @example + ``` + normalizeUrl('https://sindresorhus.com:80/'); + //=> 'https://sindresorhus.com' + + normalizeUrl('https://sindresorhus.com:80/', {forceHttp: true}); + //=> 'http://sindresorhus.com' + ``` + */ + readonly forceHttp?: boolean; + + /** + Normalizes `http:` URLs to `https:`. + + This option can't be used with the `forceHttp` option at the same time. + + @default false + + @example + ``` + normalizeUrl('https://sindresorhus.com:80/'); + //=> 'https://sindresorhus.com' + + normalizeUrl('http://sindresorhus.com:80/', {forceHttps: true}); + //=> 'https://sindresorhus.com' + ``` + */ + readonly forceHttps?: boolean; + + /** + Strip the [authentication](https://en.wikipedia.org/wiki/Basic_access_authentication) part of a URL. + + @default true + + @example + ``` + normalizeUrl('user:password@sindresorhus.com'); + //=> 'https://sindresorhus.com' + + normalizeUrl('user:password@sindresorhus.com', {stripAuthentication: false}); + //=> 'https://user:password@sindresorhus.com' + ``` + */ + readonly stripAuthentication?: boolean; + + /** + Removes hash from the URL. + + @default false + + @example + ``` + normalizeUrl('sindresorhus.com/about.html#contact'); + //=> 'http://sindresorhus.com/about.html#contact' + + normalizeUrl('sindresorhus.com/about.html#contact', {stripHash: true}); + //=> 'http://sindresorhus.com/about.html' + ``` + */ + readonly stripHash?: boolean; + + /** + Removes HTTP(S) protocol from an URL `http://sindresorhus.com` → `sindresorhus.com`. + + @default false + + @example + ``` + normalizeUrl('https://sindresorhus.com'); + //=> 'https://sindresorhus.com' + + normalizeUrl('sindresorhus.com', {stripProtocol: true}); + //=> 'sindresorhus.com' + ``` + */ + readonly stripProtocol?: boolean; + + /** + Strip the [text fragment](https://web.dev/text-fragments/) part of the URL + + __Note:__ The text fragment will always be removed if the `stripHash` option is set to `true`, as the hash contains the text fragment. + + @default true + + @example + ``` + normalizeUrl('http://sindresorhus.com/about.html#:~:text=hello'); + //=> 'http://sindresorhus.com/about.html#' + + normalizeUrl('http://sindresorhus.com/about.html#section:~:text=hello'); + //=> 'http://sindresorhus.com/about.html#section' + + normalizeUrl('http://sindresorhus.com/about.html#:~:text=hello', {stripTextFragment: false}); + //=> 'http://sindresorhus.com/about.html#:~:text=hello' + + normalizeUrl('http://sindresorhus.com/about.html#section:~:text=hello', {stripTextFragment: false}); + //=> 'http://sindresorhus.com/about.html#section:~:text=hello' + ``` + */ + readonly stripTextFragment?: boolean; + + /** + Removes `www.` from the URL. + + @default true + + @example + ``` + normalizeUrl('http://www.sindresorhus.com'); + //=> 'http://sindresorhus.com' + + normalizeUrl('http://www.sindresorhus.com', {stripWWW: false}); + //=> 'http://www.sindresorhus.com' + ``` + */ + readonly stripWWW?: boolean; + + /** + Removes query parameters that matches any of the provided strings or regexes. + + @default [/^utm_\w+/i] + + @example + ``` + normalizeUrl('www.sindresorhus.com?foo=bar&ref=test_ref', { + removeQueryParameters: ['ref'] + }); + //=> 'http://sindresorhus.com/?foo=bar' + ``` + + If a boolean is provided, `true` will remove all the query parameters. + + ``` + normalizeUrl('www.sindresorhus.com?foo=bar', { + removeQueryParameters: true + }); + //=> 'http://sindresorhus.com' + ``` + + `false` will not remove any query parameter. + + ``` + normalizeUrl('www.sindresorhus.com?foo=bar&utm_medium=test&ref=test_ref', { + removeQueryParameters: false + }); + //=> 'http://www.sindresorhus.com/?foo=bar&ref=test_ref&utm_medium=test' + ``` + */ + readonly removeQueryParameters?: ReadonlyArray | boolean; + + /** + Removes trailing slash. + + __Note__: Trailing slash is always removed if the URL doesn't have a pathname unless the `removeSingleSlash` option is set to `false`. + + @default true + + @example + ``` + normalizeUrl('http://sindresorhus.com/redirect/'); + //=> 'http://sindresorhus.com/redirect' + + normalizeUrl('http://sindresorhus.com/redirect/', {removeTrailingSlash: false}); + //=> 'http://sindresorhus.com/redirect/' + + normalizeUrl('http://sindresorhus.com/', {removeTrailingSlash: false}); + //=> 'http://sindresorhus.com' + ``` + */ + readonly removeTrailingSlash?: boolean; + + /** + Remove a sole `/` pathname in the output. This option is independant of `removeTrailingSlash`. + + @default true + + @example + ``` + normalizeUrl('https://sindresorhus.com/'); + //=> 'https://sindresorhus.com' + + normalizeUrl('https://sindresorhus.com/', {removeSingleSlash: false}); + //=> 'https://sindresorhus.com/' + ``` + */ + readonly removeSingleSlash?: boolean; + + /** + Removes the default directory index file from path that matches any of the provided strings or regexes. + When `true`, the regex `/^index\.[a-z]+$/` is used. + + @default false + + @example + ``` + normalizeUrl('www.sindresorhus.com/foo/default.php', { + removeDirectoryIndex: [/^default\.[a-z]+$/] + }); + //=> 'http://sindresorhus.com/foo' + ``` + */ + readonly removeDirectoryIndex?: boolean | ReadonlyArray; + + /** + Sorts the query parameters alphabetically by key. + + @default true + + @example + ``` + normalizeUrl('www.sindresorhus.com?b=two&a=one&c=three', { + sortQueryParameters: false + }); + //=> 'http://sindresorhus.com/?b=two&a=one&c=three' + ``` + */ + readonly sortQueryParameters?: boolean; +} + +/** +[Normalize](https://en.wikipedia.org/wiki/URL_normalization) a URL. + +@param url - URL to normalize, including [data URL](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URIs). + +@example +``` +import normalizeUrl from 'normalize-url'; + +normalizeUrl('sindresorhus.com'); +//=> 'http://sindresorhus.com' + +normalizeUrl('//www.sindresorhus.com:80/../baz?b=bar&a=foo'); +//=> 'http://sindresorhus.com/baz?a=foo&b=bar' +``` +*/ +export default function normalizeUrl(url: string, options?: Options): string; diff --git a/src/utils/normalize-url/index.js b/src/utils/normalize-url/index.js new file mode 100644 index 000000000..1c5f9a14f --- /dev/null +++ b/src/utils/normalize-url/index.js @@ -0,0 +1,246 @@ +// https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URIs +const DATA_URL_DEFAULT_MIME_TYPE = 'text/plain'; +const DATA_URL_DEFAULT_CHARSET = 'us-ascii'; + +const testParameter = (name, filters) => filters.some(filter => filter instanceof RegExp ? filter.test(name) : filter === name); + +const normalizeDataURL = (urlString, {stripHash}) => { + const match = /^data:(?[^,]*?),(?[^#]*?)(?:#(?.*))?$/.exec(urlString); + + if (!match) { + throw new Error(`Invalid URL: ${urlString}`); + } + + let {type, data, hash} = match.groups; + const mediaType = type.split(';'); + hash = stripHash ? '' : hash; + + let isBase64 = false; + if (mediaType[mediaType.length - 1] === 'base64') { + mediaType.pop(); + isBase64 = true; + } + + // Lowercase MIME type + const mimeType = (mediaType.shift() || '').toLowerCase(); + const attributes = mediaType + .map(attribute => { + let [key, value = ''] = attribute.split('=').map(string => string.trim()); + + // Lowercase `charset` + if (key === 'charset') { + value = value.toLowerCase(); + + if (value === DATA_URL_DEFAULT_CHARSET) { + return ''; + } + } + + return `${key}${value ? `=${value}` : ''}`; + }) + .filter(Boolean); + + const normalizedMediaType = [ + ...attributes, + ]; + + if (isBase64) { + normalizedMediaType.push('base64'); + } + + if (normalizedMediaType.length > 0 || (mimeType && mimeType !== DATA_URL_DEFAULT_MIME_TYPE)) { + normalizedMediaType.unshift(mimeType); + } + + return `data:${normalizedMediaType.join(';')},${isBase64 ? data.trim() : data}${hash ? `#${hash}` : ''}`; +}; + +export default function normalizeUrl(urlString, options) { + options = { + defaultProtocol: 'http:', + normalizeProtocol: true, + forceHttp: false, + forceHttps: false, + stripAuthentication: true, + stripHash: false, + stripTextFragment: true, + stripWWW: true, + removeQueryParameters: [/^utm_\w+/i], + removeTrailingSlash: true, + removeSingleSlash: true, + removeDirectoryIndex: false, + sortQueryParameters: true, + ...options, + }; + + urlString = urlString.trim(); + + // Data URL + if (/^data:/i.test(urlString)) { + return normalizeDataURL(urlString, options); + } + + if (/^view-source:/i.test(urlString)) { + throw new Error('`view-source:` is not supported as it is a non-standard protocol'); + } + + const hasRelativeProtocol = urlString.startsWith('//'); + const isRelativeUrl = !hasRelativeProtocol && /^\.*\//.test(urlString); + + // Prepend protocol + if (!isRelativeUrl) { + urlString = urlString.replace(/^(?!(?:\w+:)?\/\/)|^\/\//, options.defaultProtocol); + } + + const urlObject = new URL(urlString); + + if (options.forceHttp && options.forceHttps) { + throw new Error('The `forceHttp` and `forceHttps` options cannot be used together'); + } + + if (options.forceHttp && urlObject.protocol === 'https:') { + urlObject.protocol = 'http:'; + } + + if (options.forceHttps && urlObject.protocol === 'http:') { + urlObject.protocol = 'https:'; + } + + // Remove auth + if (options.stripAuthentication) { + urlObject.username = ''; + urlObject.password = ''; + } + + // Remove hash + if (options.stripHash) { + urlObject.hash = ''; + } else if (options.stripTextFragment) { + urlObject.hash = urlObject.hash.replace(/#?:~:text.*?$/i, ''); + } + + // Remove duplicate slashes if not preceded by a protocol + // NOTE: This could be implemented using a single negative lookbehind + // regex, but we avoid that to maintain compatibility with older js engines + // which do not have support for that feature. + if (urlObject.pathname) { + // TODO: Replace everything below with `urlObject.pathname = urlObject.pathname.replace(/(? 0) { + let pathComponents = urlObject.pathname.split('/'); + const lastComponent = pathComponents[pathComponents.length - 1]; + + if (testParameter(lastComponent, options.removeDirectoryIndex)) { + pathComponents = pathComponents.slice(0, -1); + urlObject.pathname = pathComponents.slice(1).join('/') + '/'; + } + } + + if (urlObject.hostname) { + // Remove trailing dot + urlObject.hostname = urlObject.hostname.replace(/\.$/, ''); + + // Remove `www.` + if (options.stripWWW && /^www\.(?!www\.)[a-z\-\d]{1,63}\.[a-z.\-\d]{2,63}$/.test(urlObject.hostname)) { + // Each label should be max 63 at length (min: 1). + // Source: https://en.wikipedia.org/wiki/Hostname#Restrictions_on_valid_host_names + // Each TLD should be up to 63 characters long (min: 2). + // It is technically possible to have a single character TLD, but none currently exist. + urlObject.hostname = urlObject.hostname.replace(/^www\./, ''); + } + } + + // Remove query unwanted parameters + if (Array.isArray(options.removeQueryParameters)) { + // eslint-disable-next-line unicorn/no-useless-spread -- We are intentionally spreading to get a copy. + for (const key of [...urlObject.searchParams.keys()]) { + if (testParameter(key, options.removeQueryParameters)) { + urlObject.searchParams.delete(key); + } + } + } + + if (options.removeQueryParameters === true) { + urlObject.search = ''; + } + + // Sort query parameters + if (options.sortQueryParameters) { + urlObject.searchParams.sort(); + + // Calling `.sort()` encodes the search parameters, so we need to decode them again. + try { + urlObject.search = decodeURIComponent(urlObject.search); + } catch {} + } + + if (options.removeTrailingSlash) { + urlObject.pathname = urlObject.pathname.replace(/\/$/, ''); + } + + const oldUrlString = urlString; + + // Take advantage of many of the Node `url` normalizations + urlString = urlObject.toString(); + + if (!options.removeSingleSlash && urlObject.pathname === '/' && !oldUrlString.endsWith('/') && urlObject.hash === '') { + urlString = urlString.replace(/\/$/, ''); + } + + // Remove ending `/` unless removeSingleSlash is false + if ((options.removeTrailingSlash || urlObject.pathname === '/') && urlObject.hash === '' && options.removeSingleSlash) { + urlString = urlString.replace(/\/$/, ''); + } + + // Restore relative protocol, if applicable + if (hasRelativeProtocol && !options.normalizeProtocol) { + urlString = urlString.replace(/^http:\/\//, '//'); + } + + // Remove http/https + if (options.stripProtocol) { + urlString = urlString.replace(/^(?:https?:)?\/\//, ''); + } + + return urlString; +} diff --git a/src/utils/normalize-url/license b/src/utils/normalize-url/license new file mode 100644 index 000000000..fa7ceba3e --- /dev/null +++ b/src/utils/normalize-url/license @@ -0,0 +1,9 @@ +MIT License + +Copyright (c) Sindre Sorhus (https://sindresorhus.com) + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/src/utils/normalize-url/package.json b/src/utils/normalize-url/package.json new file mode 100644 index 000000000..b9eb910c1 --- /dev/null +++ b/src/utils/normalize-url/package.json @@ -0,0 +1,52 @@ +{ + "name": "normalize-url", + "version": "7.0.3", + "description": "Normalize a URL", + "license": "MIT", + "repository": "sindresorhus/normalize-url", + "funding": "https://github.com/sponsors/sindresorhus", + "author": { + "name": "Sindre Sorhus", + "email": "sindresorhus@gmail.com", + "url": "https://sindresorhus.com" + }, + "type": "module", + "exports": "./index.js", + "engines": { + "node": ">=12.20" + }, + "scripts": { + "test": "ava && tsd" + }, + "files": [ + "index.js", + "index.d.ts" + ], + "keywords": [ + "normalize", + "url", + "uri", + "address", + "string", + "normalization", + "normalisation", + "query", + "querystring", + "simplify", + "strip", + "trim", + "canonical" + ], + "devDependencies": { + "ava": "^4.0.1", + "c8": "^7.11.0", + "tsd": "^0.19.1", + "xo": "^0.47.0" + }, + "c8": { + "reporter": [ + "text", + "lcov" + ] + } +} diff --git a/src/utils/normalize-url/readme.md b/src/utils/normalize-url/readme.md new file mode 100644 index 000000000..71d8638a1 --- /dev/null +++ b/src/utils/normalize-url/readme.md @@ -0,0 +1,287 @@ +# normalize-url [![Coverage Status](https://codecov.io/gh/sindresorhus/normalize-url/branch/main/graph/badge.svg)](https://codecov.io/gh/sindresorhus/normalize-url) + +> [Normalize](https://en.wikipedia.org/wiki/URL_normalization) a URL + +Useful when you need to display, store, deduplicate, sort, compare, etc, URLs. + +**Note:** This package does **not** do URL sanitization. [Garbage in, garbage out.](https://en.wikipedia.org/wiki/Garbage_in,_garbage_out) If you use this in a server context and accept URLs as user input, it's up to you to protect against invalid URLs, [path traversal attacks](https://owasp.org/www-community/attacks/Path_Traversal), etc. + +## Install + +```sh +npm install normalize-url +``` + +*If you need to use this in the browser, use version 4: `npm i normalize-url@4`* + +## Usage + +```js +import normalizeUrl from 'normalize-url'; + +normalizeUrl('sindresorhus.com'); +//=> 'http://sindresorhus.com' + +normalizeUrl('//www.sindresorhus.com:80/../baz?b=bar&a=foo'); +//=> 'http://sindresorhus.com/baz?a=foo&b=bar' +``` + +## API + +### normalizeUrl(url, options?) + +#### url + +Type: `string` + +URL to normalize, including [data URL](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URIs). + +#### options + +Type: `object` + +##### defaultProtocol + +Type: `string`\ +Default: `http:` + +##### normalizeProtocol + +Type: `boolean`\ +Default: `true` + +Prepend `defaultProtocol` to the URL if it's protocol-relative. + +```js +normalizeUrl('//sindresorhus.com:80/'); +//=> 'http://sindresorhus.com' + +normalizeUrl('//sindresorhus.com:80/', {normalizeProtocol: false}); +//=> '//sindresorhus.com' +``` + +##### forceHttp + +Type: `boolean`\ +Default: `false` + +Normalize `https:` to `http:`. + +```js +normalizeUrl('https://sindresorhus.com:80/'); +//=> 'https://sindresorhus.com' + +normalizeUrl('https://sindresorhus.com:80/', {forceHttp: true}); +//=> 'http://sindresorhus.com' +``` + +##### forceHttps + +Type: `boolean`\ +Default: `false` + +Normalize `http:` to `https:`. + +```js +normalizeUrl('https://sindresorhus.com:80/'); +//=> 'https://sindresorhus.com' + +normalizeUrl('http://sindresorhus.com:80/', {forceHttps: true}); +//=> 'https://sindresorhus.com' +``` + +This option can't be used with the `forceHttp` option at the same time. + +##### stripAuthentication + +Type: `boolean`\ +Default: `true` + +Strip the [authentication](https://en.wikipedia.org/wiki/Basic_access_authentication) part of the URL. + +```js +normalizeUrl('user:password@sindresorhus.com'); +//=> 'https://sindresorhus.com' + +normalizeUrl('user:password@sindresorhus.com', {stripAuthentication: false}); +//=> 'https://user:password@sindresorhus.com' +``` + +##### stripHash + +Type: `boolean`\ +Default: `false` + +Strip the hash part of the URL. + +```js +normalizeUrl('sindresorhus.com/about.html#contact'); +//=> 'http://sindresorhus.com/about.html#contact' + +normalizeUrl('sindresorhus.com/about.html#contact', {stripHash: true}); +//=> 'http://sindresorhus.com/about.html' +``` + +##### stripProtocol + +Type: `boolean`\ +Default: `false` + +Remove HTTP(S) protocol from the URL: `http://sindresorhus.com` → `sindresorhus.com`. + +```js +normalizeUrl('https://sindresorhus.com'); +//=> 'https://sindresorhus.com' + +normalizeUrl('https://sindresorhus.com', {stripProtocol: true}); +//=> 'sindresorhus.com' +``` + +##### stripTextFragment + +Type: `boolean`\ +Default: `true` + +Strip the [text fragment](https://web.dev/text-fragments/) part of the URL. + +**Note:** The text fragment will always be removed if the `stripHash` option is set to `true`, as the hash contains the text fragment. + +```js +normalizeUrl('http://sindresorhus.com/about.html#:~:text=hello'); +//=> 'http://sindresorhus.com/about.html#' + +normalizeUrl('http://sindresorhus.com/about.html#section:~:text=hello'); +//=> 'http://sindresorhus.com/about.html#section' + +normalizeUrl('http://sindresorhus.com/about.html#:~:text=hello', {stripTextFragment: false}); +//=> 'http://sindresorhus.com/about.html#:~:text=hello' + +normalizeUrl('http://sindresorhus.com/about.html#section:~:text=hello', {stripTextFragment: false}); +//=> 'http://sindresorhus.com/about.html#section:~:text=hello' +``` + +##### stripWWW + +Type: `boolean`\ +Default: `true` + +Remove `www.` from the URL. + +```js +normalizeUrl('http://www.sindresorhus.com'); +//=> 'http://sindresorhus.com' + +normalizeUrl('http://www.sindresorhus.com', {stripWWW: false}); +//=> 'http://www.sindresorhus.com' +``` + +##### removeQueryParameters + +Type: `Array | boolean`\ +Default: `[/^utm_\w+/i]` + +Remove query parameters that matches any of the provided strings or regexes. + +```js +normalizeUrl('www.sindresorhus.com?foo=bar&ref=test_ref', { + removeQueryParameters: ['ref'] +}); +//=> 'http://sindresorhus.com/?foo=bar' +``` + +If a boolean is provided, `true` will remove all the query parameters. + +```js +normalizeUrl('www.sindresorhus.com?foo=bar', { + removeQueryParameters: true +}); +//=> 'http://sindresorhus.com' +``` + +`false` will not remove any query parameter. + +```js +normalizeUrl('www.sindresorhus.com?foo=bar&utm_medium=test&ref=test_ref', { + removeQueryParameters: false +}); +//=> 'http://www.sindresorhus.com/?foo=bar&ref=test_ref&utm_medium=test' +``` + +##### removeTrailingSlash + +Type: `boolean`\ +Default: `true` + +Remove trailing slash. + +**Note:** Trailing slash is always removed if the URL doesn't have a pathname unless the `removeSingleSlash` option is set to `false`. + +```js +normalizeUrl('http://sindresorhus.com/redirect/'); +//=> 'http://sindresorhus.com/redirect' + +normalizeUrl('http://sindresorhus.com/redirect/', {removeTrailingSlash: false}); +//=> 'http://sindresorhus.com/redirect/' + +normalizeUrl('http://sindresorhus.com/', {removeTrailingSlash: false}); +//=> 'http://sindresorhus.com' +``` + +##### removeSingleSlash + +Type: `boolean`\ +Default: `true` + +Remove a sole `/` pathname in the output. This option is independant of `removeTrailingSlash`. + +```js +normalizeUrl('https://sindresorhus.com/'); +//=> 'https://sindresorhus.com' + +normalizeUrl('https://sindresorhus.com/', {removeSingleSlash: false}); +//=> 'https://sindresorhus.com/' +``` + +##### removeDirectoryIndex + +Type: `boolean | Array`\ +Default: `false` + +Removes the default directory index file from path that matches any of the provided strings or regexes. When `true`, the regex `/^index\.[a-z]+$/` is used. + +```js +normalizeUrl('www.sindresorhus.com/foo/default.php', { + removeDirectoryIndex: [/^default\.[a-z]+$/] +}); +//=> 'http://sindresorhus.com/foo' +``` + +##### sortQueryParameters + +Type: `boolean`\ +Default: `true` + +Sorts the query parameters alphabetically by key. + +```js +normalizeUrl('www.sindresorhus.com?b=two&a=one&c=three', { + sortQueryParameters: false +}); +//=> 'http://sindresorhus.com/?b=two&a=one&c=three' +``` + +## Related + +- [compare-urls](https://github.com/sindresorhus/compare-urls) - Compare URLs by first normalizing them + +--- + +
+ + Get professional support for this package with a Tidelift subscription + +
+ + Tidelift helps make open source sustainable for maintainers while giving companies
assurances about security, maintenance, and licensing for their dependencies. +
+
diff --git a/src/utils/url.ts b/src/utils/url.ts index 2074224e4..22f092f9a 100644 --- a/src/utils/url.ts +++ b/src/utils/url.ts @@ -4,7 +4,7 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -import normalizeUrl from 'normalize-url'; +import normalizeUrl from './normalize-url'; /** The http protocol string. */ export const HTTP_PROTOCOL = 'http:'; diff --git a/tsconfig.json b/tsconfig.json index a3e281c63..17276d3cd 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -1,6 +1,7 @@ { "extends": "@tsconfig/recommended/tsconfig.json", "compilerOptions": { + "allowJs": true, "target": "ES5", "declaration": true, "noImplicitAny": true, From 88943fd8c4375eac020575e944fa284da6f1e1ae Mon Sep 17 00:00:00 2001 From: Bill Thornton Date: Mon, 11 Jul 2022 17:21:58 -0400 Subject: [PATCH 2/3] Rewrite normalize-url --- .eslintignore | 1 - .github/dependabot.yml | 3 - jest.config.js | 2 +- .../__tests__/address-candidates.test.ts | 5 +- src/utils/__tests__/normalize-url.test.ts | 23 ++ src/utils/normalize-url.ts | 104 +++++++ src/utils/normalize-url/index.d.ts | 263 ---------------- src/utils/normalize-url/index.js | 246 --------------- src/utils/normalize-url/license | 9 - src/utils/normalize-url/package.json | 52 ---- src/utils/normalize-url/readme.md | 287 ------------------ src/utils/url.ts | 2 +- tsconfig.json | 1 - 13 files changed, 131 insertions(+), 867 deletions(-) create mode 100644 src/utils/__tests__/normalize-url.test.ts create mode 100644 src/utils/normalize-url.ts delete mode 100644 src/utils/normalize-url/index.d.ts delete mode 100644 src/utils/normalize-url/index.js delete mode 100644 src/utils/normalize-url/license delete mode 100644 src/utils/normalize-url/package.json delete mode 100644 src/utils/normalize-url/readme.md diff --git a/.eslintignore b/.eslintignore index 42ac8061e..3c93d30b9 100644 --- a/.eslintignore +++ b/.eslintignore @@ -2,4 +2,3 @@ dist generated-client lib node_modules -src/utils/normalize-url diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 5ea8b5837..5d8177c23 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -6,9 +6,6 @@ updates: interval: weekly open-pull-requests-limit: 10 rebase-strategy: disabled - ignore: - - dependency-name: normalize-url - update-types: [ version-update:semver-major ] - package-ecosystem: github-actions directory: / diff --git a/jest.config.js b/jest.config.js index 5b32b7af5..ddc159075 100644 --- a/jest.config.js +++ b/jest.config.js @@ -12,5 +12,5 @@ module.exports = { // Coverage options collectCoverageFrom: [ 'src/**' ], - coveragePathIgnorePatterns: [ '__helpers__', 'generated-client', 'normalize-url' ] + coveragePathIgnorePatterns: [ '__helpers__', 'generated-client' ] }; diff --git a/src/utils/__tests__/address-candidates.test.ts b/src/utils/__tests__/address-candidates.test.ts index 41bf30ea1..f1fae5583 100644 --- a/src/utils/__tests__/address-candidates.test.ts +++ b/src/utils/__tests__/address-candidates.test.ts @@ -38,10 +38,9 @@ describe('Address Candidates', () => { expect(candidates[1]).toBe('http://example.com:8888/'); }); - it('should return the entered url non http(s) protocols', () => { + it('should return an empty list for urls with non http(s) protocols', () => { const candidates = getAddressCandidates('ftp://example.com'); - expect(candidates).toHaveLength(1); - expect(candidates[0]).toBe('ftp://example.com/'); + expect(candidates).toHaveLength(0); }); it('should return an empty list for invalid urls', () => { diff --git a/src/utils/__tests__/normalize-url.test.ts b/src/utils/__tests__/normalize-url.test.ts new file mode 100644 index 000000000..54db9c475 --- /dev/null +++ b/src/utils/__tests__/normalize-url.test.ts @@ -0,0 +1,23 @@ +/** + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +import normalizeUrl from '../normalize-url'; + +/** + * Url normalizing tests. + * + * @group unit/utils + */ +describe('Normalize URLs', () => { + it('should normalize URLs correctly', () => { + expect(normalizeUrl('http://foobar/')).toBe('http://foobar/'); + expect(normalizeUrl('//foobar/')).toBe('http://foobar/'); + expect(normalizeUrl('foobar/')).toBe('http://foobar/'); + expect(normalizeUrl('foobar')).toBe('http://foobar/'); + + // FIXME: Need more comprehensive tests + }); +}); diff --git a/src/utils/normalize-url.ts b/src/utils/normalize-url.ts new file mode 100644 index 000000000..3711a47b9 --- /dev/null +++ b/src/utils/normalize-url.ts @@ -0,0 +1,104 @@ +/** + * MIT License + * + * Copyright (c) 2022 Jellyfin Contributors + * Copyright (c) 2015 - 2022 Sindre Sorhus (https://sindresorhus.com) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS + * OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * A fork of https://github.com/sindresorhus/normalize-url ported to typescript with all unneeded features removed + */ + +import { HTTP_PROTOCOL } from './url'; + +export default function normalizeUrl(urlString: string): string { + urlString = urlString.trim(); + + const hasRelativeProtocol = urlString.startsWith('//'); + const isRelativeUrl = !hasRelativeProtocol && /^\.*\//.test(urlString); + + // Prepend protocol + if (!isRelativeUrl) { + urlString = urlString.replace(/^(?!(?:\w+:)?\/\/)|^\/\//, HTTP_PROTOCOL); + } + + if (!/^https?:/i.test(urlString)) { + throw new Error('only http or https protocols are supported'); + } + + const urlObject = new URL(urlString); + + // Remove hash + urlObject.hash = ''; + + // Remove duplicate slashes if not preceded by a protocol + // NOTE: This could be implemented using a single negative lookbehind + // regex, but we avoid that to maintain compatibility with older js engines + // which do not have support for that feature. + if (urlObject.pathname) { + // TODO: Replace everything below with `urlObject.pathname = urlObject.pathname.replace(/(? 'http://sindresorhus.com' - - normalizeUrl('//sindresorhus.com:80/', {normalizeProtocol: false}); - //=> '//sindresorhus.com' - ``` - */ - readonly normalizeProtocol?: boolean; - - /** - Normalizes `https:` URLs to `http:`. - - @default false - - @example - ``` - normalizeUrl('https://sindresorhus.com:80/'); - //=> 'https://sindresorhus.com' - - normalizeUrl('https://sindresorhus.com:80/', {forceHttp: true}); - //=> 'http://sindresorhus.com' - ``` - */ - readonly forceHttp?: boolean; - - /** - Normalizes `http:` URLs to `https:`. - - This option can't be used with the `forceHttp` option at the same time. - - @default false - - @example - ``` - normalizeUrl('https://sindresorhus.com:80/'); - //=> 'https://sindresorhus.com' - - normalizeUrl('http://sindresorhus.com:80/', {forceHttps: true}); - //=> 'https://sindresorhus.com' - ``` - */ - readonly forceHttps?: boolean; - - /** - Strip the [authentication](https://en.wikipedia.org/wiki/Basic_access_authentication) part of a URL. - - @default true - - @example - ``` - normalizeUrl('user:password@sindresorhus.com'); - //=> 'https://sindresorhus.com' - - normalizeUrl('user:password@sindresorhus.com', {stripAuthentication: false}); - //=> 'https://user:password@sindresorhus.com' - ``` - */ - readonly stripAuthentication?: boolean; - - /** - Removes hash from the URL. - - @default false - - @example - ``` - normalizeUrl('sindresorhus.com/about.html#contact'); - //=> 'http://sindresorhus.com/about.html#contact' - - normalizeUrl('sindresorhus.com/about.html#contact', {stripHash: true}); - //=> 'http://sindresorhus.com/about.html' - ``` - */ - readonly stripHash?: boolean; - - /** - Removes HTTP(S) protocol from an URL `http://sindresorhus.com` → `sindresorhus.com`. - - @default false - - @example - ``` - normalizeUrl('https://sindresorhus.com'); - //=> 'https://sindresorhus.com' - - normalizeUrl('sindresorhus.com', {stripProtocol: true}); - //=> 'sindresorhus.com' - ``` - */ - readonly stripProtocol?: boolean; - - /** - Strip the [text fragment](https://web.dev/text-fragments/) part of the URL - - __Note:__ The text fragment will always be removed if the `stripHash` option is set to `true`, as the hash contains the text fragment. - - @default true - - @example - ``` - normalizeUrl('http://sindresorhus.com/about.html#:~:text=hello'); - //=> 'http://sindresorhus.com/about.html#' - - normalizeUrl('http://sindresorhus.com/about.html#section:~:text=hello'); - //=> 'http://sindresorhus.com/about.html#section' - - normalizeUrl('http://sindresorhus.com/about.html#:~:text=hello', {stripTextFragment: false}); - //=> 'http://sindresorhus.com/about.html#:~:text=hello' - - normalizeUrl('http://sindresorhus.com/about.html#section:~:text=hello', {stripTextFragment: false}); - //=> 'http://sindresorhus.com/about.html#section:~:text=hello' - ``` - */ - readonly stripTextFragment?: boolean; - - /** - Removes `www.` from the URL. - - @default true - - @example - ``` - normalizeUrl('http://www.sindresorhus.com'); - //=> 'http://sindresorhus.com' - - normalizeUrl('http://www.sindresorhus.com', {stripWWW: false}); - //=> 'http://www.sindresorhus.com' - ``` - */ - readonly stripWWW?: boolean; - - /** - Removes query parameters that matches any of the provided strings or regexes. - - @default [/^utm_\w+/i] - - @example - ``` - normalizeUrl('www.sindresorhus.com?foo=bar&ref=test_ref', { - removeQueryParameters: ['ref'] - }); - //=> 'http://sindresorhus.com/?foo=bar' - ``` - - If a boolean is provided, `true` will remove all the query parameters. - - ``` - normalizeUrl('www.sindresorhus.com?foo=bar', { - removeQueryParameters: true - }); - //=> 'http://sindresorhus.com' - ``` - - `false` will not remove any query parameter. - - ``` - normalizeUrl('www.sindresorhus.com?foo=bar&utm_medium=test&ref=test_ref', { - removeQueryParameters: false - }); - //=> 'http://www.sindresorhus.com/?foo=bar&ref=test_ref&utm_medium=test' - ``` - */ - readonly removeQueryParameters?: ReadonlyArray | boolean; - - /** - Removes trailing slash. - - __Note__: Trailing slash is always removed if the URL doesn't have a pathname unless the `removeSingleSlash` option is set to `false`. - - @default true - - @example - ``` - normalizeUrl('http://sindresorhus.com/redirect/'); - //=> 'http://sindresorhus.com/redirect' - - normalizeUrl('http://sindresorhus.com/redirect/', {removeTrailingSlash: false}); - //=> 'http://sindresorhus.com/redirect/' - - normalizeUrl('http://sindresorhus.com/', {removeTrailingSlash: false}); - //=> 'http://sindresorhus.com' - ``` - */ - readonly removeTrailingSlash?: boolean; - - /** - Remove a sole `/` pathname in the output. This option is independant of `removeTrailingSlash`. - - @default true - - @example - ``` - normalizeUrl('https://sindresorhus.com/'); - //=> 'https://sindresorhus.com' - - normalizeUrl('https://sindresorhus.com/', {removeSingleSlash: false}); - //=> 'https://sindresorhus.com/' - ``` - */ - readonly removeSingleSlash?: boolean; - - /** - Removes the default directory index file from path that matches any of the provided strings or regexes. - When `true`, the regex `/^index\.[a-z]+$/` is used. - - @default false - - @example - ``` - normalizeUrl('www.sindresorhus.com/foo/default.php', { - removeDirectoryIndex: [/^default\.[a-z]+$/] - }); - //=> 'http://sindresorhus.com/foo' - ``` - */ - readonly removeDirectoryIndex?: boolean | ReadonlyArray; - - /** - Sorts the query parameters alphabetically by key. - - @default true - - @example - ``` - normalizeUrl('www.sindresorhus.com?b=two&a=one&c=three', { - sortQueryParameters: false - }); - //=> 'http://sindresorhus.com/?b=two&a=one&c=three' - ``` - */ - readonly sortQueryParameters?: boolean; -} - -/** -[Normalize](https://en.wikipedia.org/wiki/URL_normalization) a URL. - -@param url - URL to normalize, including [data URL](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URIs). - -@example -``` -import normalizeUrl from 'normalize-url'; - -normalizeUrl('sindresorhus.com'); -//=> 'http://sindresorhus.com' - -normalizeUrl('//www.sindresorhus.com:80/../baz?b=bar&a=foo'); -//=> 'http://sindresorhus.com/baz?a=foo&b=bar' -``` -*/ -export default function normalizeUrl(url: string, options?: Options): string; diff --git a/src/utils/normalize-url/index.js b/src/utils/normalize-url/index.js deleted file mode 100644 index 1c5f9a14f..000000000 --- a/src/utils/normalize-url/index.js +++ /dev/null @@ -1,246 +0,0 @@ -// https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URIs -const DATA_URL_DEFAULT_MIME_TYPE = 'text/plain'; -const DATA_URL_DEFAULT_CHARSET = 'us-ascii'; - -const testParameter = (name, filters) => filters.some(filter => filter instanceof RegExp ? filter.test(name) : filter === name); - -const normalizeDataURL = (urlString, {stripHash}) => { - const match = /^data:(?[^,]*?),(?[^#]*?)(?:#(?.*))?$/.exec(urlString); - - if (!match) { - throw new Error(`Invalid URL: ${urlString}`); - } - - let {type, data, hash} = match.groups; - const mediaType = type.split(';'); - hash = stripHash ? '' : hash; - - let isBase64 = false; - if (mediaType[mediaType.length - 1] === 'base64') { - mediaType.pop(); - isBase64 = true; - } - - // Lowercase MIME type - const mimeType = (mediaType.shift() || '').toLowerCase(); - const attributes = mediaType - .map(attribute => { - let [key, value = ''] = attribute.split('=').map(string => string.trim()); - - // Lowercase `charset` - if (key === 'charset') { - value = value.toLowerCase(); - - if (value === DATA_URL_DEFAULT_CHARSET) { - return ''; - } - } - - return `${key}${value ? `=${value}` : ''}`; - }) - .filter(Boolean); - - const normalizedMediaType = [ - ...attributes, - ]; - - if (isBase64) { - normalizedMediaType.push('base64'); - } - - if (normalizedMediaType.length > 0 || (mimeType && mimeType !== DATA_URL_DEFAULT_MIME_TYPE)) { - normalizedMediaType.unshift(mimeType); - } - - return `data:${normalizedMediaType.join(';')},${isBase64 ? data.trim() : data}${hash ? `#${hash}` : ''}`; -}; - -export default function normalizeUrl(urlString, options) { - options = { - defaultProtocol: 'http:', - normalizeProtocol: true, - forceHttp: false, - forceHttps: false, - stripAuthentication: true, - stripHash: false, - stripTextFragment: true, - stripWWW: true, - removeQueryParameters: [/^utm_\w+/i], - removeTrailingSlash: true, - removeSingleSlash: true, - removeDirectoryIndex: false, - sortQueryParameters: true, - ...options, - }; - - urlString = urlString.trim(); - - // Data URL - if (/^data:/i.test(urlString)) { - return normalizeDataURL(urlString, options); - } - - if (/^view-source:/i.test(urlString)) { - throw new Error('`view-source:` is not supported as it is a non-standard protocol'); - } - - const hasRelativeProtocol = urlString.startsWith('//'); - const isRelativeUrl = !hasRelativeProtocol && /^\.*\//.test(urlString); - - // Prepend protocol - if (!isRelativeUrl) { - urlString = urlString.replace(/^(?!(?:\w+:)?\/\/)|^\/\//, options.defaultProtocol); - } - - const urlObject = new URL(urlString); - - if (options.forceHttp && options.forceHttps) { - throw new Error('The `forceHttp` and `forceHttps` options cannot be used together'); - } - - if (options.forceHttp && urlObject.protocol === 'https:') { - urlObject.protocol = 'http:'; - } - - if (options.forceHttps && urlObject.protocol === 'http:') { - urlObject.protocol = 'https:'; - } - - // Remove auth - if (options.stripAuthentication) { - urlObject.username = ''; - urlObject.password = ''; - } - - // Remove hash - if (options.stripHash) { - urlObject.hash = ''; - } else if (options.stripTextFragment) { - urlObject.hash = urlObject.hash.replace(/#?:~:text.*?$/i, ''); - } - - // Remove duplicate slashes if not preceded by a protocol - // NOTE: This could be implemented using a single negative lookbehind - // regex, but we avoid that to maintain compatibility with older js engines - // which do not have support for that feature. - if (urlObject.pathname) { - // TODO: Replace everything below with `urlObject.pathname = urlObject.pathname.replace(/(? 0) { - let pathComponents = urlObject.pathname.split('/'); - const lastComponent = pathComponents[pathComponents.length - 1]; - - if (testParameter(lastComponent, options.removeDirectoryIndex)) { - pathComponents = pathComponents.slice(0, -1); - urlObject.pathname = pathComponents.slice(1).join('/') + '/'; - } - } - - if (urlObject.hostname) { - // Remove trailing dot - urlObject.hostname = urlObject.hostname.replace(/\.$/, ''); - - // Remove `www.` - if (options.stripWWW && /^www\.(?!www\.)[a-z\-\d]{1,63}\.[a-z.\-\d]{2,63}$/.test(urlObject.hostname)) { - // Each label should be max 63 at length (min: 1). - // Source: https://en.wikipedia.org/wiki/Hostname#Restrictions_on_valid_host_names - // Each TLD should be up to 63 characters long (min: 2). - // It is technically possible to have a single character TLD, but none currently exist. - urlObject.hostname = urlObject.hostname.replace(/^www\./, ''); - } - } - - // Remove query unwanted parameters - if (Array.isArray(options.removeQueryParameters)) { - // eslint-disable-next-line unicorn/no-useless-spread -- We are intentionally spreading to get a copy. - for (const key of [...urlObject.searchParams.keys()]) { - if (testParameter(key, options.removeQueryParameters)) { - urlObject.searchParams.delete(key); - } - } - } - - if (options.removeQueryParameters === true) { - urlObject.search = ''; - } - - // Sort query parameters - if (options.sortQueryParameters) { - urlObject.searchParams.sort(); - - // Calling `.sort()` encodes the search parameters, so we need to decode them again. - try { - urlObject.search = decodeURIComponent(urlObject.search); - } catch {} - } - - if (options.removeTrailingSlash) { - urlObject.pathname = urlObject.pathname.replace(/\/$/, ''); - } - - const oldUrlString = urlString; - - // Take advantage of many of the Node `url` normalizations - urlString = urlObject.toString(); - - if (!options.removeSingleSlash && urlObject.pathname === '/' && !oldUrlString.endsWith('/') && urlObject.hash === '') { - urlString = urlString.replace(/\/$/, ''); - } - - // Remove ending `/` unless removeSingleSlash is false - if ((options.removeTrailingSlash || urlObject.pathname === '/') && urlObject.hash === '' && options.removeSingleSlash) { - urlString = urlString.replace(/\/$/, ''); - } - - // Restore relative protocol, if applicable - if (hasRelativeProtocol && !options.normalizeProtocol) { - urlString = urlString.replace(/^http:\/\//, '//'); - } - - // Remove http/https - if (options.stripProtocol) { - urlString = urlString.replace(/^(?:https?:)?\/\//, ''); - } - - return urlString; -} diff --git a/src/utils/normalize-url/license b/src/utils/normalize-url/license deleted file mode 100644 index fa7ceba3e..000000000 --- a/src/utils/normalize-url/license +++ /dev/null @@ -1,9 +0,0 @@ -MIT License - -Copyright (c) Sindre Sorhus (https://sindresorhus.com) - -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/src/utils/normalize-url/package.json b/src/utils/normalize-url/package.json deleted file mode 100644 index b9eb910c1..000000000 --- a/src/utils/normalize-url/package.json +++ /dev/null @@ -1,52 +0,0 @@ -{ - "name": "normalize-url", - "version": "7.0.3", - "description": "Normalize a URL", - "license": "MIT", - "repository": "sindresorhus/normalize-url", - "funding": "https://github.com/sponsors/sindresorhus", - "author": { - "name": "Sindre Sorhus", - "email": "sindresorhus@gmail.com", - "url": "https://sindresorhus.com" - }, - "type": "module", - "exports": "./index.js", - "engines": { - "node": ">=12.20" - }, - "scripts": { - "test": "ava && tsd" - }, - "files": [ - "index.js", - "index.d.ts" - ], - "keywords": [ - "normalize", - "url", - "uri", - "address", - "string", - "normalization", - "normalisation", - "query", - "querystring", - "simplify", - "strip", - "trim", - "canonical" - ], - "devDependencies": { - "ava": "^4.0.1", - "c8": "^7.11.0", - "tsd": "^0.19.1", - "xo": "^0.47.0" - }, - "c8": { - "reporter": [ - "text", - "lcov" - ] - } -} diff --git a/src/utils/normalize-url/readme.md b/src/utils/normalize-url/readme.md deleted file mode 100644 index 71d8638a1..000000000 --- a/src/utils/normalize-url/readme.md +++ /dev/null @@ -1,287 +0,0 @@ -# normalize-url [![Coverage Status](https://codecov.io/gh/sindresorhus/normalize-url/branch/main/graph/badge.svg)](https://codecov.io/gh/sindresorhus/normalize-url) - -> [Normalize](https://en.wikipedia.org/wiki/URL_normalization) a URL - -Useful when you need to display, store, deduplicate, sort, compare, etc, URLs. - -**Note:** This package does **not** do URL sanitization. [Garbage in, garbage out.](https://en.wikipedia.org/wiki/Garbage_in,_garbage_out) If you use this in a server context and accept URLs as user input, it's up to you to protect against invalid URLs, [path traversal attacks](https://owasp.org/www-community/attacks/Path_Traversal), etc. - -## Install - -```sh -npm install normalize-url -``` - -*If you need to use this in the browser, use version 4: `npm i normalize-url@4`* - -## Usage - -```js -import normalizeUrl from 'normalize-url'; - -normalizeUrl('sindresorhus.com'); -//=> 'http://sindresorhus.com' - -normalizeUrl('//www.sindresorhus.com:80/../baz?b=bar&a=foo'); -//=> 'http://sindresorhus.com/baz?a=foo&b=bar' -``` - -## API - -### normalizeUrl(url, options?) - -#### url - -Type: `string` - -URL to normalize, including [data URL](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URIs). - -#### options - -Type: `object` - -##### defaultProtocol - -Type: `string`\ -Default: `http:` - -##### normalizeProtocol - -Type: `boolean`\ -Default: `true` - -Prepend `defaultProtocol` to the URL if it's protocol-relative. - -```js -normalizeUrl('//sindresorhus.com:80/'); -//=> 'http://sindresorhus.com' - -normalizeUrl('//sindresorhus.com:80/', {normalizeProtocol: false}); -//=> '//sindresorhus.com' -``` - -##### forceHttp - -Type: `boolean`\ -Default: `false` - -Normalize `https:` to `http:`. - -```js -normalizeUrl('https://sindresorhus.com:80/'); -//=> 'https://sindresorhus.com' - -normalizeUrl('https://sindresorhus.com:80/', {forceHttp: true}); -//=> 'http://sindresorhus.com' -``` - -##### forceHttps - -Type: `boolean`\ -Default: `false` - -Normalize `http:` to `https:`. - -```js -normalizeUrl('https://sindresorhus.com:80/'); -//=> 'https://sindresorhus.com' - -normalizeUrl('http://sindresorhus.com:80/', {forceHttps: true}); -//=> 'https://sindresorhus.com' -``` - -This option can't be used with the `forceHttp` option at the same time. - -##### stripAuthentication - -Type: `boolean`\ -Default: `true` - -Strip the [authentication](https://en.wikipedia.org/wiki/Basic_access_authentication) part of the URL. - -```js -normalizeUrl('user:password@sindresorhus.com'); -//=> 'https://sindresorhus.com' - -normalizeUrl('user:password@sindresorhus.com', {stripAuthentication: false}); -//=> 'https://user:password@sindresorhus.com' -``` - -##### stripHash - -Type: `boolean`\ -Default: `false` - -Strip the hash part of the URL. - -```js -normalizeUrl('sindresorhus.com/about.html#contact'); -//=> 'http://sindresorhus.com/about.html#contact' - -normalizeUrl('sindresorhus.com/about.html#contact', {stripHash: true}); -//=> 'http://sindresorhus.com/about.html' -``` - -##### stripProtocol - -Type: `boolean`\ -Default: `false` - -Remove HTTP(S) protocol from the URL: `http://sindresorhus.com` → `sindresorhus.com`. - -```js -normalizeUrl('https://sindresorhus.com'); -//=> 'https://sindresorhus.com' - -normalizeUrl('https://sindresorhus.com', {stripProtocol: true}); -//=> 'sindresorhus.com' -``` - -##### stripTextFragment - -Type: `boolean`\ -Default: `true` - -Strip the [text fragment](https://web.dev/text-fragments/) part of the URL. - -**Note:** The text fragment will always be removed if the `stripHash` option is set to `true`, as the hash contains the text fragment. - -```js -normalizeUrl('http://sindresorhus.com/about.html#:~:text=hello'); -//=> 'http://sindresorhus.com/about.html#' - -normalizeUrl('http://sindresorhus.com/about.html#section:~:text=hello'); -//=> 'http://sindresorhus.com/about.html#section' - -normalizeUrl('http://sindresorhus.com/about.html#:~:text=hello', {stripTextFragment: false}); -//=> 'http://sindresorhus.com/about.html#:~:text=hello' - -normalizeUrl('http://sindresorhus.com/about.html#section:~:text=hello', {stripTextFragment: false}); -//=> 'http://sindresorhus.com/about.html#section:~:text=hello' -``` - -##### stripWWW - -Type: `boolean`\ -Default: `true` - -Remove `www.` from the URL. - -```js -normalizeUrl('http://www.sindresorhus.com'); -//=> 'http://sindresorhus.com' - -normalizeUrl('http://www.sindresorhus.com', {stripWWW: false}); -//=> 'http://www.sindresorhus.com' -``` - -##### removeQueryParameters - -Type: `Array | boolean`\ -Default: `[/^utm_\w+/i]` - -Remove query parameters that matches any of the provided strings or regexes. - -```js -normalizeUrl('www.sindresorhus.com?foo=bar&ref=test_ref', { - removeQueryParameters: ['ref'] -}); -//=> 'http://sindresorhus.com/?foo=bar' -``` - -If a boolean is provided, `true` will remove all the query parameters. - -```js -normalizeUrl('www.sindresorhus.com?foo=bar', { - removeQueryParameters: true -}); -//=> 'http://sindresorhus.com' -``` - -`false` will not remove any query parameter. - -```js -normalizeUrl('www.sindresorhus.com?foo=bar&utm_medium=test&ref=test_ref', { - removeQueryParameters: false -}); -//=> 'http://www.sindresorhus.com/?foo=bar&ref=test_ref&utm_medium=test' -``` - -##### removeTrailingSlash - -Type: `boolean`\ -Default: `true` - -Remove trailing slash. - -**Note:** Trailing slash is always removed if the URL doesn't have a pathname unless the `removeSingleSlash` option is set to `false`. - -```js -normalizeUrl('http://sindresorhus.com/redirect/'); -//=> 'http://sindresorhus.com/redirect' - -normalizeUrl('http://sindresorhus.com/redirect/', {removeTrailingSlash: false}); -//=> 'http://sindresorhus.com/redirect/' - -normalizeUrl('http://sindresorhus.com/', {removeTrailingSlash: false}); -//=> 'http://sindresorhus.com' -``` - -##### removeSingleSlash - -Type: `boolean`\ -Default: `true` - -Remove a sole `/` pathname in the output. This option is independant of `removeTrailingSlash`. - -```js -normalizeUrl('https://sindresorhus.com/'); -//=> 'https://sindresorhus.com' - -normalizeUrl('https://sindresorhus.com/', {removeSingleSlash: false}); -//=> 'https://sindresorhus.com/' -``` - -##### removeDirectoryIndex - -Type: `boolean | Array`\ -Default: `false` - -Removes the default directory index file from path that matches any of the provided strings or regexes. When `true`, the regex `/^index\.[a-z]+$/` is used. - -```js -normalizeUrl('www.sindresorhus.com/foo/default.php', { - removeDirectoryIndex: [/^default\.[a-z]+$/] -}); -//=> 'http://sindresorhus.com/foo' -``` - -##### sortQueryParameters - -Type: `boolean`\ -Default: `true` - -Sorts the query parameters alphabetically by key. - -```js -normalizeUrl('www.sindresorhus.com?b=two&a=one&c=three', { - sortQueryParameters: false -}); -//=> 'http://sindresorhus.com/?b=two&a=one&c=three' -``` - -## Related - -- [compare-urls](https://github.com/sindresorhus/compare-urls) - Compare URLs by first normalizing them - ---- - -
- - Get professional support for this package with a Tidelift subscription - -
- - Tidelift helps make open source sustainable for maintainers while giving companies
assurances about security, maintenance, and licensing for their dependencies. -
-
diff --git a/src/utils/url.ts b/src/utils/url.ts index 22f092f9a..953df8966 100644 --- a/src/utils/url.ts +++ b/src/utils/url.ts @@ -49,6 +49,6 @@ export function parseUrl(input: string): URL { } return new URL( - normalizeUrl(input, { stripWWW: false }) + normalizeUrl(input) ); } diff --git a/tsconfig.json b/tsconfig.json index 17276d3cd..a3e281c63 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -1,7 +1,6 @@ { "extends": "@tsconfig/recommended/tsconfig.json", "compilerOptions": { - "allowJs": true, "target": "ES5", "declaration": true, "noImplicitAny": true, From af787b49162a8120be02422ffe0124112aee6838 Mon Sep 17 00:00:00 2001 From: Bill Thornton Date: Wed, 13 Jul 2022 17:16:30 -0400 Subject: [PATCH 3/3] Add more tests for normalize-url --- src/utils/__tests__/normalize-url.test.ts | 40 ++++++++++++++++++++--- src/utils/normalize-url.ts | 17 ++++++++-- 2 files changed, 49 insertions(+), 8 deletions(-) diff --git a/src/utils/__tests__/normalize-url.test.ts b/src/utils/__tests__/normalize-url.test.ts index 54db9c475..7a4f5f004 100644 --- a/src/utils/__tests__/normalize-url.test.ts +++ b/src/utils/__tests__/normalize-url.test.ts @@ -13,11 +13,41 @@ import normalizeUrl from '../normalize-url'; */ describe('Normalize URLs', () => { it('should normalize URLs correctly', () => { - expect(normalizeUrl('http://foobar/')).toBe('http://foobar/'); - expect(normalizeUrl('//foobar/')).toBe('http://foobar/'); - expect(normalizeUrl('foobar/')).toBe('http://foobar/'); - expect(normalizeUrl('foobar')).toBe('http://foobar/'); + expect(normalizeUrl('http://example.com/')).toBe('http://example.com/'); + expect(normalizeUrl('http://example.com')).toBe('http://example.com/'); + expect(normalizeUrl('https://example.com/')).toBe('https://example.com/'); + expect(normalizeUrl('http://example.com/foo/bar/')).toBe('http://example.com/foo/bar'); - // FIXME: Need more comprehensive tests + // Don't reduce double slashes if part of a protocol + expect(normalizeUrl('https://example.com/https://jellyfin.org')).toBe('https://example.com/https://jellyfin.org'); + expect(normalizeUrl('https://example.com/https://jellyfin.org/foo//bar')).toBe('https://example.com/https://jellyfin.org/foo/bar'); + expect(normalizeUrl('https://example.com/http://jellyfin.org')).toBe('https://example.com/http://jellyfin.org'); + expect(normalizeUrl('https://example.com/http://jellyfin.org/foo//bar')).toBe('https://example.com/http://jellyfin.org/foo/bar'); + + // Strip trailing dots in domain names + expect(normalizeUrl('http://example.com./')).toBe('http://example.com/'); + + // Strip hashes from URLs + expect(normalizeUrl('http://example.com/#/hash/path')).toBe('http://example.com/'); + }); + + it('should default to using http protocol when not specified', () => { + expect(normalizeUrl('//example.com/')).toBe('http://example.com/'); + expect(normalizeUrl('example.com/')).toBe('http://example.com/'); + expect(normalizeUrl('example.com')).toBe('http://example.com/'); + }); + + it('should throw for non http(s) protocols', () => { + expect(() => { + normalizeUrl('data:ASDF'); + }).toThrow('data URLs are not supported'); + + expect(() => { + normalizeUrl('view-source:example.com'); + }).toThrow('`view-source:` is not supported as it is a non-standard protocol'); + + expect(() => { + normalizeUrl('ftp://example.com'); + }).toThrow('only http or https protocols are supported'); }); }); diff --git a/src/utils/normalize-url.ts b/src/utils/normalize-url.ts index 3711a47b9..a8e53fa3c 100644 --- a/src/utils/normalize-url.ts +++ b/src/utils/normalize-url.ts @@ -18,15 +18,26 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +import { HTTP_PROTOCOL } from './url'; + /* - * A fork of https://github.com/sindresorhus/normalize-url ported to typescript with all unneeded features removed + * A fork of https://github.com/sindresorhus/normalize-url ported to typescript with all unneeded features removed. + * This was necessary due to v7 only providing ES module builds that are poorly supported and v6 using poorly supported + * regex features. */ -import { HTTP_PROTOCOL } from './url'; - export default function normalizeUrl(urlString: string): string { urlString = urlString.trim(); + // Data URL + if (/^data:/i.test(urlString)) { + throw new Error('data URLs are not supported'); + } + + if (/^view-source:/i.test(urlString)) { + throw new Error('`view-source:` is not supported as it is a non-standard protocol'); + } + const hasRelativeProtocol = urlString.startsWith('//'); const isRelativeUrl = !hasRelativeProtocol && /^\.*\//.test(urlString);