Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: better handling of whitespace (backport to v6) #591

Merged
merged 1 commit into from
Jul 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 71 additions & 24 deletions semver.js
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,11 @@ var MAX_SAFE_INTEGER = Number.MAX_SAFE_INTEGER ||
// Max safe segment length for coercion.
var MAX_SAFE_COMPONENT_LENGTH = 16

var MAX_SAFE_BUILD_LENGTH = MAX_LENGTH - 6

// The actual regexps go on exports.re
var re = exports.re = []
var safeRe = exports.safeRe = []
var src = exports.src = []
var t = exports.tokens = {}
var R = 0
Expand All @@ -36,6 +39,31 @@ function tok (n) {
t[n] = R++
}

var LETTERDASHNUMBER = '[a-zA-Z0-9-]'

// Replace some greedy regex tokens to prevent regex dos issues. These regex are
// used internally via the safeRe object since all inputs in this library get
// normalized first to trim and collapse all extra whitespace. The original
// regexes are exported for userland consumption and lower level usage. A
// future breaking change could export the safer regex only with a note that
// all input should have extra whitespace removed.
var safeRegexReplacements = [
['\\s', 1],
['\\d', MAX_LENGTH],
[LETTERDASHNUMBER, MAX_SAFE_BUILD_LENGTH],
]

function makeSafeRe (value) {
for (var i = 0; i < safeRegexReplacements.length; i++) {
var token = safeRegexReplacements[i][0]
var max = safeRegexReplacements[i][1]
value = value
.split(token + '*').join(token + '{0,' + max + '}')
.split(token + '+').join(token + '{1,' + max + '}')
}
return value
}

// The following Regular Expressions can be used for tokenizing,
// validating, and parsing SemVer version strings.

Expand All @@ -45,14 +73,14 @@ function tok (n) {
tok('NUMERICIDENTIFIER')
src[t.NUMERICIDENTIFIER] = '0|[1-9]\\d*'
tok('NUMERICIDENTIFIERLOOSE')
src[t.NUMERICIDENTIFIERLOOSE] = '[0-9]+'
src[t.NUMERICIDENTIFIERLOOSE] = '\\d+'

// ## Non-numeric Identifier
// Zero or more digits, followed by a letter or hyphen, and then zero or
// more letters, digits, or hyphens.

tok('NONNUMERICIDENTIFIER')
src[t.NONNUMERICIDENTIFIER] = '\\d*[a-zA-Z-][a-zA-Z0-9-]*'
src[t.NONNUMERICIDENTIFIER] = '\\d*[a-zA-Z-]' + LETTERDASHNUMBER + '*'

// ## Main Version
// Three dot-separated numeric identifiers.
Expand Down Expand Up @@ -94,7 +122,7 @@ src[t.PRERELEASELOOSE] = '(?:-?(' + src[t.PRERELEASEIDENTIFIERLOOSE] +
// Any combination of digits, letters, or hyphens.

tok('BUILDIDENTIFIER')
src[t.BUILDIDENTIFIER] = '[0-9A-Za-z-]+'
src[t.BUILDIDENTIFIER] = LETTERDASHNUMBER + '+'

// ## Build Metadata
// Plus sign, followed by one or more period-separated build metadata
Expand Down Expand Up @@ -174,6 +202,7 @@ src[t.COERCE] = '(^|[^\\d])' +
'(?:$|[^\\d])'
tok('COERCERTL')
re[t.COERCERTL] = new RegExp(src[t.COERCE], 'g')
safeRe[t.COERCERTL] = new RegExp(makeSafeRe(src[t.COERCE]), 'g')

// Tilde ranges.
// Meaning is "reasonably at or greater than"
Expand All @@ -183,6 +212,7 @@ src[t.LONETILDE] = '(?:~>?)'
tok('TILDETRIM')
src[t.TILDETRIM] = '(\\s*)' + src[t.LONETILDE] + '\\s+'
re[t.TILDETRIM] = new RegExp(src[t.TILDETRIM], 'g')
safeRe[t.TILDETRIM] = new RegExp(makeSafeRe(src[t.TILDETRIM]), 'g')
var tildeTrimReplace = '$1~'

tok('TILDE')
Expand All @@ -198,6 +228,7 @@ src[t.LONECARET] = '(?:\\^)'
tok('CARETTRIM')
src[t.CARETTRIM] = '(\\s*)' + src[t.LONECARET] + '\\s+'
re[t.CARETTRIM] = new RegExp(src[t.CARETTRIM], 'g')
safeRe[t.CARETTRIM] = new RegExp(makeSafeRe(src[t.CARETTRIM]), 'g')
var caretTrimReplace = '$1^'

tok('CARET')
Expand All @@ -219,6 +250,7 @@ src[t.COMPARATORTRIM] = '(\\s*)' + src[t.GTLT] +

// this one has to use the /g flag
re[t.COMPARATORTRIM] = new RegExp(src[t.COMPARATORTRIM], 'g')
safeRe[t.COMPARATORTRIM] = new RegExp(makeSafeRe(src[t.COMPARATORTRIM]), 'g')
var comparatorTrimReplace = '$1$2$3'

// Something like `1.2.3 - 1.2.4`
Expand Down Expand Up @@ -247,6 +279,14 @@ for (var i = 0; i < R; i++) {
debug(i, src[i])
if (!re[i]) {
re[i] = new RegExp(src[i])

// Replace all greedy whitespace to prevent regex dos issues. These regex are
// used internally via the safeRe object since all inputs in this library get
// normalized first to trim and collapse all extra whitespace. The original
// regexes are exported for userland consumption and lower level usage. A
// future breaking change could export the safer regex only with a note that
// all input should have extra whitespace removed.
safeRe[i] = new RegExp(makeSafeRe(src[i]))
}
}

Expand All @@ -271,7 +311,7 @@ function parse (version, options) {
return null
}

var r = options.loose ? re[t.LOOSE] : re[t.FULL]
var r = options.loose ? safeRe[t.LOOSE] : safeRe[t.FULL]
if (!r.test(version)) {
return null
}
Expand Down Expand Up @@ -326,7 +366,7 @@ function SemVer (version, options) {
this.options = options
this.loose = !!options.loose

var m = version.trim().match(options.loose ? re[t.LOOSE] : re[t.FULL])
var m = version.trim().match(options.loose ? safeRe[t.LOOSE] : safeRe[t.FULL])

if (!m) {
throw new TypeError('Invalid Version: ' + version)
Expand Down Expand Up @@ -771,6 +811,7 @@ function Comparator (comp, options) {
return new Comparator(comp, options)
}

comp = comp.trim().split(/\s+/).join(' ')
debug('comparator', comp, options)
this.options = options
this.loose = !!options.loose
Expand All @@ -787,7 +828,7 @@ function Comparator (comp, options) {

var ANY = {}
Comparator.prototype.parse = function (comp) {
var r = this.options.loose ? re[t.COMPARATORLOOSE] : re[t.COMPARATOR]
var r = this.options.loose ? safeRe[t.COMPARATORLOOSE] : safeRe[t.COMPARATOR]
var m = comp.match(r)

if (!m) {
Expand Down Expand Up @@ -911,17 +952,24 @@ function Range (range, options) {
this.loose = !!options.loose
this.includePrerelease = !!options.includePrerelease

// First, split based on boolean or ||
// First reduce all whitespace as much as possible so we do not have to rely
// on potentially slow regexes like \s*. This is then stored and used for
// future error messages as well.
this.raw = range
this.set = range.split(/\s*\|\|\s*/).map(function (range) {
.trim()
.split(/\s+/)
.join(' ')

// First, split based on boolean or ||
this.set = this.raw.split('||').map(function (range) {
return this.parseRange(range.trim())
}, this).filter(function (c) {
// throw out any that are not relevant for whatever reason
return c.length
})

if (!this.set.length) {
throw new TypeError('Invalid SemVer Range: ' + range)
throw new TypeError('Invalid SemVer Range: ' + this.raw)
}

this.format()
Expand All @@ -940,28 +988,27 @@ Range.prototype.toString = function () {

Range.prototype.parseRange = function (range) {
var loose = this.options.loose
range = range.trim()
// `1.2.3 - 1.2.4` => `>=1.2.3 <=1.2.4`
var hr = loose ? re[t.HYPHENRANGELOOSE] : re[t.HYPHENRANGE]
var hr = loose ? safeRe[t.HYPHENRANGELOOSE] : safeRe[t.HYPHENRANGE]
range = range.replace(hr, hyphenReplace)
debug('hyphen replace', range)
// `> 1.2.3 < 1.2.5` => `>1.2.3 <1.2.5`
range = range.replace(re[t.COMPARATORTRIM], comparatorTrimReplace)
debug('comparator trim', range, re[t.COMPARATORTRIM])
range = range.replace(safeRe[t.COMPARATORTRIM], comparatorTrimReplace)
debug('comparator trim', range, safeRe[t.COMPARATORTRIM])

// `~ 1.2.3` => `~1.2.3`
range = range.replace(re[t.TILDETRIM], tildeTrimReplace)
range = range.replace(safeRe[t.TILDETRIM], tildeTrimReplace)

// `^ 1.2.3` => `^1.2.3`
range = range.replace(re[t.CARETTRIM], caretTrimReplace)
range = range.replace(safeRe[t.CARETTRIM], caretTrimReplace)

// normalize spaces
range = range.split(/\s+/).join(' ')

// At this point, the range is completely trimmed and
// ready to be split into comparators.

var compRe = loose ? re[t.COMPARATORLOOSE] : re[t.COMPARATOR]
var compRe = loose ? safeRe[t.COMPARATORLOOSE] : safeRe[t.COMPARATOR]
var set = range.split(' ').map(function (comp) {
return parseComparator(comp, this.options)
}, this).join(' ').split(/\s+/)
Expand Down Expand Up @@ -1061,7 +1108,7 @@ function replaceTildes (comp, options) {
}

function replaceTilde (comp, options) {
var r = options.loose ? re[t.TILDELOOSE] : re[t.TILDE]
var r = options.loose ? safeRe[t.TILDELOOSE] : safeRe[t.TILDE]
return comp.replace(r, function (_, M, m, p, pr) {
debug('tilde', comp, _, M, m, p, pr)
var ret
Expand Down Expand Up @@ -1102,7 +1149,7 @@ function replaceCarets (comp, options) {

function replaceCaret (comp, options) {
debug('caret', comp, options)
var r = options.loose ? re[t.CARETLOOSE] : re[t.CARET]
var r = options.loose ? safeRe[t.CARETLOOSE] : safeRe[t.CARET]
return comp.replace(r, function (_, M, m, p, pr) {
debug('caret', comp, _, M, m, p, pr)
var ret
Expand Down Expand Up @@ -1161,7 +1208,7 @@ function replaceXRanges (comp, options) {

function replaceXRange (comp, options) {
comp = comp.trim()
var r = options.loose ? re[t.XRANGELOOSE] : re[t.XRANGE]
var r = options.loose ? safeRe[t.XRANGELOOSE] : safeRe[t.XRANGE]
return comp.replace(r, function (ret, gtlt, M, m, p, pr) {
debug('xRange', comp, ret, gtlt, M, m, p, pr)
var xM = isX(M)
Expand Down Expand Up @@ -1236,7 +1283,7 @@ function replaceXRange (comp, options) {
function replaceStars (comp, options) {
debug('replaceStars', comp, options)
// Looseness is ignored here. star is always as loose as it gets!
return comp.trim().replace(re[t.STAR], '')
return comp.trim().replace(safeRe[t.STAR], '')
}

// This function is passed to string.replace(re[t.HYPHENRANGE])
Expand Down Expand Up @@ -1562,7 +1609,7 @@ function coerce (version, options) {

var match = null
if (!options.rtl) {
match = version.match(re[t.COERCE])
match = version.match(safeRe[t.COERCE])
} else {
// Find the right-most coercible string that does not share
// a terminus with a more left-ward coercible string.
Expand All @@ -1573,17 +1620,17 @@ function coerce (version, options) {
// Stop when we get a match that ends at the string end, since no
// coercible string can be more right-ward without the same terminus.
var next
while ((next = re[t.COERCERTL].exec(version)) &&
while ((next = safeRe[t.COERCERTL].exec(version)) &&
(!match || match.index + match[0].length !== version.length)
) {
if (!match ||
next.index + next[0].length !== match.index + match[0].length) {
match = next
}
re[t.COERCERTL].lastIndex = next.index + next[1].length + next[2].length
safeRe[t.COERCERTL].lastIndex = next.index + next[1].length + next[2].length
}
// leave it in a clean state
re[t.COERCERTL].lastIndex = -1
safeRe[t.COERCERTL].lastIndex = -1
}

if (match === null) {
Expand Down
10 changes: 10 additions & 0 deletions test/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,7 @@ test('negative range tests', function (t) {
['*', 'not a version'],
['>=2', 'glorp'],
['2.x', '3.0.0-pre.0', { includePrerelease: true }],
['== 1.0.0 || foo', '2.0.0', { loose: true }]
].forEach(function (v) {
var range = v[0]
var ver = v[1]
Expand Down Expand Up @@ -1092,3 +1093,12 @@ test('really big numeric prerelease value', function (t) {
t.strictSame(r.prerelease, [ 'beta', '90071992547409910' ])
t.end()
})

test('long build id', function (t) {
var longBuild = '-928490632884417731e7af463c92b034d6a78268fc993bcb88a57944'
var shortVersion = '1.1.1'
var longVersion = Number.MAX_SAFE_INTEGER + '.' + Number.MAX_SAFE_INTEGER + '.' + Number.MAX_SAFE_INTEGER
t.equal(semver.valid(shortVersion + longBuild), shortVersion + longBuild)
t.equal(semver.valid(longVersion + longBuild), longVersion + longBuild)
t.end()
})
14 changes: 14 additions & 0 deletions test/re.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
var test = require('tap').test
var semver = require('../')

test('has a list of src, re, and safeRe', function (t) {
semver.re.forEach(function (r) { return t.match(r, RegExp, 'regexps are regexps') })
semver.src.forEach(function (s) { return t.match(s, String, 'src is strings') })

semver.safeRe.forEach(function (r) {
t.notMatch(r.source, '\\s+', 'safe regex do not contain greedy whitespace')
t.notMatch(r.source, '\\s*', 'safe regex do not contain greedy whitespace')
})

t.end()
})
63 changes: 63 additions & 0 deletions test/whitespace.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
var test = require('tap').test
var semver = require('../')

var validRange = semver.validRange
var SemVer = semver.SemVer
var Range = semver.Range
var Comparator = semver.Comparator
var minVersion = semver.minVersion
var minSatisfying = semver.minSatisfying
var maxSatisfying = semver.maxSatisfying

function s(n, char) {
if (!n) {
n = 500000
}
if (!char) {
char = ' '
}
var c = ''
for (var i = 0; i < n; i++) {
c += char
}
return c
}

test('regex dos via range whitespace', function (t) {
// a range with this much whitespace would take a few minutes to process if
// any redos susceptible regexes were used. there is a global tap timeout per
// file set in the package.json that will error if this test takes too long.
var r = `1.2.3 ${s()} <1.3.0`

t.equal(new Range(r).range, '1.2.3 <1.3.0')
t.equal(validRange(r), '1.2.3 <1.3.0')
t.equal(minVersion(r).version, '1.2.3')
t.equal(minSatisfying(['1.2.3'], r), '1.2.3')
t.equal(maxSatisfying(['1.2.3'], r), '1.2.3')

t.end()
})

test('range with 0', function (t) {
var r = `1.2.3 ${s(null, '0')} <1.3.0`
t.throws(function () { return new Range(r).range })
t.equal(validRange(r), null)
t.throws(function () { return minVersion(r).version })
t.equal(minSatisfying(['1.2.3'], r), null)
t.equal(maxSatisfying(['1.2.3'], r), null)
t.end()
})

test('semver version', function (t) {
var v = `${s(125)}1.2.3${s(125)}`
var tooLong = `${s()}1.2.3${s()}`
t.equal(new SemVer(v).version, '1.2.3')
t.throws(function () { return new SemVer(tooLong) })
t.end()
})

test('comparator', function (t) {
var c = `${s()}<${s()}1.2.3${s()}`
t.equal(new Comparator(c).value, '<1.2.3')
t.end()
})