Skip to content

Commit

Permalink
fix: better handling of whitespace (#591)
Browse files Browse the repository at this point in the history
This is a backport of the following commits squashed to land on
`release/v6`:

- 717534e
- abdd93d
- cc6fde2
- 99d8287

Ref: #564


Co-authored-by: joaomoreno <[email protected]>
Co-authored-by: nicolo-ribaudo <[email protected]>
  • Loading branch information
3 people authored Jul 10, 2023
1 parent 39f6326 commit 928e56d
Show file tree
Hide file tree
Showing 4 changed files with 158 additions and 24 deletions.
95 changes: 71 additions & 24 deletions semver.js
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,11 @@ var MAX_SAFE_INTEGER = Number.MAX_SAFE_INTEGER ||
// Max safe segment length for coercion.
var MAX_SAFE_COMPONENT_LENGTH = 16

var MAX_SAFE_BUILD_LENGTH = MAX_LENGTH - 6

// The actual regexps go on exports.re
var re = exports.re = []
var safeRe = exports.safeRe = []
var src = exports.src = []
var t = exports.tokens = {}
var R = 0
Expand All @@ -36,6 +39,31 @@ function tok (n) {
t[n] = R++
}

var LETTERDASHNUMBER = '[a-zA-Z0-9-]'

// Replace some greedy regex tokens to prevent regex dos issues. These regex are
// used internally via the safeRe object since all inputs in this library get
// normalized first to trim and collapse all extra whitespace. The original
// regexes are exported for userland consumption and lower level usage. A
// future breaking change could export the safer regex only with a note that
// all input should have extra whitespace removed.
var safeRegexReplacements = [
['\\s', 1],
['\\d', MAX_LENGTH],
[LETTERDASHNUMBER, MAX_SAFE_BUILD_LENGTH],
]

function makeSafeRe (value) {
for (var i = 0; i < safeRegexReplacements.length; i++) {
var token = safeRegexReplacements[i][0]
var max = safeRegexReplacements[i][1]
value = value
.split(token + '*').join(token + '{0,' + max + '}')
.split(token + '+').join(token + '{1,' + max + '}')
}
return value
}

// The following Regular Expressions can be used for tokenizing,
// validating, and parsing SemVer version strings.

Expand All @@ -45,14 +73,14 @@ function tok (n) {
tok('NUMERICIDENTIFIER')
src[t.NUMERICIDENTIFIER] = '0|[1-9]\\d*'
tok('NUMERICIDENTIFIERLOOSE')
src[t.NUMERICIDENTIFIERLOOSE] = '[0-9]+'
src[t.NUMERICIDENTIFIERLOOSE] = '\\d+'

// ## Non-numeric Identifier
// Zero or more digits, followed by a letter or hyphen, and then zero or
// more letters, digits, or hyphens.

tok('NONNUMERICIDENTIFIER')
src[t.NONNUMERICIDENTIFIER] = '\\d*[a-zA-Z-][a-zA-Z0-9-]*'
src[t.NONNUMERICIDENTIFIER] = '\\d*[a-zA-Z-]' + LETTERDASHNUMBER + '*'

// ## Main Version
// Three dot-separated numeric identifiers.
Expand Down Expand Up @@ -94,7 +122,7 @@ src[t.PRERELEASELOOSE] = '(?:-?(' + src[t.PRERELEASEIDENTIFIERLOOSE] +
// Any combination of digits, letters, or hyphens.

tok('BUILDIDENTIFIER')
src[t.BUILDIDENTIFIER] = '[0-9A-Za-z-]+'
src[t.BUILDIDENTIFIER] = LETTERDASHNUMBER + '+'

// ## Build Metadata
// Plus sign, followed by one or more period-separated build metadata
Expand Down Expand Up @@ -174,6 +202,7 @@ src[t.COERCE] = '(^|[^\\d])' +
'(?:$|[^\\d])'
tok('COERCERTL')
re[t.COERCERTL] = new RegExp(src[t.COERCE], 'g')
safeRe[t.COERCERTL] = new RegExp(makeSafeRe(src[t.COERCE]), 'g')

// Tilde ranges.
// Meaning is "reasonably at or greater than"
Expand All @@ -183,6 +212,7 @@ src[t.LONETILDE] = '(?:~>?)'
tok('TILDETRIM')
src[t.TILDETRIM] = '(\\s*)' + src[t.LONETILDE] + '\\s+'
re[t.TILDETRIM] = new RegExp(src[t.TILDETRIM], 'g')
safeRe[t.TILDETRIM] = new RegExp(makeSafeRe(src[t.TILDETRIM]), 'g')
var tildeTrimReplace = '$1~'

tok('TILDE')
Expand All @@ -198,6 +228,7 @@ src[t.LONECARET] = '(?:\\^)'
tok('CARETTRIM')
src[t.CARETTRIM] = '(\\s*)' + src[t.LONECARET] + '\\s+'
re[t.CARETTRIM] = new RegExp(src[t.CARETTRIM], 'g')
safeRe[t.CARETTRIM] = new RegExp(makeSafeRe(src[t.CARETTRIM]), 'g')
var caretTrimReplace = '$1^'

tok('CARET')
Expand All @@ -219,6 +250,7 @@ src[t.COMPARATORTRIM] = '(\\s*)' + src[t.GTLT] +

// this one has to use the /g flag
re[t.COMPARATORTRIM] = new RegExp(src[t.COMPARATORTRIM], 'g')
safeRe[t.COMPARATORTRIM] = new RegExp(makeSafeRe(src[t.COMPARATORTRIM]), 'g')
var comparatorTrimReplace = '$1$2$3'

// Something like `1.2.3 - 1.2.4`
Expand Down Expand Up @@ -247,6 +279,14 @@ for (var i = 0; i < R; i++) {
debug(i, src[i])
if (!re[i]) {
re[i] = new RegExp(src[i])

// Replace all greedy whitespace to prevent regex dos issues. These regex are
// used internally via the safeRe object since all inputs in this library get
// normalized first to trim and collapse all extra whitespace. The original
// regexes are exported for userland consumption and lower level usage. A
// future breaking change could export the safer regex only with a note that
// all input should have extra whitespace removed.
safeRe[i] = new RegExp(makeSafeRe(src[i]))
}
}

Expand All @@ -271,7 +311,7 @@ function parse (version, options) {
return null
}

var r = options.loose ? re[t.LOOSE] : re[t.FULL]
var r = options.loose ? safeRe[t.LOOSE] : safeRe[t.FULL]
if (!r.test(version)) {
return null
}
Expand Down Expand Up @@ -326,7 +366,7 @@ function SemVer (version, options) {
this.options = options
this.loose = !!options.loose

var m = version.trim().match(options.loose ? re[t.LOOSE] : re[t.FULL])
var m = version.trim().match(options.loose ? safeRe[t.LOOSE] : safeRe[t.FULL])

if (!m) {
throw new TypeError('Invalid Version: ' + version)
Expand Down Expand Up @@ -771,6 +811,7 @@ function Comparator (comp, options) {
return new Comparator(comp, options)
}

comp = comp.trim().split(/\s+/).join(' ')
debug('comparator', comp, options)
this.options = options
this.loose = !!options.loose
Expand All @@ -787,7 +828,7 @@ function Comparator (comp, options) {

var ANY = {}
Comparator.prototype.parse = function (comp) {
var r = this.options.loose ? re[t.COMPARATORLOOSE] : re[t.COMPARATOR]
var r = this.options.loose ? safeRe[t.COMPARATORLOOSE] : safeRe[t.COMPARATOR]
var m = comp.match(r)

if (!m) {
Expand Down Expand Up @@ -911,17 +952,24 @@ function Range (range, options) {
this.loose = !!options.loose
this.includePrerelease = !!options.includePrerelease

// First, split based on boolean or ||
// First reduce all whitespace as much as possible so we do not have to rely
// on potentially slow regexes like \s*. This is then stored and used for
// future error messages as well.
this.raw = range
this.set = range.split(/\s*\|\|\s*/).map(function (range) {
.trim()
.split(/\s+/)
.join(' ')

// First, split based on boolean or ||
this.set = this.raw.split('||').map(function (range) {
return this.parseRange(range.trim())
}, this).filter(function (c) {
// throw out any that are not relevant for whatever reason
return c.length
})

if (!this.set.length) {
throw new TypeError('Invalid SemVer Range: ' + range)
throw new TypeError('Invalid SemVer Range: ' + this.raw)
}

this.format()
Expand All @@ -940,28 +988,27 @@ Range.prototype.toString = function () {

Range.prototype.parseRange = function (range) {
var loose = this.options.loose
range = range.trim()
// `1.2.3 - 1.2.4` => `>=1.2.3 <=1.2.4`
var hr = loose ? re[t.HYPHENRANGELOOSE] : re[t.HYPHENRANGE]
var hr = loose ? safeRe[t.HYPHENRANGELOOSE] : safeRe[t.HYPHENRANGE]
range = range.replace(hr, hyphenReplace)
debug('hyphen replace', range)
// `> 1.2.3 < 1.2.5` => `>1.2.3 <1.2.5`
range = range.replace(re[t.COMPARATORTRIM], comparatorTrimReplace)
debug('comparator trim', range, re[t.COMPARATORTRIM])
range = range.replace(safeRe[t.COMPARATORTRIM], comparatorTrimReplace)
debug('comparator trim', range, safeRe[t.COMPARATORTRIM])

// `~ 1.2.3` => `~1.2.3`
range = range.replace(re[t.TILDETRIM], tildeTrimReplace)
range = range.replace(safeRe[t.TILDETRIM], tildeTrimReplace)

// `^ 1.2.3` => `^1.2.3`
range = range.replace(re[t.CARETTRIM], caretTrimReplace)
range = range.replace(safeRe[t.CARETTRIM], caretTrimReplace)

// normalize spaces
range = range.split(/\s+/).join(' ')

// At this point, the range is completely trimmed and
// ready to be split into comparators.

var compRe = loose ? re[t.COMPARATORLOOSE] : re[t.COMPARATOR]
var compRe = loose ? safeRe[t.COMPARATORLOOSE] : safeRe[t.COMPARATOR]
var set = range.split(' ').map(function (comp) {
return parseComparator(comp, this.options)
}, this).join(' ').split(/\s+/)
Expand Down Expand Up @@ -1061,7 +1108,7 @@ function replaceTildes (comp, options) {
}

function replaceTilde (comp, options) {
var r = options.loose ? re[t.TILDELOOSE] : re[t.TILDE]
var r = options.loose ? safeRe[t.TILDELOOSE] : safeRe[t.TILDE]
return comp.replace(r, function (_, M, m, p, pr) {
debug('tilde', comp, _, M, m, p, pr)
var ret
Expand Down Expand Up @@ -1102,7 +1149,7 @@ function replaceCarets (comp, options) {

function replaceCaret (comp, options) {
debug('caret', comp, options)
var r = options.loose ? re[t.CARETLOOSE] : re[t.CARET]
var r = options.loose ? safeRe[t.CARETLOOSE] : safeRe[t.CARET]
return comp.replace(r, function (_, M, m, p, pr) {
debug('caret', comp, _, M, m, p, pr)
var ret
Expand Down Expand Up @@ -1161,7 +1208,7 @@ function replaceXRanges (comp, options) {

function replaceXRange (comp, options) {
comp = comp.trim()
var r = options.loose ? re[t.XRANGELOOSE] : re[t.XRANGE]
var r = options.loose ? safeRe[t.XRANGELOOSE] : safeRe[t.XRANGE]
return comp.replace(r, function (ret, gtlt, M, m, p, pr) {
debug('xRange', comp, ret, gtlt, M, m, p, pr)
var xM = isX(M)
Expand Down Expand Up @@ -1236,7 +1283,7 @@ function replaceXRange (comp, options) {
function replaceStars (comp, options) {
debug('replaceStars', comp, options)
// Looseness is ignored here. star is always as loose as it gets!
return comp.trim().replace(re[t.STAR], '')
return comp.trim().replace(safeRe[t.STAR], '')
}

// This function is passed to string.replace(re[t.HYPHENRANGE])
Expand Down Expand Up @@ -1562,7 +1609,7 @@ function coerce (version, options) {

var match = null
if (!options.rtl) {
match = version.match(re[t.COERCE])
match = version.match(safeRe[t.COERCE])
} else {
// Find the right-most coercible string that does not share
// a terminus with a more left-ward coercible string.
Expand All @@ -1573,17 +1620,17 @@ function coerce (version, options) {
// Stop when we get a match that ends at the string end, since no
// coercible string can be more right-ward without the same terminus.
var next
while ((next = re[t.COERCERTL].exec(version)) &&
while ((next = safeRe[t.COERCERTL].exec(version)) &&
(!match || match.index + match[0].length !== version.length)
) {
if (!match ||
next.index + next[0].length !== match.index + match[0].length) {
match = next
}
re[t.COERCERTL].lastIndex = next.index + next[1].length + next[2].length
safeRe[t.COERCERTL].lastIndex = next.index + next[1].length + next[2].length
}
// leave it in a clean state
re[t.COERCERTL].lastIndex = -1
safeRe[t.COERCERTL].lastIndex = -1
}

if (match === null) {
Expand Down
10 changes: 10 additions & 0 deletions test/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,7 @@ test('negative range tests', function (t) {
['*', 'not a version'],
['>=2', 'glorp'],
['2.x', '3.0.0-pre.0', { includePrerelease: true }],
['== 1.0.0 || foo', '2.0.0', { loose: true }]
].forEach(function (v) {
var range = v[0]
var ver = v[1]
Expand Down Expand Up @@ -1092,3 +1093,12 @@ test('really big numeric prerelease value', function (t) {
t.strictSame(r.prerelease, [ 'beta', '90071992547409910' ])
t.end()
})

test('long build id', function (t) {
var longBuild = '-928490632884417731e7af463c92b034d6a78268fc993bcb88a57944'
var shortVersion = '1.1.1'
var longVersion = Number.MAX_SAFE_INTEGER + '.' + Number.MAX_SAFE_INTEGER + '.' + Number.MAX_SAFE_INTEGER
t.equal(semver.valid(shortVersion + longBuild), shortVersion + longBuild)
t.equal(semver.valid(longVersion + longBuild), longVersion + longBuild)
t.end()
})
14 changes: 14 additions & 0 deletions test/re.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
var test = require('tap').test
var semver = require('../')

test('has a list of src, re, and safeRe', function (t) {
semver.re.forEach(function (r) { return t.match(r, RegExp, 'regexps are regexps') })
semver.src.forEach(function (s) { return t.match(s, String, 'src is strings') })

semver.safeRe.forEach(function (r) {
t.notMatch(r.source, '\\s+', 'safe regex do not contain greedy whitespace')
t.notMatch(r.source, '\\s*', 'safe regex do not contain greedy whitespace')
})

t.end()
})
63 changes: 63 additions & 0 deletions test/whitespace.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
var test = require('tap').test
var semver = require('../')

var validRange = semver.validRange
var SemVer = semver.SemVer
var Range = semver.Range
var Comparator = semver.Comparator
var minVersion = semver.minVersion
var minSatisfying = semver.minSatisfying
var maxSatisfying = semver.maxSatisfying

function s(n, char) {
if (!n) {
n = 500000
}
if (!char) {
char = ' '
}
var c = ''
for (var i = 0; i < n; i++) {
c += char
}
return c
}

test('regex dos via range whitespace', function (t) {
// a range with this much whitespace would take a few minutes to process if
// any redos susceptible regexes were used. there is a global tap timeout per
// file set in the package.json that will error if this test takes too long.
var r = `1.2.3 ${s()} <1.3.0`

t.equal(new Range(r).range, '1.2.3 <1.3.0')
t.equal(validRange(r), '1.2.3 <1.3.0')
t.equal(minVersion(r).version, '1.2.3')
t.equal(minSatisfying(['1.2.3'], r), '1.2.3')
t.equal(maxSatisfying(['1.2.3'], r), '1.2.3')

t.end()
})

test('range with 0', function (t) {
var r = `1.2.3 ${s(null, '0')} <1.3.0`
t.throws(function () { return new Range(r).range })
t.equal(validRange(r), null)
t.throws(function () { return minVersion(r).version })
t.equal(minSatisfying(['1.2.3'], r), null)
t.equal(maxSatisfying(['1.2.3'], r), null)
t.end()
})

test('semver version', function (t) {
var v = `${s(125)}1.2.3${s(125)}`
var tooLong = `${s()}1.2.3${s()}`
t.equal(new SemVer(v).version, '1.2.3')
t.throws(function () { return new SemVer(tooLong) })
t.end()
})

test('comparator', function (t) {
var c = `${s()}<${s()}1.2.3${s()}`
t.equal(new Comparator(c).value, '<1.2.3')
t.end()
})

0 comments on commit 928e56d

Please sign in to comment.