Skip to content

Commit

Permalink
Improve uri.parseQuery to never raise an error
Browse files Browse the repository at this point in the history
In case of malformed query string where there is `=` on the value, handle
this character as part of the value instead of throwing an error.

The following query string should no longer crash a program:

    key=value&key2=x=1

It will be interpreted as [("key", "value"), ("key2", "x=1")]

This is correct according to latest WhatWG's HTML5 specification
recarding the urlencoded parser:
https://url.spec.whatwg.org/#concept-urlencoded-parser

Older behavior can be restored using the -d:nimLegacyParseQueryStrict
flag.
  • Loading branch information
mildred committed Jan 11, 2021
1 parent fd5c8ef commit 55a5deb
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 24 deletions.
7 changes: 7 additions & 0 deletions changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,13 @@
with other backends. see #9125. Use `-d:nimLegacyJsRound` for previous behavior.
- Added `socketstream` module that wraps sockets in the stream interface

- Changed the behavior of `uri.decodeQuery` when there are unencoded `=`
characters in the decoded values. Prior versions would raise an error. This is
no longer the case to comply with the HTML spec and other languages
implementations. Old behavior can be obtained with
`-d:nimLegacyParseQueryStrict`. `cgi.decodeData` which uses the same
underlying code is also updated the same way.




Expand Down
14 changes: 4 additions & 10 deletions lib/pure/cgi.nim
Original file line number Diff line number Diff line change
Expand Up @@ -84,23 +84,17 @@ proc getEncodedData(allowedMethods: set[RequestMethod]): string =
iterator decodeData*(data: string): tuple[key, value: TaintedString] =
## Reads and decodes CGI data and yields the (name, value) pairs the
## data consists of.
try:
for (key, value) in uri.decodeQuery(data):
yield (key, value)
except UriParseError as e:
cgiError(e.msg)
for (key, value) in uri.decodeQuery(data):
yield (key, value)

iterator decodeData*(allowedMethods: set[RequestMethod] =
{methodNone, methodPost, methodGet}): tuple[key, value: TaintedString] =
## Reads and decodes CGI data and yields the (name, value) pairs the
## data consists of. If the client does not use a method listed in the
## `allowedMethods` set, a `CgiError` exception is raised.
let data = getEncodedData(allowedMethods)
try:
for (key, value) in uri.decodeQuery(data):
yield (key, value)
except UriParseError as e:
cgiError(e.msg)
for (key, value) in uri.decodeQuery(data):
yield (key, value)

proc readData*(allowedMethods: set[RequestMethod] =
{methodNone, methodPost, methodGet}): StringTableRef =
Expand Down
36 changes: 22 additions & 14 deletions lib/pure/uri.nim
Original file line number Diff line number Diff line change
Expand Up @@ -162,21 +162,25 @@ func encodeQuery*(query: openArray[(string, string)], usePlus = true,

iterator decodeQuery*(data: string): tuple[key, value: TaintedString] =
## Reads and decodes query string ``data`` and yields the (key, value) pairs the
## data consists of.
## data consists of. If compiled with -d:nimLegacyParseQueryStrict, an error
## is raised when there is an unencoded ``=`` character in a decoded value,
## which was the behavior in Nim < 1.6
runnableExamples:
import std/sugar
let s = collect(newSeq):
for k, v in decodeQuery("foo=1&bar=2"): (k, v)
doAssert s == @[("foo", "1"), ("bar", "2")]
import std/sequtils
doAssert toSeq(decodeQuery("foo=1&bar=2=3")) == @[("foo", "1"), ("bar", "2=3")]
doAssert toSeq(decodeQuery("&a&=b&=&&")) == @[("", ""), ("a", ""), ("", "b"), ("", ""), ("", "")]

proc parseData(data: string, i: int, field: var string): int =
proc parseData(data: string, i: int, field: var string, sep: char): int =
result = i
while result < data.len:
case data[result]
let c = data[result]
case c
of '%': add(field, decodePercent(data, result))
of '+': add(field, ' ')
of '=', '&': break
else: add(field, data[result])
of '&': break
else:
if c == sep: break
else: add(field, data[result])
inc(result)

var i = 0
Expand All @@ -185,16 +189,20 @@ iterator decodeQuery*(data: string): tuple[key, value: TaintedString] =
# decode everything in one pass:
while i < data.len:
setLen(name, 0) # reuse memory
i = parseData(data, i, name)
i = parseData(data, i, name, '=')
setLen(value, 0) # reuse memory
if i < data.len and data[i] == '=':
inc(i) # skip '='
i = parseData(data, i, value)
when defined(nimLegacyParseQueryStrict):
i = parseData(data, i, value, '=')
else:
i = parseData(data, i, value, '&')
yield (name.TaintedString, value.TaintedString)
if i < data.len:
if data[i] == '&': inc(i)
else:
uriParseError("'&' expected at index '$#' for '$#'" % [$i, data])
when defined(nimLegacyParseQueryStrict):
if data[i] != '&':
uriParseError("'&' expected at index '$#' for '$#'" % [$i, data])
inc(i)

func parseAuthority(authority: string, result: var Uri) =
var i = 0
Expand Down

0 comments on commit 55a5deb

Please sign in to comment.