Skip to content

Commit

Permalink
feat: decode percent-encoded path in getPath (#2714)
Browse files Browse the repository at this point in the history
* feat: decode percent-encoded path in `getPath`

* refactor: Stop decoding URIs in the `param()` method, since they are already decoded in `getPath()`

* test: add tests for decoding URI in path

* chore: denoify

* Revert "refactor: Stop decoding URIs in the `param()` method, since they are already decoded in `getPath()`"

This reverts commit 7192497.

* refactor: Replace "%25" before applying decodeURI() for avoid double decoding

Co-authored-by: Szymon Marczak <[email protected]>

* chore: denoify

* refactor(utils): check existence of "%25" before replacing it with "%2525"

Co-authored-by: Szymon Marczak <[email protected]>

* feat(utils/url): Changed URL decoding to skip invalid sequences and decode as much as possible

* chore: denoify

---------

Co-authored-by: Szymon Marczak <[email protected]>
  • Loading branch information
usualoma and szmarczak authored May 22, 2024
1 parent 8cb59e4 commit 778ac10
Show file tree
Hide file tree
Showing 4 changed files with 213 additions and 10 deletions.
54 changes: 50 additions & 4 deletions benchmarks/utils/src/get-path.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,61 @@ bench('noop', () => {})
const request = new Request('http://localhost/about/me')

group('getPath', () => {
bench('slice + indexOf', () => {
bench('slice + indexOf : w/o decodeURI', () => {
const url = request.url
const queryIndex = url.indexOf('?', 8)
url.slice(url.indexOf('/', 8), queryIndex === -1 ? undefined : queryIndex)
return url.slice(url.indexOf('/', 8), queryIndex === -1 ? undefined : queryIndex)
})

bench('regexp', () => {
bench('regexp : w/o decodeURI', () => {
const match = request.url.match(/^https?:\/\/[^/]+(\/[^?]*)/)
match ? match[1] : ''
return match ? match[1] : ''
})

bench('slice + indexOf', () => {
const url = request.url
const queryIndex = url.indexOf('?', 8)
const path = url.slice(url.indexOf('/', 8), queryIndex === -1 ? undefined : queryIndex)
return path.includes('%') ? decodeURIComponent(path) : path
})

bench('slice + for-loop + flag', () => {
const url = request.url
let start = url.indexOf('/', 8)
let i = start
let hasPercentEncoding = false
for (; i < url.length; i++) {
const charCode = url.charCodeAt(i)
if (charCode === 37) {
// '%'
hasPercentEncoding = true
} else if (charCode === 63) {
// '?'
break
}
}
return hasPercentEncoding ? decodeURIComponent(url.slice(start, i)) : url.slice(start, i)
})

bench('slice + for-loop + immediate return', () => {
const url = request.url
const start = url.indexOf('/', 8)
let i = start
for (; i < url.length; i++) {
const charCode = url.charCodeAt(i)
if (charCode === 37) {
// '%'
// If the path contains percent encoding, use `indexOf()` to find '?' and return the result immediately.
// Although this is a performance disadvantage, it is acceptable since we prefer cases that do not include percent encoding.
const queryIndex = url.indexOf('?', i)
const path = url.slice(start, queryIndex === -1 ? undefined : queryIndex)
return decodeURI(path.includes('%25') ? path.replace(/%25/g, '%2525') : path)
} else if (charCode === 63) {
// '?'
break
}
}
return url.slice(start, i)
})
})

Expand Down
43 changes: 40 additions & 3 deletions deno_dist/utils/url.ts
Original file line number Diff line number Diff line change
Expand Up @@ -69,11 +69,48 @@ export const getPattern = (label: string): Pattern | null => {
return null
}

/**
* Try to apply decodeURI() to given string.
* If it fails, skip invalid percent encoding or invalid UTF-8 sequences, and apply decodeURI() to the rest as much as possible.
* @param str The string to decode.
* @returns The decoded string that sometimes contains undecodable percent encoding.
* @example
* tryDecodeURI('Hello%20World') // 'Hello World'
* tryDecodeURI('Hello%20World/%A4%A2') // 'Hello World/%A4%A2'
*/
const tryDecodeURI = (str: string): string => {
try {
return decodeURI(str)
} catch {
return str.replace(/(?:%[0-9A-Fa-f]{2})+/g, (match) => {
try {
return decodeURI(match)
} catch {
return match
}
})
}
}

export const getPath = (request: Request): string => {
// Optimized: indexOf() + slice() is faster than RegExp
const url = request.url
const queryIndex = url.indexOf('?', 8)
return url.slice(url.indexOf('/', 8), queryIndex === -1 ? undefined : queryIndex)
const start = url.indexOf('/', 8)
let i = start
for (; i < url.length; i++) {
const charCode = url.charCodeAt(i)
if (charCode === 37) {
// '%'
// If the path contains percent encoding, use `indexOf()` to find '?' and return the result immediately.
// Although this is a performance disadvantage, it is acceptable since we prefer cases that do not include percent encoding.
const queryIndex = url.indexOf('?', i)
const path = url.slice(start, queryIndex === -1 ? undefined : queryIndex)
return tryDecodeURI(path.includes('%25') ? path.replace(/%25/g, '%2525') : path)
} else if (charCode === 63) {
// '?'
break
}
}
return url.slice(start, i)
}

export const getQueryStrings = (url: string): string => {
Expand Down
83 changes: 83 additions & 0 deletions src/hono.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -732,6 +732,89 @@ describe('Routing', () => {
expect(res.status).toBe(404)
})
})

describe('Encoded path', () => {
let app: Hono
beforeEach(() => {
app = new Hono()
})

it('should decode path parameter', async () => {
app.get('/users/:id', (c) => c.text(`id is ${c.req.param('id')}`))

const res = await app.request('http://localhost/users/%C3%A7awa%20y%C3%AE%3F')
expect(res.status).toBe(200)
expect(await res.text()).toBe('id is çawa yî?')
})

it('should decode "/"', async () => {
app.get('/users/:id', (c) => c.text(`id is ${c.req.param('id')}`))

const res = await app.request('http://localhost/users/hono%2Fposts') // %2F is '/'
expect(res.status).toBe(200)
expect(await res.text()).toBe('id is hono/posts')
})

it('should decode alphabets', async () => {
app.get('/users/static', (c) => c.text('static'))

const res = await app.request('http://localhost/users/%73tatic') // %73 is 's'
expect(res.status).toBe(200)
expect(await res.text()).toBe('static')
})

it('should decode alphabets with invalid UTF-8 sequence', async () => {
app.get('/static/:path', (c) => {
try {
return c.text(`by c.req.param: ${c.req.param('path')}`) // this should throw an error
} catch (e) {
return c.text(`by c.req.url: ${c.req.url.replace(/.*\//, '')}`)
}
})

const res = await app.request('http://localhost/%73tatic/%A4%A2') // %73 is 's', %A4%A2 is invalid UTF-8 sequence
expect(res.status).toBe(200)
expect(await res.text()).toBe('by c.req.url: %A4%A2')
})

it('should decode alphabets with invalid percent encoding', async () => {
app.get('/static/:path', (c) => {
try {
return c.text(`by c.req.param: ${c.req.param('path')}`) // this should throw an error
} catch (e) {
return c.text(`by c.req.url: ${c.req.url.replace(/.*\//, '')}`)
}
})

const res = await app.request('http://localhost/%73tatic/%a') // %73 is 's', %a is invalid percent encoding
expect(res.status).toBe(200)
expect(await res.text()).toBe('by c.req.url: %a')
})

it('should be able to catch URIError', async () => {
app.onError((err, c) => {
if (err instanceof URIError) {
return c.text(err.message, 400)
}
throw err
})
app.get('/static/:path', (c) => {
return c.text(`by c.req.param: ${c.req.param('path')}`) // this should throw an error
})

const res = await app.request('http://localhost/%73tatic/%a') // %73 is 's', %a is invalid percent encoding
expect(res.status).toBe(400)
expect(await res.text()).toBe('URI malformed')
})

it('should not double decode', async () => {
app.get('/users/:id', (c) => c.text(`posts of ${c.req.param('id')}`))

const res = await app.request('http://localhost/users/%2525') // %25 is '%'
expect(res.status).toBe(200)
expect(await res.text()).toBe('posts of %25')
})
})
})

describe('param and query', () => {
Expand Down
43 changes: 40 additions & 3 deletions src/utils/url.ts
Original file line number Diff line number Diff line change
Expand Up @@ -69,11 +69,48 @@ export const getPattern = (label: string): Pattern | null => {
return null
}

/**
* Try to apply decodeURI() to given string.
* If it fails, skip invalid percent encoding or invalid UTF-8 sequences, and apply decodeURI() to the rest as much as possible.
* @param str The string to decode.
* @returns The decoded string that sometimes contains undecodable percent encoding.
* @example
* tryDecodeURI('Hello%20World') // 'Hello World'
* tryDecodeURI('Hello%20World/%A4%A2') // 'Hello World/%A4%A2'
*/
const tryDecodeURI = (str: string): string => {
try {
return decodeURI(str)
} catch {
return str.replace(/(?:%[0-9A-Fa-f]{2})+/g, (match) => {
try {
return decodeURI(match)
} catch {
return match
}
})
}
}

export const getPath = (request: Request): string => {
// Optimized: indexOf() + slice() is faster than RegExp
const url = request.url
const queryIndex = url.indexOf('?', 8)
return url.slice(url.indexOf('/', 8), queryIndex === -1 ? undefined : queryIndex)
const start = url.indexOf('/', 8)
let i = start
for (; i < url.length; i++) {
const charCode = url.charCodeAt(i)
if (charCode === 37) {
// '%'
// If the path contains percent encoding, use `indexOf()` to find '?' and return the result immediately.
// Although this is a performance disadvantage, it is acceptable since we prefer cases that do not include percent encoding.
const queryIndex = url.indexOf('?', i)
const path = url.slice(start, queryIndex === -1 ? undefined : queryIndex)
return tryDecodeURI(path.includes('%25') ? path.replace(/%25/g, '%2525') : path)
} else if (charCode === 63) {
// '?'
break
}
}
return url.slice(start, i)
}

export const getQueryStrings = (url: string): string => {
Expand Down

0 comments on commit 778ac10

Please sign in to comment.