Skip to content

Commit

Permalink
support single line comments and fix cte parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
jsumners-nr committed Nov 11, 2024
1 parent 1850f48 commit c4b6794
Show file tree
Hide file tree
Showing 2 changed files with 104 additions and 6 deletions.
68 changes: 67 additions & 1 deletion lib/db/query-parsers/sql.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,15 @@ const stringify = require('json-stringify-safe')
*
* @type {RegExp}
*/
const selectSubquery = /from\s*?\((?<subquery>select.*?)\)\s*?/gi
const selectSubquery = /from\s*?\((?<subquery>select.*?)\)\s*?/i

/**
* Matches queries with leading common table expressions and assigns the
* actual query to a match group named `query`.
*
* @type {RegExp}
*/
const cteMatcher = /^\s*?with[\w\W]*?\)\s*?(?<query>(?:insert|update|delete|select)[\w\W]*)/i

/**
* Parses a SQL statement into the parts we want to report as metadata in
Expand Down Expand Up @@ -50,11 +58,20 @@ module.exports = function parseSql(sql, { logger = defaultLogger } = {}) {
}

sql = removeMultiLineComments(sql).trim()
sql = removeSingleLineComments(sql).trim()
let result = {
operation: 'other',
collection: null,
query: sql
}

// We want to remove the CTE _after_ assigning the statement to the result's
// `query` property. Otherwise, the actual query will not be recorded in
// the trace.
sql = removeCte(sql)

// After all of our normalizing of the overall query, if it doesn't actually
// look like an SQL statement, short-circuit the parsing routine.
if (looksLikeValidSql(sql) === false) {
return result
}
Expand Down Expand Up @@ -139,6 +156,55 @@ function removeMultiLineComments(input) {
return removeMultiLineComments(`${part1} ${part2}`)
}

/**
* Removes all single line, and trailing, comments from the input query.
* These are comments that start with `--` or `#`.
*
* @param {string} input The query that might contain comments.
* @returns {string} The query without any comments.
*/
function removeSingleLineComments(input) {
const resultLines = []
const lines = input.split('\n')
for (let i = 0; i < lines.length; i += 1) {
let line = lines[i]
if (/^(--|#)/.test(line) === true) {
continue
}
let pos = line.indexOf(' --')
if (pos > -1) {
line = line.slice(0, pos)
resultLines.push(line)
continue
}
pos = line.indexOf(' #')
if (pos > -1) {
line = line.slice(0, pos)
resultLines.push(line)
continue
}

resultLines.push(line)
}
return resultLines.join('\n')
}

/**
* Removes any leading common table expression (CTE) from the query and returns
* the query that targets the CTE. The metadata we are interested in, is not
* contained in the CTE, but in the query targeting the CTE.
*
* @param {string} statement The SQL statement that might have a CTE.
* @returns {string} The SQL statement without a leading CTE.
*/
function removeCte(statement) {
const matches = cteMatcher.exec(statement)
if (matches === null) {
return statement
}
return matches.groups.query
}

/**
* Tests the start of the statement to determine if it looks like a valid
* SQL statement.
Expand Down
42 changes: 37 additions & 5 deletions test/unit/db/query-parsers/sql.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -193,9 +193,32 @@ test('logs correctly if input is incorrect', () => {
assert.equal(logs[0][1], 'Unable to stringify SQL')
})

// test('reports correct info if inline comments present', () => {
// const statement = `--insert into`
// })
test('reports correct info if single line comments present', () => {
const expected = { operation: 'insert', collection: 'bar', table: 'bar' }
let statement = `-- insert into bar some stuff
insert into bar
(col1, col2) -- the columns
values('a', 'b') -- the values
`
let found = parseSql(statement)
match(found, expected)

statement = `# insert into bar some stuff
insert into bar
(col1, col2) # the columns
values('a', 'b') # the values
`
found = parseSql(statement)
match(found, expected)

statement = `--insert into bar some stuff
insert into bar
(col1, col2) --the columns
values('--hoorah', '#foobar') #the values
`
found = parseSql(statement)
match(found, expected)
})

test('reports correct info if multi-line comments present', () => {
const expected = { operation: 'insert', collection: 'foo', table: 'foo' }
Expand Down Expand Up @@ -284,7 +307,7 @@ test('handles fully qualified names', () => {
})

test('handles leading CTE', () => {
const statement = `with cte1 as (
let statement = `with cte1 as (
select
linking_col
from
Expand All @@ -297,13 +320,22 @@ test('handles leading CTE', () => {
join cte1 linking_col
where
a.bar_col = 'bar'`
const found = parseSql(statement)
let found = parseSql(statement)
match(found, {
operation: 'select',
collection: 'foo_table',
table: 'foo_table',
database: undefined
})

statement = `with cte1 as (select * from foo) update bar set bar.a = cte1.a`
found = parseSql(statement)
match(found, {
operation: 'update',
collection: 'bar',
table: 'bar',
database: undefined
})
})

test('maps `SELECT ? + ? AS solution` to "unknown" collection', () => {
Expand Down

0 comments on commit c4b6794

Please sign in to comment.