-
Notifications
You must be signed in to change notification settings - Fork 1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
More advanced table extractor to handle compound queries #107
Merged
Merged
Changes from 3 commits
Commits
Show all changes
6 commits
Select commit
Hold shift + click to select a range
e084efd
More advanced table extractor, supported compound queries
crisptrutski 1f93963
Cljfmt
crisptrutski 76dd401
Whoops, test analysis for the two new queries
crisptrutski dfa8640
Test in parallel
crisptrutski ffaf13a
Linter doesn't understand namespace annotation
crisptrutski fcb9111
More value assertions. Require explicit skip
crisptrutski File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
package com.metabase.macaw; | ||
|
||
public class AnalysisError extends RuntimeException { | ||
public final AnalysisErrorType errorType; | ||
public final Throwable cause; | ||
|
||
public AnalysisError(AnalysisErrorType errorType) { | ||
this.errorType = errorType; | ||
this.cause = null; | ||
} | ||
|
||
public AnalysisError(AnalysisErrorType errorType, Throwable cause) { | ||
this.errorType = errorType; | ||
this.cause = cause; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
package com.metabase.macaw; | ||
|
||
public enum AnalysisErrorType { | ||
UNSUPPORTED_EXPRESSION, | ||
INVALID_QUERY, | ||
UNABLE_TO_PARSE | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,148 @@ | ||
package com.metabase.macaw; | ||
|
||
import net.sf.jsqlparser.expression.Expression; | ||
import net.sf.jsqlparser.schema.Table; | ||
import net.sf.jsqlparser.statement.Statement; | ||
import net.sf.jsqlparser.statement.select.*; | ||
|
||
import java.util.HashSet; | ||
import java.util.List; | ||
import java.util.Set; | ||
import java.util.Stack; | ||
import java.util.function.Consumer; | ||
|
||
@SuppressWarnings({"PatternVariableCanBeUsed", "IfCanBeSwitch"}) // don't force a newer JVM version | ||
public final class CompoundTableExtractor { | ||
|
||
/** | ||
* BEWARE this may return duplicates if the same table is referenced multiple times in the query. | ||
* We need to deduplicate these with value semantics. | ||
* A better solution would be to have our own TableValue class to convert to, before accumulating. | ||
*/ | ||
public static Set<Table> getTables(Statement statement) { | ||
try { | ||
if (statement instanceof Select) { | ||
return accTables((Select) statement); | ||
} | ||
// This is not a query, it's probably a statement. | ||
throw new AnalysisError(AnalysisErrorType.INVALID_QUERY); | ||
} catch (IllegalArgumentException e) { | ||
// This query uses features that we do not yet support translating. | ||
throw new AnalysisError(AnalysisErrorType.UNABLE_TO_PARSE, e); | ||
} | ||
} | ||
|
||
private static Set<Table> accTables(Select select) { | ||
Set<Table> tables = new HashSet<>(); | ||
Stack<Set<String>> cteAliasScopes = new Stack<>(); | ||
accTables(select, tables, cteAliasScopes); | ||
return tables; | ||
} | ||
|
||
private static void accTables(Select select, Set<Table> tables, Stack<Set<String>> cteAliasScopes) { | ||
if (select instanceof PlainSelect) { | ||
accTables(select.getPlainSelect(), tables, cteAliasScopes); | ||
} else if (select instanceof ParenthesedSelect) { | ||
accTables(((ParenthesedSelect) select).getSelect(), tables, cteAliasScopes); | ||
} else if (select instanceof SetOperationList) { | ||
for (Select innerSelect : ((SetOperationList) select).getSelects()) { | ||
accTables(innerSelect, tables, cteAliasScopes); | ||
} | ||
} else { | ||
// We don't support more complex kinds of select statements yet. | ||
throw new AnalysisError(AnalysisErrorType.UNSUPPORTED_EXPRESSION); | ||
} | ||
} | ||
|
||
private static void accTables(PlainSelect select, Set<Table> tables, Stack<Set<String>> cteAliasScopes) { | ||
// these are fine, but irrelevant | ||
// | ||
// - select.getDistinct() | ||
// - select.getHaving() | ||
// - select.getKsqlWindow() | ||
// - select.getMySqlHintStraightJoin() | ||
// - select.getMySqlSqlCacheFlag() | ||
// - select.getLimitBy() | ||
// - select.getOffset() | ||
// - select.getOptimizeFor() | ||
// - select.getOracleHint() | ||
// - select.getTop() | ||
// - select.getWait() | ||
|
||
// Not currently parseable | ||
if (select.getLateralViews() != null || | ||
select.getOracleHierarchical() != null || | ||
select.getWindowDefinitions() != null) { | ||
throw new AnalysisError(AnalysisErrorType.UNABLE_TO_PARSE); | ||
} | ||
|
||
final Set<String> cteAliases = new HashSet<>(); | ||
cteAliasScopes.push(cteAliases); | ||
|
||
if (select.getWithItemsList() != null) { | ||
for (WithItem withItem : select.getWithItemsList()) { | ||
if (withItem.isRecursive()) { | ||
cteAliases.add(withItem.getAlias().getName()); | ||
} | ||
accTables(withItem.getSelect(), tables, cteAliasScopes); | ||
// No hard in adding twice to a set | ||
crisptrutski marked this conversation as resolved.
Show resolved
Hide resolved
|
||
cteAliases.add(withItem.getAlias().getName()); | ||
} | ||
} | ||
|
||
// any of these - nope out | ||
if (select.getFetch() != null || | ||
select.getFirst() != null || | ||
select.getForClause() != null || | ||
select.getForMode() != null || | ||
select.getForUpdateTable() != null || | ||
select.getForXmlPath() != null || | ||
select.getIntoTables() != null || | ||
select.getIsolation() != null || | ||
select.getSkip() != null) { | ||
throw new AnalysisError(AnalysisErrorType.INVALID_QUERY); | ||
} | ||
|
||
for (SelectItem<?> item : select.getSelectItems()) { | ||
Expression expr = item.getExpression(); | ||
if (expr instanceof Select) { | ||
accTables((Select) expr, tables, cteAliasScopes); | ||
} | ||
} | ||
|
||
Consumer<FromItem> pushOrThrow = (FromItem item) -> { | ||
if (item instanceof Table) { | ||
Table table = (Table) item; | ||
String tableName = table.getName(); | ||
// Skip aliases | ||
if (cteAliasScopes.stream().noneMatch(scope -> scope.contains(tableName))) { | ||
if (tableName.contains("*")) { | ||
// Do not allow table wildcards. | ||
throw new AnalysisError(AnalysisErrorType.INVALID_QUERY); | ||
} else { | ||
tables.add(table); | ||
} | ||
} | ||
} else if (item instanceof TableFunction) { | ||
// Do not allow dynamic tables | ||
throw new AnalysisError(AnalysisErrorType.INVALID_QUERY); | ||
} else if (item instanceof Select) { | ||
accTables((Select) item, tables, cteAliasScopes); | ||
} else if (item != null) { | ||
// Only allow simple table references. | ||
throw new AnalysisError(AnalysisErrorType.UNSUPPORTED_EXPRESSION); | ||
} | ||
}; | ||
|
||
if (select.getFromItem() != null) { | ||
pushOrThrow.accept(select.getFromItem()); | ||
List<Join> joins = select.getJoins(); | ||
if (joins != null) { | ||
joins.stream().map(Join::getFromItem).forEach(pushOrThrow); | ||
} | ||
} | ||
|
||
cteAliasScopes.pop(); | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
is this
ParenthesedSelect
a nested select or what?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry the context has left my brain a bit now 😄
I think you're right, and from a practical point of view its a SELECT which gets bound to an alias.
I can't recall if this is both column and table bindings, or just one of those.