Skip to content

Commit

Permalink
add SQL union, intersect, and except query operators
Browse files Browse the repository at this point in the history
  • Loading branch information
agentm committed May 7, 2024
1 parent 5f93a27 commit a76c70b
Show file tree
Hide file tree
Showing 8 changed files with 142 additions and 8 deletions.
1 change: 1 addition & 0 deletions docs/tutd_cheatsheet.markdown
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ Relational expressions query database state without being able to change it.
|`:showexpr s antijoin sp`|Display all tuples in `s` which do not appear in the result of `s semijoin sp`|
|`:showexpr s union s`| Display the result of `s` unioned with itself (which is equivalent to `s`)|
|`:showexpr s:{status2:=add(10,@status)}`| Display the result of extending the `s` relation variable with a new attribute which adds 10 to each `status` attribute|
|`:showexpr (s:{islondon:=if eq(@city,"London") then True else False}){city,islondon}`| Display the result of relation variable `s` extended with a new attribute `islondon` which is the result of a conditional. |
|`:showexpr s where lt(@status, 30)`|Display the result of `s` where the `status` is less than 30.|
|`:showexpr s relwhere (p{})`|Display the result of `s` if the `p` relation variable is non-empty.|
|`:showexpr s group ({sname,status,s#} as subrel)`| Display the result of grouping the `sname`, `status`, and `s#` into a subrel for each tuple in the `s` relation where the `city` attribute (not mentioned) is the grouping criterion|
Expand Down
87 changes: 87 additions & 0 deletions sql_optimizations_applied
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
https://blog.jooq.org/2017/09/28/10-cool-sql-optimisations-that-do-not-depend-on-the-cost-model/#top3

1. Transitive Closure - done

SELECT first_name, last_name, film_id
FROM actor a
JOIN film_actor fa ON a.actor_id = fa.actor_id
WHERE a.actor_id = 1;

-->

SELECT first_name, last_name, film_id
FROM actor a
JOIN film_actor fa ON a.actor_id = fa.actor_id
WHERE a.actor_id = 1
AND fa.actor_id = 1;

(x join y [on x.a = y.a]) where x.a = 1
->
(x where x.a = 1) join (y where y.a = 1)

or

x where a=@b and b=3
->
x where a=3 and b=3

2. Impossible Predicates - Done

s where 3 = 5
s where true -> s
s where false -> emptied s

3. Join Elimination - Done

SELECT first_name, last_name
FROM customer c
JOIN address a ON c.address_id = a.address_id

-->

SELECT first_name, last_name
FROM customer c


(x join y){x.attrs only} iff there is a foreign key constraint on the full join condition from x to y

4. Silly Predicates - done

where true -> X
where attr = attr -> X
insert s s where name = @name -> X

5. Projections in Exists Subqueries

Our exists clause is a projection against zero attributes already.

6. Predicate Merging - Done

where X and X -> where X
where X or X -> where X

7. Empty Sets

Use constraints to determine if a predicate is provably false:

constraint x > 100
where x = 10 -> where false

x join false -> x where false
x join true -> x

8. CHECK() constraints

not relevant - see 7

9. Unneeded self join - done

x join x -> x
(x where c1) join (x where c2) -> x where c1 and c2
(x where c1) union (x where c2) -> x where c1 or c2

10. Predicate Pushdown - done

(x where c1) where c2 -> x where c1 and c2 - done
x{proj} where c1 -> (x where c1){proj} #project on fewer tuples
(x union y) where c -> (x where c) union (y where c)
16 changes: 13 additions & 3 deletions src/bin/SQL/Interpreter/Select.hs
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,19 @@ parseQuery :: Text -> Either ParserError Query
parseQuery = parse (queryP <* semi <* eof) "<interactive>"

queryP :: Parser Query
queryP = (QuerySelect <$> selectP) <|>
(QueryValues <$> valuesP) <|>
(QueryTable <$> tableP)
-- Parse a SQL query: a SELECT, VALUES, or TABLE term, optionally combined
-- with further terms by the set operators UNION, INTERSECT, and EXCEPT.
--
-- The operator table handed to 'E.makeExprParser' is ordered by descending
-- precedence. INTERSECT gets its own, higher row so that
-- @a UNION b INTERSECT c@ is read as @a UNION (b INTERSECT c)@, as SQL
-- requires; UNION and EXCEPT share the lower row. All three operators are
-- left-associative.
queryP = E.makeExprParser queryTermP queryOpP
  where
    -- a single, operator-free query term
    queryTermP = (QuerySelect <$> selectP) <|>
                 (QueryValues <$> valuesP) <|>
                 (QueryTable <$> tableP)
    queryOpP = [ [ infixOpP "intersect" IntersectQueryOperator ]
               , [ infixOpP "union" UnionQueryOperator
                 , infixOpP "except" ExceptQueryOperator
                 ]
               ]
    -- consume the operator keyword and yield the node-building function
    infixOpP nam op = E.InfixL (QueryOp op <$ reserved nam)

valuesP :: Parser [[ScalarExpr]]
valuesP = do
Expand Down
3 changes: 2 additions & 1 deletion src/lib/ProjectM36/Client.hs
Original file line number Diff line number Diff line change
Expand Up @@ -1115,7 +1115,8 @@ convertSQLQuery sessionId (InProcessConnection conf) query = do
Right (session, _schema) -> do -- TODO: enable SQL to leverage isomorphic schemas
let ctx = Sess.concreteDatabaseContext session
reEnv = RE.mkRelationalExprEnv ctx transGraph
typeF = optimizeAndEvalRelationalExpr reEnv
typeF expr =
RE.runRelationalExprM reEnv (RE.typeForRelationalExpr expr)
-- convert SQL data into DataFrameExpr
case evalConvertM mempty (convertQuery typeF query) of
Left err -> pure (Left (SQLConversionError err))
Expand Down
1 change: 1 addition & 0 deletions src/lib/ProjectM36/Error.hs
Original file line number Diff line number Diff line change
Expand Up @@ -185,5 +185,6 @@ data SQLError = NotSupportedError T.Text |
AggregateGroupByMismatchError ProjectionScalarExpr |
GroupByColumnNotReferencedInGroupByError [ProjectionScalarExpr] |
UnsupportedGroupByProjectionError ProjectionScalarExpr |
QueryOperatorTypeMismatchError QueryOperator Attributes Attributes |
SQLRelationalError RelationalError
deriving (Show, Eq, Generic, Typeable, NFData)
24 changes: 24 additions & 0 deletions src/lib/ProjectM36/SQL/Convert.hs
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,13 @@ type ConvertM = StateT TableContext (ExceptT SQLError Identity)
-- | Run a conversion starting from the given table context, yielding either
-- the SQL error which aborted it or the result paired with the final context.
runConvertM :: TableContext -> ConvertM a -> Either SQLError (a, TableContext)
runConvertM tcontext = runIdentity . runExceptT . (`runStateT` tcontext)

-- | Run a conversion action without letting its state changes escape: the
-- table context is captured before the action runs and put back afterwards,
-- while the action's result is returned unchanged.
runLocalConvertM :: ConvertM a -> ConvertM a
runLocalConvertM m = get >>= \savedContext -> m <* put savedContext

-- | Run a conversion starting from the given table context, keeping only its
-- result (or the SQL error which aborted it) and dropping the final context.
evalConvertM :: TableContext -> ConvertM a -> Either SQLError a
evalConvertM tcontext = runIdentity . runExceptT . (`evalStateT` tcontext)

Expand Down Expand Up @@ -557,6 +564,23 @@ convertQuery typeF (QueryValues vals) = do
-- A TABLE query becomes a reference to the relation variable obtained by
-- converting the table name.
convertQuery _typeF (QueryTable tname) =
  tableDFExpr <$> convertTableName tname
  where
    tableDFExpr rvName = baseDFExpr { convertExpr = RelationVariable rvName () }
-- Convert a query built from a set operator (UNION, INTERSECT, EXCEPT).
--
-- Each operand is converted in its own local table context so that aliases
-- introduced by one side do not leak into the other. An operand which uses
-- dataframe features is rejected with 'NotSupportedError'; operands whose
-- types differ are rejected with 'QueryOperatorTypeMismatchError'.
convertQuery typeF (QueryOp op q1 q2) = do
  let dfErr = NotSupportedError ("ORDER BY/LIMIT/OFFSET in " <> T.pack (show op))
  dfExpr1 <- runLocalConvertM (convertQuery typeF q1)
  when (usesDataFrameFeatures dfExpr1) $ throwSQLE dfErr
  dfType1 <- case typeF (convertExpr dfExpr1) of
    Left err -> throwSQLE (SQLRelationalError err)
    Right t -> pure t

  dfExpr2 <- runLocalConvertM (convertQuery typeF q2)
  when (usesDataFrameFeatures dfExpr2) $ throwSQLE dfErr
  dfType2 <- case typeF (convertExpr dfExpr2) of
    Left err -> throwSQLE (SQLRelationalError err)
    Right t -> pure t

  when (dfType1 /= dfType2) $ throwSQLE (QueryOperatorTypeMismatchError op (attributes dfType1) (attributes dfType2))

  -- Pick the relational operator matching the SQL operator rather than
  -- always emitting a union. The operands are known to have equal types at
  -- this point, so a natural join over all attributes is the intersection.
  let relOp = case op of
        UnionQueryOperator -> Union
        IntersectQueryOperator -> Join
        ExceptQueryOperator -> Difference

  pure $ baseDFExpr { convertExpr = relOp (convertExpr dfExpr1) (convertExpr dfExpr2) }

convertSelect :: TypeForRelExprF -> Select -> ConvertM DataFrameExpr
convertSelect typeF sel = do
Expand Down
7 changes: 6 additions & 1 deletion src/lib/ProjectM36/SQL/Select.hs
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,15 @@ import Data.Hashable

-- | A parsed SQL query: a SELECT, a VALUES list of rows, a TABLE reference,
-- or two queries combined by a 'QueryOperator'.
data Query = QuerySelect Select |
QueryValues [[ScalarExpr]] |
QueryTable TableName
QueryTable TableName |
QueryOp QueryOperator Query Query
deriving (Show, Eq, Generic, NFData)
deriving Serialise via WineryVariant Query

-- | The binary operators which can combine two queries inside a 'QueryOp';
-- the SQL parser produces them for the keywords @union@, @intersect@, and
-- @except@ respectively.
data QueryOperator = UnionQueryOperator | IntersectQueryOperator | ExceptQueryOperator
deriving (Show, Eq, Generic, NFData)
deriving Serialise via WineryVariant QueryOperator

data Select = Select { distinctness :: Maybe Distinctness,
projectionClause :: [SelectItem],
tableExpr :: Maybe TableExpr,
Expand Down
11 changes: 8 additions & 3 deletions src/lib/ProjectM36/StaticOptimizer.hs
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,18 @@ type GraphRefSOptDatabaseContextExprM a = ReaderT GraphRefSOptDatabaseContextExp
-- | Optimize a relational expression and then evaluate the optimized
-- expression, using the database context and transaction graph carried by
-- the given environment.
--
-- A temporary function to be replaced by IO-based implementation.
optimizeAndEvalRelationalExpr :: RelationalExprEnv -> RelationalExpr -> Either RelationalError Relation
optimizeAndEvalRelationalExpr env expr = do
let gfExpr = runProcessExprM UncommittedContextMarker (processRelationalExpr expr) -- references parent tid instead of context! options- I could add the context to the graph with a new transid or implement an evalRelationalExpr in RE.hs to use the context (which is what I had previously)
graph = re_graph env
let graph = re_graph env
ctx = re_context env
gfEnv = freshGraphRefRelationalExprEnv (Just ctx) graph
optExpr <- runGraphRefSOptRelationalExprM (Just ctx) (re_graph env) (fullOptimizeGraphRefRelationalExpr gfExpr)
optExpr <- optimizeRelationalExpr env expr
runGraphRefRelationalExprM gfEnv (evalGraphRefRelationalExpr optExpr)

-- | Process a relational expression into its graph-referencing form and run
-- 'fullOptimizeGraphRefRelationalExpr' over it with the environment's
-- context and graph. The optimized expression is returned, not evaluated.
optimizeRelationalExpr :: RelationalExprEnv -> RelationalExpr -> Either RelationalError GraphRefRelationalExpr
optimizeRelationalExpr env expr =
  runGraphRefSOptRelationalExprM (Just (re_context env)) (re_graph env) optimization
  where
    -- NOTE: this references the parent transaction id instead of the
    -- context! Options: add the context to the graph with a new transaction
    -- id, or implement an evalRelationalExpr in RE.hs which uses the context
    -- (which is what was done previously).
    graphRefExpr = runProcessExprM UncommittedContextMarker (processRelationalExpr expr)
    optimization = fullOptimizeGraphRefRelationalExpr graphRefExpr

class Monad m => AskGraphContext m where
askGraph :: m TransactionGraph
askContext :: m DatabaseContext
Expand Down

0 comments on commit a76c70b

Please sign in to comment.