Skip to content

Commit

Permalink
feat(impala): implement Table.sample
Browse files (browse the repository at this point in the history)
  • Loading branch information
jcrist committed Oct 17, 2023
1 parent 6aa897e commit 8e78dfc
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 17 deletions.
34 changes: 19 additions & 15 deletions ibis/backends/base/sql/compiler/query_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,9 +100,11 @@ def _format_table(self, op):
ctx = self.context

orig_op = op
if isinstance(op, ops.SelfReference):
if isinstance(op, (ops.SelfReference, ops.Sample)):
op = op.table

alias = ctx.get_ref(orig_op)

if isinstance(op, ops.InMemoryTable):
result = self._format_in_memory_table(op)
elif isinstance(op, ops.PhysicalTable):
Expand All @@ -117,26 +119,28 @@ def _format_table(self, op):
db=getattr(op, "namespace", None),
quoted=self.parent.translator_class._quote_identifiers,
).sql(dialect=self.parent.translator_class._dialect_name)
elif ctx.is_extracted(op):
if isinstance(orig_op, ops.SelfReference):
result = ctx.get_ref(op)
else:
result = alias
else:
# A subquery
if ctx.is_extracted(op):
# Was put elsewhere, e.g. WITH block, we just need to grab its
# alias
alias = ctx.get_ref(orig_op)

# HACK: self-references have to be treated more carefully here
if isinstance(orig_op, ops.SelfReference):
return f"{ctx.get_ref(op)} {alias}"
else:
return alias

subquery = ctx.get_compiled_expr(orig_op)
subquery = ctx.get_compiled_expr(op)
result = f"(\n{util.indent(subquery, self.indent)}\n)"

result += f" {ctx.get_ref(orig_op)}"
if result != alias:
result = f"{result} {alias}"

if isinstance(orig_op, ops.Sample):
result = self._format_sample(orig_op, result)

return result

def _format_sample(self, op, table):
    """Fallback for rendering a sampled table reference; always raises.

    ``_format_table`` invokes this with the ``ops.Sample`` node and the
    already-formatted table text. It should never be reached in practice:
    Sample operations are expected to be rewritten before this point for
    every backend without TABLESAMPLE support.

    NOTE(review): presumably formatters for backends with native
    TABLESAMPLE support override this method -- confirm.

    Raises
    ------
    com.UnsupportedOperationError
        Always.
    """
    message = "`Table.sample` is not supported"
    raise com.UnsupportedOperationError(message)

def get_result(self):
# Got to unravel the join stack; the nesting order could be
# arbitrary, so we do a depth first search and push the join tokens
Expand Down
2 changes: 2 additions & 0 deletions ibis/backends/impala/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import ibis.expr.operations as ops
from ibis.backends.base.sql.compiler import Compiler, ExprTranslator, TableSetFormatter
from ibis.backends.base.sql.registry import binary_infix_ops, operation_registry, unary
from ibis.expr.rewrites import rewrite_sample


class ImpalaTableSetFormatter(TableSetFormatter):
Expand Down Expand Up @@ -58,3 +59,4 @@ def _floor_divide(op):
class ImpalaCompiler(Compiler):
    """Compiler wiring for the Impala backend."""

    # Impala-specific expression translator and table-set formatter.
    translator_class = ImpalaExprTranslator
    table_set_formatter_class = ImpalaTableSetFormatter

    # Inherit the base rewrites and add ``rewrite_sample``.
    # NOTE(review): presumably this rewrites ``Table.sample`` into a form
    # Impala can execute without TABLESAMPLE -- confirm against
    # ``ibis.expr.rewrites``.
    rewrites = Compiler.rewrites | rewrite_sample
2 changes: 0 additions & 2 deletions ibis/backends/tests/test_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1535,7 +1535,6 @@ def test_dynamic_table_slice_with_computed_offset(backend):
"datafusion",
"druid",
"flink",
"impala",
"polars",
"snowflake",
]
Expand All @@ -1561,7 +1560,6 @@ def test_sample(backend):
"datafusion",
"druid",
"flink",
"impala",
"polars",
"snowflake",
]
Expand Down

0 comments on commit 8e78dfc

Please sign in to comment.