Implement timestamp predicate pushdown in Druid connector #8474
Changes from all commits
@@ -13,12 +13,16 @@

```java
 */
package io.trino.plugin.druid;

import com.google.common.collect.ImmutableMap;
import io.trino.Session;
import io.trino.plugin.jdbc.BaseJdbcConnectorTest;
import io.trino.plugin.jdbc.JdbcTableHandle;
import io.trino.spi.connector.ConnectorSession;
import io.trino.spi.connector.SchemaTableName;
import io.trino.spi.predicate.TupleDomain;
import io.trino.sql.planner.assertions.PlanMatchPattern;
import io.trino.sql.planner.plan.AggregationNode;
import io.trino.sql.planner.plan.FilterNode;
import io.trino.sql.planner.plan.JoinNode;
import io.trino.sql.planner.plan.TableScanNode;
import io.trino.sql.planner.plan.TopNNode;
```
@@ -35,6 +39,7 @@

```java
import static io.trino.spi.type.VarcharType.VARCHAR;
import static io.trino.sql.planner.assertions.PlanMatchPattern.anyTree;
import static io.trino.sql.planner.assertions.PlanMatchPattern.node;
import static io.trino.sql.planner.assertions.PlanMatchPattern.tableScan;
import static io.trino.testing.MaterializedResult.resultBuilder;
import static org.assertj.core.api.Assertions.assertThat;
```
@@ -275,4 +280,122 @@ public void testLimitPushDown()

```java
                "LIMIT 30"))
                .isNotFullyPushedDown(joinOverTableScans);
    }

    @Test
    public void testPredicatePushdown()
    {
        assertThat(query("SELECT * FROM orders where __time > DATE'1970-01-01'")).isFullyPushedDown();
```
Member comment on the line above: Copy the tests from `TestPostgreSqlConnectorTest#testPredicatePushdown` and remove the ones not relevant for Druid. Keeping tests consistent is useful and allows extracting them to base test classes in the future. For …
```java
        assertThat(query("SELECT * FROM orders where totalprice > 0")).isFullyPushedDown();
        assertThat(query("SELECT * FROM orders where comment = ''")).isFullyPushedDown();

        // varchar equality
        assertThat(query("SELECT regionkey, nationkey, name FROM nation WHERE name = 'ROMANIA'"))
                .matches("VALUES (BIGINT '3', BIGINT '19', CAST('ROMANIA' AS varchar(25)))")
                .isFullyPushedDown();

        // varchar range
        assertThat(query("SELECT regionkey, nationkey, name FROM nation WHERE name BETWEEN 'POLAND' AND 'RPA'"))
                .matches("VALUES (BIGINT '3', BIGINT '19', CAST('ROMANIA' AS varchar(25)))")
                .isNotFullyPushedDown(FilterNode.class);

        // varchar IN without domain compaction
        assertThat(query("SELECT regionkey, nationkey, name FROM nation WHERE name IN ('POLAND', 'ROMANIA', 'VIETNAM')"))
                .matches("VALUES " +
                        "(BIGINT '3', BIGINT '19', CAST('ROMANIA' AS varchar(25))), " +
                        "(BIGINT '2', BIGINT '21', CAST('VIETNAM' AS varchar(25)))")
                .isFullyPushedDown();

        // varchar IN with small compaction threshold
        assertThat(query(
                Session.builder(getSession())
                        .setCatalogSessionProperty("druid", "domain_compaction_threshold", "1")
                        .build(),
                "SELECT regionkey, nationkey, name FROM nation WHERE name IN ('POLAND', 'ROMANIA', 'VIETNAM')"))
                .matches("VALUES " +
                        "(BIGINT '3', BIGINT '19', CAST('ROMANIA' AS varchar(25))), " +
                        "(BIGINT '2', BIGINT '21', CAST('VIETNAM' AS varchar(25)))")
                // Filter node is retained as no constraint is pushed into the connector.
                // The compacted domain is a range predicate which can give wrong results
                // if pushed down, since the remote system may sort letters differently from Trino.
                .isNotFullyPushedDown(
                        node(
                                FilterNode.class,
                                // verify that no constraint is applied by the connector
                                tableScan(
                                        tableHandle -> ((JdbcTableHandle) tableHandle).getConstraint().isAll(),
                                        TupleDomain.all(),
                                        ImmutableMap.of())));
```
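As an aside on what the compaction test above exercises: when a discrete domain has more values than `domain_compaction_threshold`, the engine collapses it into its enclosing range before handing the constraint to the connector. The sketch below builds both shapes by hand with the Trino SPI predicate types purely for illustration; the engine performs the compaction internally, so none of this code appears in the PR.

```java
import io.trino.spi.predicate.Domain;
import io.trino.spi.predicate.Range;
import io.trino.spi.predicate.ValueSet;

import java.util.List;

import static io.airlift.slice.Slices.utf8Slice;
import static io.trino.spi.type.VarcharType.VARCHAR;

class DomainCompactionSketch
{
    public static void main(String[] args)
    {
        // The IN-list as a discrete domain: name IN ('POLAND', 'ROMANIA', 'VIETNAM')
        Domain inList = Domain.multipleValues(VARCHAR, List.of(
                utf8Slice("POLAND"), utf8Slice("ROMANIA"), utf8Slice("VIETNAM")));

        // With domain_compaction_threshold = 1, compaction degrades the discrete
        // set to its enclosing range ['POLAND', 'VIETNAM']. That range is correct
        // in Trino's ordering, but unsafe to push down when the remote system
        // collates varchars differently -- hence the retained FilterNode above.
        Domain compacted = Domain.create(
                ValueSet.ofRanges(Range.range(VARCHAR,
                        utf8Slice("POLAND"), true, utf8Slice("VIETNAM"), true)),
                false);

        System.out.println(compacted.contains(inList)); // true: compaction only widens
    }
}
```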
```java
        // varchar different case
        assertThat(query("SELECT regionkey, nationkey, name FROM nation WHERE name = 'romania'"))
                .returnsEmptyResult()
                .isFullyPushedDown();

        // bigint equality
        assertThat(query("SELECT regionkey, nationkey, name FROM nation WHERE nationkey = 19"))
                .matches("VALUES (BIGINT '3', BIGINT '19', CAST('ROMANIA' AS varchar(25)))")
                .isFullyPushedDown();

        // bigint IN with small compaction threshold
        assertThat(query(
                Session.builder(getSession())
                        .setCatalogSessionProperty("druid", "domain_compaction_threshold", "1")
                        .build(),
                "SELECT regionkey, nationkey, name FROM nation WHERE nationkey IN (19, 21)"))
                .matches("VALUES " +
                        "(BIGINT '3', BIGINT '19', CAST('ROMANIA' AS varchar(25))), " +
                        "(BIGINT '2', BIGINT '21', CAST('VIETNAM' AS varchar(25)))")
                .isNotFullyPushedDown(FilterNode.class);

        // bigint range, with decimal to bigint simplification
        assertThat(query("SELECT regionkey, nationkey, name FROM nation WHERE nationkey BETWEEN 18.5 AND 19.5"))
                .matches("VALUES (BIGINT '3', BIGINT '19', CAST('ROMANIA' AS varchar(25)))")
                .isFullyPushedDown();

        // date equality
        assertThat(query("SELECT orderkey FROM orders WHERE orderdate = DATE '1992-09-29'"))
                .matches("VALUES BIGINT '1250', 34406, 38436, 57570")
                .isFullyPushedDown();

        // predicate over aggregation key (likely to be optimized before being pushed down into the connector)
        assertThat(query("SELECT * FROM (SELECT regionkey, sum(nationkey) FROM nation GROUP BY regionkey) WHERE regionkey = 3"))
                .matches("VALUES (BIGINT '3', BIGINT '77')")
                .isFullyPushedDown();

        // predicate over aggregation result
        assertThat(query("SELECT regionkey, sum(nationkey) FROM nation GROUP BY regionkey HAVING sum(nationkey) = 77"))
                .matches("VALUES (BIGINT '3', BIGINT '77')")
                .isFullyPushedDown();

        // predicate over TopN result
        assertThat(query("" +
                "SELECT orderkey " +
                "FROM (SELECT * FROM orders ORDER BY orderdate DESC, orderkey ASC LIMIT 10) " +
                "WHERE orderdate = DATE '1998-08-01'"))
                .matches("VALUES BIGINT '27588', 22403, 37735")
                .ordered()
                .isFullyPushedDown();

        assertThat(query("" +
                "SELECT custkey " +
                "FROM (SELECT SUM(totalprice) as sum, custkey, COUNT(*) as cnt FROM orders GROUP BY custkey order by sum desc limit 10) " +
                "WHERE cnt > 30"))
                .matches("VALUES BIGINT '643', 898")
                .ordered()
                .isFullyPushedDown();

        // predicate over join
        Session joinPushdownEnabled = joinPushdownEnabled(getSession());
        assertThat(query(joinPushdownEnabled, "SELECT c.name, n.name FROM customer c JOIN nation n ON c.custkey = n.nationkey WHERE acctbal > 8000"))
                .isFullyPushedDown();

        // varchar predicate over join
        assertThat(query(joinPushdownEnabled, "SELECT c.name, n.name FROM customer c JOIN nation n ON c.custkey = n.nationkey WHERE address = 'TcGe5gaZNgVePxU5kRrvXBfkasDTea'"))
                .isFullyPushedDown();
        assertThat(query(joinPushdownEnabled, "SELECT c.name, n.name FROM customer c JOIN nation n ON c.custkey = n.nationkey WHERE address < 'TcGe5gaZNgVePxU5kRrvXBfkasDTea'"))
                .isNotFullyPushedDown(
                        node(JoinNode.class,
                                anyTree(node(TableScanNode.class)),
                                anyTree(node(TableScanNode.class))));
    }
}
```
DruidCreateAndInsertDataSetup.java (new file)

@@ -0,0 +1,116 @@
```java
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.trino.plugin.druid;

import io.airlift.log.Logger;
import io.trino.plugin.druid.ingestion.IndexTaskBuilder;
import io.trino.plugin.druid.ingestion.TimestampSpec;
import io.trino.testing.datatype.ColumnSetup;
import io.trino.testing.datatype.DataSetup;
import io.trino.testing.sql.SqlExecutor;
import io.trino.testing.sql.TestTable;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.List;
import java.util.stream.Collectors;

import static java.lang.String.format;

public class DruidCreateAndInsertDataSetup
        implements DataSetup
{
    private static final Logger log = Logger.get(DruidCreateAndInsertDataSetup.class);
    private final SqlExecutor sqlExecutor;
    private final TestingDruidServer druidServer;
    private final String dataSourceNamePrefix;

    public DruidCreateAndInsertDataSetup(SqlExecutor sqlExecutor, TestingDruidServer druidServer, String dataSourceNamePrefix)
    {
        this.sqlExecutor = sqlExecutor;
        this.druidServer = druidServer;
        this.dataSourceNamePrefix = dataSourceNamePrefix;
    }

    @Override
    public TestTable setupTestTable(List<ColumnSetup> inputs)
    {
        TestTable testTable = new TestTable(this.sqlExecutor, this.dataSourceNamePrefix, "(col1 TIMESTAMP(3))", false);
        try {
            ingestData(testTable, inputs);
        }
        catch (Exception e) {
            // Fail fast: returning a table whose ingestion failed would only
            // surface later as confusing assertion errors.
            throw new RuntimeException("Ingestion failed for " + testTable.getName(), e);
        }
        return testTable;
    }

    private void ingestData(TestTable testTable, List<ColumnSetup> inputs)
            throws Exception
    {
        IndexTaskBuilder builder = new IndexTaskBuilder();
        builder.setDatasource(testTable.getName());
        TimestampSpec timestampSpec = getTimestampSpec(inputs);
        builder.setTimestampSpec(timestampSpec);

        // Declare all non-timestamp columns as regular dimensions
        List<ColumnSetup> normalInputs = inputs.stream().filter(input -> !isTimestampDimension(input)).collect(Collectors.toList());
        for (int index = 0; index < normalInputs.size(); index++) {
            builder.addColumn(format("col_%s", index), normalInputs.get(index).getDeclaredType().orElse("string"));
        }

        String dataFilePath = format("%s.tsv", testTable.getName());
        writeTsvFile(dataFilePath, inputs);

        log.info(builder.build());
        this.druidServer.ingestDataWithoutTaskFile(builder.build(), dataFilePath, testTable.getName());
    }

    private TimestampSpec getTimestampSpec(List<ColumnSetup> inputs)
    {
        List<ColumnSetup> timestampInputs = inputs.stream().filter(this::isTimestampDimension).collect(Collectors.toList());

        if (timestampInputs.size() > 1) {
            throw new UnsupportedOperationException("Druid only allows one timestamp field");
        }

        // Druid requires a timestamp column on every datasource; ingestion here
        // always keys off the synthetic dummy_druid_ts column.
        return new TimestampSpec("dummy_druid_ts", "auto");
    }
```
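For context, the `TimestampSpec` built above models the timestamp declaration of a Druid native batch ingestion spec, where `"auto"` lets Druid detect ISO-8601 or epoch-millis values. The exact task JSON that `IndexTaskBuilder` emits is project-specific; this is an abridged sample in the shape of Druid's documented spec, with the datasource name made up for illustration:

```json
{
  "type": "index",
  "spec": {
    "dataSchema": {
      "dataSource": "test_table",
      "timestampSpec": {
        "column": "dummy_druid_ts",
        "format": "auto"
      }
    }
  }
}
```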
```java
    private boolean isTimestampDimension(ColumnSetup input)
    {
        if (input.getDeclaredType().isEmpty()) {
            return false;
        }
        String type = input.getDeclaredType().get();

        // TODO: support more types
        return type.startsWith("timestamp");
    }

    private void writeTsvFile(String dataFilePath, List<ColumnSetup> inputs)
            throws IOException
    {
        String tsvFileLocation = format("%s/%s", druidServer.getHostWorkingDirectory(), dataFilePath);
        File file = new File(tsvFileLocation);
        try (BufferedWriter bw = new BufferedWriter(new FileWriter(file))) {
            // Each ColumnSetup is one column of a single row; join the literals into one TSV line
            bw.write(inputs.stream().map(ColumnSetup::getInputLiteral).collect(Collectors.joining("\t")));
        }
    }
}
```
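For orientation, here is a sketch of how this `DataSetup` would plug into Trino's `SqlDataTypeTest` harness (the same infrastructure that defines `ColumnSetup` and `DataSetup`). The `druidExecutor` and `druidServer` fields, the table-name prefix, and the test method itself are assumptions about the enclosing test class, not part of this PR:

```java
import io.trino.testing.datatype.SqlDataTypeTest;
import org.testng.annotations.Test;

import static io.trino.spi.type.TimestampType.createTimestampType;

// Hypothetical round-trip test; druidExecutor, druidServer, and getQueryRunner()
// are assumed to be provided by the enclosing Druid test class.
@Test
public void testTimestampRoundTrip()
{
    SqlDataTypeTest.create()
            // the input literal is written verbatim to the TSV file, so it is
            // a raw value rather than a SQL literal
            .addRoundTrip("timestamp(3)", "2020-01-01 12:34:56.123",
                    createTimestampType(3), "TIMESTAMP '2020-01-01 12:34:56.123'")
            .execute(getQueryRunner(), new DruidCreateAndInsertDataSetup(
                    druidExecutor, druidServer, "test_timestamp"));
}
```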