Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions presto-docs/src/main/sphinx/connector/iceberg.rst
Original file line number Diff line number Diff line change
Expand Up @@ -599,9 +599,13 @@ The table is partitioned by the transformed value of the column::

ALTER TABLE iceberg.web.page_views ADD COLUMN location VARCHAR WITH (partitioning = 'bucket(8)');

.. note::
ALTER TABLE iceberg.web.page_views ADD COLUMN dt date WITH (partitioning = 'year');

ALTER TABLE iceberg.web.page_views ADD COLUMN ts timestamp WITH (partitioning = 'month');

ALTER TABLE iceberg.web.page_views ADD COLUMN dt date WITH (partitioning = 'day');

``Day``, ``Month``, ``Year``, ``Hour`` partition column transform functions are not supported in the Presto Iceberg connector.
ALTER TABLE iceberg.web.page_views ADD COLUMN ts timestamp WITH (partitioning = 'hour');

TRUNCATE
^^^^^^^^
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,12 @@
import static java.lang.Integer.parseInt;
import static java.lang.String.format;
import static org.apache.iceberg.expressions.Expressions.bucket;
import static org.apache.iceberg.expressions.Expressions.day;
import static org.apache.iceberg.expressions.Expressions.hour;
import static org.apache.iceberg.expressions.Expressions.month;
import static org.apache.iceberg.expressions.Expressions.ref;
import static org.apache.iceberg.expressions.Expressions.truncate;
import static org.apache.iceberg.expressions.Expressions.year;

public final class PartitionFields
{
Expand Down Expand Up @@ -126,10 +130,13 @@ protected static Term getTransformTerm(String columnName, String transform)
case "identity":
return ref(columnName);
case "year":
return year(columnName);
case "month":
return month(columnName);
case "day":
return day(columnName);
case "hour":
throw new UnsupportedOperationException("Currently unsupported partition transform: " + transform);
return hour(columnName);
}

Matcher matcher = COLUMN_BUCKET_PATTERN.matcher(transform);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@
import java.util.function.Function;
import java.util.stream.Collectors;

import static com.facebook.presto.common.type.TimeZoneKey.UTC_KEY;
import static com.facebook.presto.common.type.VarcharType.VARCHAR;
import static com.facebook.presto.iceberg.IcebergQueryRunner.TEST_CATALOG_DIRECTORY;
import static com.facebook.presto.iceberg.IcebergQueryRunner.TEST_DATA_DIRECTORY;
Expand Down Expand Up @@ -295,11 +296,75 @@ public void testAddPartitionColumn()
assertEquals(getQueryRunner().execute("select row_count from \"add_partition_column$partitions\" where b = timestamp '1984-12-08 00:10:00' and c_trunc = 'hw' and d_bucket = 1").getOnlyValue(), 1L);
assertEquals(getQueryRunner().execute("select row_count from \"add_partition_column$partitions\" where b = timestamp '1984-12-08 00:10:00' and c_trunc = 'hw' and d_bucket = 0").getOnlyValue(), 1L);

// validate currently unsupported partition transform
assertQueryFails("alter table add_partition_column add column e timestamp with(partitioning = 'year')", "Currently unsupported partition transform: year");
assertQueryFails("alter table add_partition_column add column e timestamp with(partitioning = 'month')", "Currently unsupported partition transform: month");
assertQueryFails("alter table add_partition_column add column e timestamp with(partitioning = 'day')", "Currently unsupported partition transform: day");
assertQueryFails("alter table add_partition_column add column e timestamp with(partitioning = 'hour')", "Currently unsupported partition transform: hour");
assertQuerySucceeds("DROP TABLE add_partition_column");

Session session = Session.builder(getSession())
.setTimeZoneKey(UTC_KEY)
.build();
// Create partitioned table and insert some data
assertQuerySucceeds(session, "create table add_partition_column(a int, b varchar) with (partitioning = ARRAY['truncate(b, 2)'])");
assertQuerySucceeds(session, "insert into add_partition_column values(1, 'hw1001'), (2, 'hw1002')");
assertEquals(getQueryRunner().execute(session, "SELECT count(*) FROM \"add_partition_column$partitions\"").getOnlyValue(), 1L);
assertEquals(getQueryRunner().execute(session, "select row_count from \"add_partition_column$partitions\" where b_trunc = 'hw'").getOnlyValue(), 2L);

// Add year partition column
assertQuerySucceeds(session, "alter table add_partition_column add column c date with(partitioning = 'year')");
assertQuerySucceeds(session, "insert into add_partition_column values(3, 'hw1003', date '1984-12-08'), (4, 'hw1004', date '2001-01-08')");
assertEquals(getQueryRunner().execute(session, "SELECT count(*) FROM \"add_partition_column$partitions\"").getOnlyValue(), 3L);

assertQuery(session, "SELECT b_trunc, c_year, row_count FROM \"add_partition_column$partitions\" ORDER BY c_year",
"VALUES ('hw', 14, 1), ('hw', 31, 1), ('hw', NULL, 2)");
assertQuery(session, "SELECT * FROM add_partition_column WHERE year(c) = 1984", "VALUES (3, 'hw1003', '1984-12-08')");

assertQuerySucceeds(session, "DROP TABLE add_partition_column");

// Create partitioned table and insert some data
assertQuerySucceeds(session, "create table add_partition_column(a int, b varchar) with (partitioning = ARRAY['truncate(b, 2)'])");
assertQuerySucceeds(session, "insert into add_partition_column values(1, 'hw1001'), (2, 'hw1002')");
assertEquals(getQueryRunner().execute(session, "SELECT count(*) FROM \"add_partition_column$partitions\"").getOnlyValue(), 1L);
assertEquals(getQueryRunner().execute(session, "select row_count from \"add_partition_column$partitions\" where b_trunc = 'hw'").getOnlyValue(), 2L);

// Add month partition column
assertQuerySucceeds(session, "alter table add_partition_column add column c timestamp with(partitioning = 'month')");
assertQuerySucceeds(session, "insert into add_partition_column values(3, 'hw1003', timestamp '1984-12-08 12:10:31.315'), (4, 'hw1004', timestamp '2001-01-08 10:01:01.123')");
assertEquals(getQueryRunner().execute(session, "SELECT count(*) FROM \"add_partition_column$partitions\"").getOnlyValue(), 3L);

assertQuery(session, "SELECT b_trunc, c_month, row_count FROM \"add_partition_column$partitions\" ORDER BY c_month",
"VALUES ('hw', 179, 1), ('hw', 372, 1), ('hw', NULL, 2)");
assertQuery(session, "SELECT * FROM add_partition_column WHERE month(c) = 12", "VALUES (3, 'hw1003', '1984-12-08 12:10:31.315')");

assertQuerySucceeds(session, "DROP TABLE add_partition_column");

// Create partitioned table and insert some data
assertQuerySucceeds(session, "create table add_partition_column(a int, b varchar) with (partitioning = ARRAY['truncate(b, 2)'])");
assertQuerySucceeds(session, "insert into add_partition_column values(1, 'hw1001'), (2, 'hw1002')");
assertEquals(getQueryRunner().execute(session, "SELECT count(*) FROM \"add_partition_column$partitions\"").getOnlyValue(), 1L);
assertEquals(getQueryRunner().execute(session, "select row_count from \"add_partition_column$partitions\" where b_trunc = 'hw'").getOnlyValue(), 2L);

// Add day partition column
assertQuerySucceeds(session, "alter table add_partition_column add column c date with(partitioning = 'day')");
assertQuerySucceeds(session, "insert into add_partition_column values(3, 'hw1003', date '1984-12-08'), (4, 'hw1004', date '2001-01-09')");
assertEquals(getQueryRunner().execute(session, "SELECT count(*) FROM \"add_partition_column$partitions\"").getOnlyValue(), 3L);

assertQuerySucceeds(session, "DROP TABLE add_partition_column");

// Create partitioned table and insert some data
assertQuerySucceeds(session, "create table add_partition_column(a int, b varchar) with (partitioning = ARRAY['truncate(b, 2)'])");
assertQuerySucceeds(session, "insert into add_partition_column values(1, 'hw1001'), (2, 'hw1002')");
assertEquals(getQueryRunner().execute(session, "SELECT count(*) FROM \"add_partition_column$partitions\"").getOnlyValue(), 1L);
assertEquals(getQueryRunner().execute(session, "select row_count from \"add_partition_column$partitions\" where b_trunc = 'hw'").getOnlyValue(), 2L);

// Add hour partition column
assertQuerySucceeds(session, "alter table add_partition_column add column c timestamp with(partitioning = 'hour')");
assertQuerySucceeds(session, "insert into add_partition_column values(3, 'hw1003', timestamp '1984-12-08 12:10:31.315'), (4, 'hw1004', timestamp '2001-01-08 10:01:01.123')");
assertEquals(getQueryRunner().execute(session, "SELECT count(*) FROM \"add_partition_column$partitions\"").getOnlyValue(), 3L);

assertQuery(session, "SELECT b_trunc, c_hour, row_count FROM \"add_partition_column$partitions\" ORDER BY c_hour",
"VALUES ('hw', 130932, 1), ('hw', 271930, 1), ('hw', NULL, 2)");
assertQuery(session, "SELECT * FROM add_partition_column WHERE hour(c) = 12", "VALUES (3, 'hw1003', '1984-12-08 12:10:31.315')");

// validate hour() unsupported for date type
assertQueryFails("alter table add_partition_column add column e date with(partitioning = 'hour')", ".*hour cannot transform date values from.*");

// validate unknown partition transform, take 'other' and 'null' for example.
assertQueryFails("alter table add_partition_column add column e varchar with(partitioning = 'other')", "Unknown partition transform: other");
Expand Down