Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@

import javax.inject.Inject;

import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Optional;
Expand Down Expand Up @@ -68,9 +67,7 @@ public IcebergTableProperties(
List.class,
ImmutableList.of(),
false,
value -> ((Collection<?>) value).stream()
.map(name -> ((String) name).toLowerCase(ENGLISH))
.collect(toImmutableList()),
value -> (List<?>) value,
value -> value))
.add(stringProperty(
LOCATION_PROPERTY,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
*/
package io.trino.plugin.iceberg;

import io.trino.spi.TrinoException;
import org.apache.iceberg.PartitionField;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
Expand All @@ -24,23 +25,31 @@
import java.util.regex.Pattern;

import static com.google.common.collect.ImmutableList.toImmutableList;
import static io.trino.spi.StandardErrorCode.INVALID_TABLE_PROPERTY;
import static java.lang.Integer.parseInt;
import static java.lang.String.format;
import static java.util.Locale.ENGLISH;
import static java.util.regex.Pattern.CASE_INSENSITIVE;

public final class PartitionFields
{
private static final String NAME = "[a-z_][a-z0-9_]*";
private static final String FUNCTION_ARGUMENT_NAME = "\\((" + NAME + ")\\)";
private static final String FUNCTION_ARGUMENT_NAME_AND_INT = "\\((" + NAME + "), *(\\d+)\\)";

private static final Pattern IDENTITY_PATTERN = Pattern.compile(NAME);
private static final Pattern YEAR_PATTERN = Pattern.compile("year" + FUNCTION_ARGUMENT_NAME);
private static final Pattern MONTH_PATTERN = Pattern.compile("month" + FUNCTION_ARGUMENT_NAME);
private static final Pattern DAY_PATTERN = Pattern.compile("day" + FUNCTION_ARGUMENT_NAME);
private static final Pattern HOUR_PATTERN = Pattern.compile("hour" + FUNCTION_ARGUMENT_NAME);
private static final Pattern BUCKET_PATTERN = Pattern.compile("bucket" + FUNCTION_ARGUMENT_NAME_AND_INT);
private static final Pattern TRUNCATE_PATTERN = Pattern.compile("truncate" + FUNCTION_ARGUMENT_NAME_AND_INT);
private static final Pattern VOID_PATTERN = Pattern.compile("void" + FUNCTION_ARGUMENT_NAME);
private static final String UNQUOTED_IDENTIFIER = "[a-z_][a-z0-9_]*";
private static final String QUOTED_IDENTIFIER = "\"(?:\"\"|[^\"])*\"";
public static final String IDENTIFIER = "(" + UNQUOTED_IDENTIFIER + "|" + QUOTED_IDENTIFIER + ")";
private static final Pattern UNQUOTED_IDENTIFIER_PATTERN = Pattern.compile(UNQUOTED_IDENTIFIER);
private static final Pattern QUOTED_IDENTIFIER_PATTERN = Pattern.compile(QUOTED_IDENTIFIER);

private static final String FUNCTION_ARGUMENT_NAME = "\\(" + IDENTIFIER + "\\)\\s*";
private static final String FUNCTION_ARGUMENT_NAME_AND_INT = "\\(" + IDENTIFIER + ",\\s*(\\d+)\\)";

private static final Pattern IDENTITY_PATTERN = Pattern.compile(IDENTIFIER, CASE_INSENSITIVE);
private static final Pattern YEAR_PATTERN = Pattern.compile("year" + FUNCTION_ARGUMENT_NAME, CASE_INSENSITIVE);
private static final Pattern MONTH_PATTERN = Pattern.compile("month" + FUNCTION_ARGUMENT_NAME, CASE_INSENSITIVE);
private static final Pattern DAY_PATTERN = Pattern.compile("day" + FUNCTION_ARGUMENT_NAME, CASE_INSENSITIVE);
private static final Pattern HOUR_PATTERN = Pattern.compile("hour" + FUNCTION_ARGUMENT_NAME, CASE_INSENSITIVE);
private static final Pattern BUCKET_PATTERN = Pattern.compile("bucket" + FUNCTION_ARGUMENT_NAME_AND_INT, CASE_INSENSITIVE);
private static final Pattern TRUNCATE_PATTERN = Pattern.compile("truncate" + FUNCTION_ARGUMENT_NAME_AND_INT, CASE_INSENSITIVE);
private static final Pattern VOID_PATTERN = Pattern.compile("void" + FUNCTION_ARGUMENT_NAME, CASE_INSENSITIVE);

private static final Pattern ICEBERG_BUCKET_PATTERN = Pattern.compile("bucket\\[(\\d+)]");
private static final Pattern ICEBERG_TRUNCATE_PATTERN = Pattern.compile("truncate\\[(\\d+)]");
Expand All @@ -49,31 +58,54 @@ private PartitionFields() {}

public static PartitionSpec parsePartitionFields(Schema schema, List<String> fields)
{
PartitionSpec.Builder builder = PartitionSpec.builderFor(schema);
for (String field : fields) {
parsePartitionField(builder, field);
try {
PartitionSpec.Builder builder = PartitionSpec.builderFor(schema);
for (String field : fields) {
parsePartitionField(builder, field);
}
return builder.build();
}
catch (RuntimeException e) {
throw new TrinoException(INVALID_TABLE_PROPERTY, "Unable to parse partitioning value: " + e.getMessage(), e);
}
return builder.build();
}

public static void parsePartitionField(PartitionSpec.Builder builder, String field)
{
@SuppressWarnings("PointlessBooleanExpression")
boolean matched = false ||
tryMatch(field, IDENTITY_PATTERN, match -> builder.identity(match.group())) ||
tryMatch(field, YEAR_PATTERN, match -> builder.year(match.group(1))) ||
tryMatch(field, MONTH_PATTERN, match -> builder.month(match.group(1))) ||
tryMatch(field, DAY_PATTERN, match -> builder.day(match.group(1))) ||
tryMatch(field, HOUR_PATTERN, match -> builder.hour(match.group(1))) ||
tryMatch(field, BUCKET_PATTERN, match -> builder.bucket(match.group(1), parseInt(match.group(2)))) ||
tryMatch(field, TRUNCATE_PATTERN, match -> builder.truncate(match.group(1), parseInt(match.group(2)))) ||
tryMatch(field, VOID_PATTERN, match -> builder.alwaysNull(match.group(1))) ||
tryMatch(field, IDENTITY_PATTERN, match -> builder.identity(fromIdentifierToColumn(match.group()))) ||
tryMatch(field, YEAR_PATTERN, match -> builder.year(fromIdentifierToColumn(match.group(1)))) ||
tryMatch(field, MONTH_PATTERN, match -> builder.month(fromIdentifierToColumn(match.group(1)))) ||
tryMatch(field, DAY_PATTERN, match -> builder.day(fromIdentifierToColumn(match.group(1)))) ||
tryMatch(field, HOUR_PATTERN, match -> builder.hour(fromIdentifierToColumn(match.group(1)))) ||
tryMatch(field, BUCKET_PATTERN, match -> builder.bucket(fromIdentifierToColumn(match.group(1)), parseInt(match.group(2)))) ||
tryMatch(field, TRUNCATE_PATTERN, match -> builder.truncate(fromIdentifierToColumn(match.group(1)), parseInt(match.group(2)))) ||
tryMatch(field, VOID_PATTERN, match -> builder.alwaysNull(fromIdentifierToColumn(match.group(1)))) ||
false;
if (!matched) {
throw new IllegalArgumentException("Invalid partition field declaration: " + field);
}
}

private static String fromIdentifierToColumn(String identifier)
{
if (QUOTED_IDENTIFIER_PATTERN.matcher(identifier).matches()) {
// We only support lowercase quoted identifiers for now.
// See https://github.com/trinodb/trino/issues/12226#issuecomment-1128839259
// TODO: Enhance quoted identifiers support in Iceberg partitioning to support mixed case identifiers
// See https://github.com/trinodb/trino/issues/12668
if (!identifier.toLowerCase(ENGLISH).equals(identifier)) {
throw new IllegalArgumentException(format("Uppercase characters in identifier '%s' are not supported.", identifier));
}
return identifier.substring(1, identifier.length() - 1).replace("\"\"", "\"");
}
// Currently, all Iceberg columns are stored in lowercase in the Iceberg metadata files.
// Unquoted identifiers are canonicalized to lowercase here which is not according ANSI SQL spec.
// See https://github.com/trinodb/trino/issues/17
return identifier.toLowerCase(ENGLISH);
}

private static boolean tryMatch(CharSequence value, Pattern pattern, Consumer<MatchResult> match)
{
Matcher matcher = pattern.matcher(value);
Expand All @@ -93,7 +125,7 @@ public static List<String> toPartitionFields(PartitionSpec spec)

private static String toPartitionField(PartitionSpec spec, PartitionField field)
{
String name = spec.schema().findColumnName(field.sourceId());
String name = fromColumnToIdentifier(spec.schema().findColumnName(field.sourceId()));
String transform = field.transform().toString();

switch (transform) {
Expand All @@ -119,4 +151,17 @@ private static String toPartitionField(PartitionSpec spec, PartitionField field)

throw new UnsupportedOperationException("Unsupported partition transform: " + field);
}

private static String fromColumnToIdentifier(String column)
{
return quotedName(column);
}

private static String quotedName(String name)
{
if (UNQUOTED_IDENTIFIER_PATTERN.matcher(name).matches()) {
return name;
}
return '"' + name.replace("\"", "\"\"") + '"';
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -685,7 +685,8 @@ public void testCreatePartitionedTable()
" a_uuid uuid, " +
" a_row row(id integer , vc varchar), " +
" an_array array(varchar), " +
" a_map map(integer, varchar) " +
" a_map map(integer, varchar), " +
" \"a quoted, field\" varchar" +
") " +
"WITH (" +
"partitioning = ARRAY[" +
Expand All @@ -702,7 +703,8 @@ public void testCreatePartitionedTable()
" 'a_time', " +
" 'a_timestamp', " +
" 'a_timestamptz', " +
" 'a_uuid' " +
" 'a_uuid', " +
" '\"a quoted, field\"' " +
// Note: partitioning on non-primitive columns is not allowed in Iceberg
" ]" +
")");
Expand All @@ -726,9 +728,10 @@ public void testCreatePartitionedTable()
"UUID '20050910-1330-11e9-ffff-2a86e4085a59', " +
"CAST(ROW(42, 'this is a random value') AS ROW(id int, vc varchar)), " +
"ARRAY[VARCHAR 'uno', 'dos', 'tres'], " +
"map(ARRAY[1,2], ARRAY['ek', VARCHAR 'one'])) ";
"map(ARRAY[1,2], ARRAY['ek', VARCHAR 'one']), " +
"VARCHAR 'tralala')";

String nullValues = nCopies(17, "NULL").stream()
String nullValues = nCopies(18, "NULL").stream()
.collect(joining(", ", "VALUES (", ")"));

assertUpdate("INSERT INTO test_partitioned_table " + values, 1);
Expand Down Expand Up @@ -757,6 +760,7 @@ public void testCreatePartitionedTable()
"AND a_row = CAST(ROW(42, 'this is a random value') AS ROW(id int, vc varchar)) " +
"AND an_array = ARRAY[VARCHAR 'uno', 'dos', 'tres'] " +
"AND a_map = map(ARRAY[1,2], ARRAY['ek', VARCHAR 'one']) " +
"AND \"a quoted, field\" = VARCHAR 'tralala' " +
""))
.matches(values);

Expand All @@ -778,6 +782,7 @@ public void testCreatePartitionedTable()
"AND a_row IS NULL " +
"AND an_array IS NULL " +
"AND a_map IS NULL " +
"AND \"a quoted, field\" IS NULL " +
""))
.skippingTypesCheck()
.matches(nullValues);
Expand All @@ -803,6 +808,7 @@ public void testCreatePartitionedTable()
" ('a_row', NULL, NULL, 0.5, NULL, NULL, NULL), " +
" ('an_array', NULL, NULL, 0.5, NULL, NULL, NULL), " +
" ('a_map', NULL, NULL, 0.5, NULL, NULL, NULL), " +
" ('a quoted, field', NULL, NULL, 0.5, NULL, NULL, NULL), " +
" (NULL, NULL, NULL, NULL, 2e0, NULL, NULL)");
}
if (format == PARQUET) {
Expand All @@ -826,6 +832,7 @@ public void testCreatePartitionedTable()
" ('a_row', NULL, NULL, NULL, NULL, NULL, NULL), " +
" ('an_array', NULL, NULL, NULL, NULL, NULL, NULL), " +
" ('a_map', NULL, NULL, NULL, NULL, NULL, NULL), " +
" ('a quoted, field', 83e0, NULL, 0.5e0, NULL, NULL, NULL), " +
" (NULL, NULL, NULL, NULL, 2e0, NULL, NULL)");
}
else if (format == AVRO) {
Expand All @@ -849,6 +856,7 @@ else if (format == AVRO) {
" ('a_row', NULL, NULL, NULL, NULL, NULL, NULL), " +
" ('an_array', NULL, NULL, NULL, NULL, NULL, NULL), " +
" ('a_map', NULL, NULL, NULL, NULL, NULL, NULL), " +
" ('a quoted, field', NULL, NULL, 0.5e0, NULL, NULL, NULL), " +
" (NULL, NULL, NULL, NULL, 2e0, NULL, NULL)");
}

Expand All @@ -873,7 +881,8 @@ else if (format == AVRO) {
" partition.a_time, " +
" partition.a_timestamp, " +
" partition.a_timestamptz, " +
" partition.a_uuid " +
" partition.a_uuid, " +
" partition.\"a quoted, field\" " +
// Note: partitioning on non-primitive columns is not allowed in Iceberg
" FROM \"test_partitioned_table$partitions\" "))
.matches("" +
Expand All @@ -893,7 +902,8 @@ else if (format == AVRO) {
" TIME '02:43:57.987654', " +
" TIMESTAMP '2021-07-24 03:43:57.987654'," +
" TIMESTAMP '2021-07-24 04:43:57.987654 UTC', " +
" UUID '20050910-1330-11e9-ffff-2a86e4085a59' " +
" UUID '20050910-1330-11e9-ffff-2a86e4085a59', " +
" VARCHAR 'tralala' " +
")" +
"UNION ALL " +
"VALUES (" +
Expand All @@ -912,7 +922,8 @@ else if (format == AVRO) {
" NULL, " +
" NULL, " +
" NULL, " +
" NULL " +
" NULL, " +
" NULL " +
")");

assertUpdate("DROP TABLE test_partitioned_table");
Expand Down Expand Up @@ -944,26 +955,26 @@ public void testCreatePartitionedTableAs()
"WITH (" +
"format_version = 2," +
"location = '" + tempDirPath + "', " +
"partitioning = ARRAY['ORDER_STATUS', 'Ship_Priority', 'Bucket(order_key,9)']" +
"partitioning = ARRAY['ORDER_STATUS', 'Ship_Priority', 'Bucket(\"order key\",9)']" +
") " +
"AS " +
"SELECT orderkey AS order_key, shippriority AS ship_priority, orderstatus AS order_status " +
"SELECT orderkey AS \"order key\", shippriority AS ship_priority, orderstatus AS order_status " +
"FROM tpch.tiny.orders",
"SELECT count(*) from orders");

assertEquals(
computeScalar("SHOW CREATE TABLE test_create_partitioned_table_as"),
format(
"CREATE TABLE %s.%s.%s (\n" +
" order_key bigint,\n" +
" \"order key\" bigint,\n" +
" ship_priority integer,\n" +
" order_status varchar\n" +
")\n" +
"WITH (\n" +
" format = '%s',\n" +
" format_version = 2,\n" +
" location = '%s',\n" +
" partitioning = ARRAY['order_status','ship_priority','bucket(order_key, 9)']\n" +
" partitioning = ARRAY['order_status','ship_priority','bucket(\"order key\", 9)']\n" +
")",
getSession().getCatalog().orElseThrow(),
getSession().getSchema().orElseThrow(),
Expand All @@ -976,6 +987,43 @@ public void testCreatePartitionedTableAs()
dropTable("test_create_partitioned_table_as");
}

@DataProvider(name = "partitionedTableWithQuotedIdentifierCasing")
public static Object[][] partitionedTableWithQuotedIdentifierCasing()
{
return new Object[][] {
{"x", "x", true},
{"X", "x", true},
{"\"x\"", "x", true},
{"\"X\"", "x", true},
{"x", "\"x\"", true},
{"X", "\"x\"", true},
{"\"x\"", "\"x\"", true},
{"\"X\"", "\"x\"", true},
{"x", "X", true},
{"X", "X", true},
{"\"x\"", "X", true},
{"\"X\"", "X", true},
{"x", "\"X\"", false},
{"X", "\"X\"", false},
{"\"x\"", "\"X\"", false},
{"\"X\"", "\"X\"", false},
};
}

@Test(dataProvider = "partitionedTableWithQuotedIdentifierCasing")
public void testCreatePartitionedTableWithQuotedIdentifierCasing(String columnName, String partitioningField, boolean success)
{
String tableName = "partitioning_" + randomTableSuffix();
@Language("SQL") String sql = format("CREATE TABLE %s (%s bigint) WITH (partitioning = ARRAY['%s'])", tableName, columnName, partitioningField);
if (success) {
assertThat(query(sql)).matches("VALUES (true)");
dropTable(tableName);
}
else {
assertQueryFails(sql, "Unable to parse partitioning value: .*");
}
}

@Test
public void testTableComments()
{
Expand Down
Loading