diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergTableProperties.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergTableProperties.java index 604e730ce69d..1c525cc841b2 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergTableProperties.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergTableProperties.java @@ -41,6 +41,7 @@ public class IcebergTableProperties { public static final String FILE_FORMAT_PROPERTY = "format"; public static final String PARTITIONING_PROPERTY = "partitioning"; + public static final String SORTED_BY_PROPERTY = "sorted_by"; public static final String LOCATION_PROPERTY = "location"; public static final String FORMAT_VERSION_PROPERTY = "format_version"; public static final String ORC_BLOOM_FILTER_COLUMNS = "orc_bloom_filter_columns"; @@ -69,6 +70,15 @@ public IcebergTableProperties( false, value -> (List) value, value -> value)) + .add(new PropertyMetadata<>( + SORTED_BY_PROPERTY, + "Sorted columns", + new ArrayType(VARCHAR), + List.class, + ImmutableList.of(), + false, + value -> (List) value, + value -> value)) .add(stringProperty( LOCATION_PROPERTY, "File system location URI for the table", @@ -118,6 +128,13 @@ public static List getPartitioning(Map tableProperties) return partitioning == null ? ImmutableList.of() : ImmutableList.copyOf(partitioning); } + @SuppressWarnings("unchecked") + public static List getSortOrder(Map tableProperties) + { + List sortedBy = (List) tableProperties.get(SORTED_BY_PROPERTY); + return sortedBy == null ? 
ImmutableList.of() : ImmutableList.copyOf(sortedBy); + } + public static Optional getTableLocation(Map tableProperties) { return Optional.ofNullable((String) tableProperties.get(LOCATION_PROPERTY)); diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergUtil.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergUtil.java index 512c5ef2331c..e31b5e7f8cbf 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergUtil.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergUtil.java @@ -50,6 +50,7 @@ import org.apache.iceberg.PartitionField; import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.Schema; +import org.apache.iceberg.SortOrder; import org.apache.iceberg.StructLike; import org.apache.iceberg.Table; import org.apache.iceberg.TableMetadata; @@ -98,9 +99,11 @@ import static io.trino.plugin.iceberg.IcebergTableProperties.getOrcBloomFilterColumns; import static io.trino.plugin.iceberg.IcebergTableProperties.getOrcBloomFilterFpp; import static io.trino.plugin.iceberg.IcebergTableProperties.getPartitioning; +import static io.trino.plugin.iceberg.IcebergTableProperties.getSortOrder; import static io.trino.plugin.iceberg.IcebergTableProperties.getTableLocation; import static io.trino.plugin.iceberg.PartitionFields.parsePartitionFields; import static io.trino.plugin.iceberg.PartitionFields.toPartitionFields; +import static io.trino.plugin.iceberg.SortFields.parseSortFields; import static io.trino.plugin.iceberg.TrinoTypes.getNextValue; import static io.trino.plugin.iceberg.TrinoTypes.getPreviousValue; import static io.trino.plugin.iceberg.TypeConverter.toIcebergType; @@ -148,7 +151,13 @@ public final class IcebergUtil { - private static final Pattern SIMPLE_NAME = Pattern.compile("[a-z][a-z0-9]*"); + private static final Pattern SIMPLE_NAME = Pattern.compile("\\s*?[a-z][a-z0-9]*\\s*?"); + private static final String UNQUOTED_IDENTIFIER = 
"\\s*?[a-zA-Z_][a-zA-Z0-9_]*\\s*?"; + private static final Pattern UNQUOTED_IDENTIFIER_PATTERN = Pattern.compile(UNQUOTED_IDENTIFIER); + private static final String QUOTED_IDENTIFIER = "\"(?:\"\"|[^\"A-Z])*\""; + public static final String IDENTIFIER = "(" + UNQUOTED_IDENTIFIER + "|" + QUOTED_IDENTIFIER + ")"; + public static final String FUNCTION_ARGUMENT_NAME = "\\s*?\\(" + IDENTIFIER + "\\)\\s*?"; + public static final String FUNCTION_ARGUMENT_NAME_AND_INT = "\\s*?\\(" + IDENTIFIER + ",\\s*?(\\d+)\\s*?\\)"; private IcebergUtil() {} @@ -565,6 +574,7 @@ public static Transaction newCreateTableTransaction(TrinoCatalog catalog, Connec SchemaTableName schemaTableName = tableMetadata.getTable(); Schema schema = schemaFromMetadata(tableMetadata.getColumns()); PartitionSpec partitionSpec = parsePartitionFields(schema, getPartitioning(tableMetadata.getProperties())); + SortOrder sortOrder = buildSortFields(schema, getSortOrder(tableMetadata.getProperties())); String targetPath = getTableLocation(tableMetadata.getProperties()) .orElseGet(() -> catalog.defaultTableLocation(session, schemaTableName)); @@ -586,7 +596,7 @@ public static Transaction newCreateTableTransaction(TrinoCatalog catalog, Connec propertiesBuilder.put(TABLE_COMMENT, tableMetadata.getComment().get()); } - return catalog.newCreateTableTransaction(session, schemaTableName, schema, partitionSpec, targetPath, propertiesBuilder.buildOrThrow()); + return catalog.newCreateTableTransaction(session, schemaTableName, schema, partitionSpec, sortOrder, targetPath, propertiesBuilder.buildOrThrow()); } public static long getSnapshotIdAsOfTime(Table table, long epochMillis) @@ -610,6 +620,22 @@ public static void validateTableCanBeDropped(Table table) } } + public static String fromIdentifier(String identifier) + { + if (identifier.startsWith("\"") && identifier.endsWith("\"")) { + return identifier.substring(1, identifier.length() - 1).replace("\"\"", "\""); + } + return identifier.toLowerCase(Locale.ENGLISH); + } + + 
public static String toIdentifier(String column) + { + if (UNQUOTED_IDENTIFIER_PATTERN.matcher(column).matches()) { + return column; + } + return "\"" + column.replace("\"", "\"\"") + "\""; + } + private static void checkFormatForProperty(FileFormat actualStorageFormat, FileFormat expectedStorageFormat, String propertyName) { if (actualStorageFormat != expectedStorageFormat) { @@ -626,4 +652,14 @@ private static void validateOrcBloomFilterColumns(ConnectorTableMetadata tableMe throw new TrinoException(INVALID_TABLE_PROPERTY, format("Orc bloom filter columns %s not present in schema", Sets.difference(ImmutableSet.copyOf(orcBloomFilterColumns), allColumns))); } } + + private static SortOrder buildSortFields(Schema schema, List fields) + { + try { + return parseSortFields(schema, fields); + } + catch (RuntimeException re) { + throw new TrinoException(INVALID_TABLE_PROPERTY, "Unable to parse sorting value", re); + } + } } diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/SortFields.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/SortFields.java new file mode 100644 index 000000000000..ddedb884f838 --- /dev/null +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/SortFields.java @@ -0,0 +1,262 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.plugin.iceberg; + +import org.apache.iceberg.NullOrder; +import org.apache.iceberg.Schema; +import org.apache.iceberg.SortField; +import org.apache.iceberg.SortOrder; +import org.apache.iceberg.expressions.Expressions; + +import java.util.List; +import java.util.function.Consumer; +import java.util.regex.MatchResult; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import static com.google.common.collect.ImmutableList.toImmutableList; +import static io.trino.plugin.iceberg.IcebergUtil.FUNCTION_ARGUMENT_NAME; +import static io.trino.plugin.iceberg.IcebergUtil.FUNCTION_ARGUMENT_NAME_AND_INT; +import static io.trino.plugin.iceberg.IcebergUtil.IDENTIFIER; +import static io.trino.plugin.iceberg.IcebergUtil.fromIdentifier; +import static io.trino.plugin.iceberg.IcebergUtil.toIdentifier; +import static java.lang.Integer.parseInt; +import static java.lang.String.format; + +public final class SortFields +{ + // YEAR patterns: year(column) with an optional ASC/DESC and NULLS FIRST/LAST suffix; the transform name is case-insensitive + private static final Pattern YEAR_PATTERN = Pattern.compile("\\s*?(?i:year)" + FUNCTION_ARGUMENT_NAME); + private static final Pattern YEAR_PATTERN_ASC = Pattern.compile("\\s*?(?i:year)" + FUNCTION_ARGUMENT_NAME + "\\s*?(ASC)\\s*?"); + private static final Pattern YEAR_PATTERN_ASC_NULLS_FIRST = Pattern.compile("\\s*?(?i:year)" + FUNCTION_ARGUMENT_NAME + "\\s*?(ASC)\\s*?(NULLS\\s*?FIRST)\\s*?"); + private static final Pattern YEAR_PATTERN_ASC_NULLS_LAST = Pattern.compile("\\s*?(?i:year)" + FUNCTION_ARGUMENT_NAME + "\\s*?(ASC)\\s*?(NULLS\\s*?LAST)\\s*?"); + private static final Pattern YEAR_PATTERN_DESC = Pattern.compile("\\s*?(?i:year)" + FUNCTION_ARGUMENT_NAME + "\\s*?(DESC)\\s*?"); + private static final Pattern YEAR_PATTERN_DESC_NULLS_FIRST = Pattern.compile("\\s*?(?i:year)" + FUNCTION_ARGUMENT_NAME + "\\s*?(DESC)\\s*?(NULLS\\s*?FIRST)\\s*?"); + private static final Pattern YEAR_PATTERN_DESC_NULLS_LAST = Pattern.compile("\\s*?(?i:year)" + FUNCTION_ARGUMENT_NAME + "\\s*?(DESC)\\s*?(NULLS\\s*?LAST)\\s*?"); + // 
MONTH patterns: month(column) with an optional ASC/DESC and NULLS FIRST/LAST suffix; the transform name is case-insensitive + private static final Pattern MONTH_PATTERN = Pattern.compile("\\s*?(?i:month)" + FUNCTION_ARGUMENT_NAME); + private static final Pattern MONTH_PATTERN_ASC = Pattern.compile("\\s*?(?i:month)" + FUNCTION_ARGUMENT_NAME + "\\s*?(ASC)\\s*?"); + private static final Pattern MONTH_PATTERN_ASC_NULLS_FIRST = Pattern.compile("\\s*?(?i:month)" + FUNCTION_ARGUMENT_NAME + "\\s*?(ASC)\\s*?(NULLS\\s*?FIRST)\\s*?"); + private static final Pattern MONTH_PATTERN_ASC_NULLS_LAST = Pattern.compile("\\s*?(?i:month)" + FUNCTION_ARGUMENT_NAME + "\\s*?(ASC)\\s*?(NULLS\\s*?LAST)\\s*?"); + private static final Pattern MONTH_PATTERN_DESC = Pattern.compile("\\s*?(?i:month)" + FUNCTION_ARGUMENT_NAME + "\\s*?(DESC)\\s*?"); + private static final Pattern MONTH_PATTERN_DESC_NULLS_FIRST = Pattern.compile("\\s*?(?i:month)" + FUNCTION_ARGUMENT_NAME + "\\s*?(DESC)\\s*?(NULLS\\s*?FIRST)\\s*?"); + private static final Pattern MONTH_PATTERN_DESC_NULLS_LAST = Pattern.compile("\\s*?(?i:month)" + FUNCTION_ARGUMENT_NAME + "\\s*?(DESC)\\s*?(NULLS\\s*?LAST)\\s*?"); + + // DAY patterns: day(column) with an optional ASC/DESC and NULLS FIRST/LAST suffix; the transform name is case-insensitive + private static final Pattern DAY_PATTERN = Pattern.compile("\\s*?(?i:day)" + FUNCTION_ARGUMENT_NAME); + private static final Pattern DAY_PATTERN_ASC = Pattern.compile("\\s*?(?i:day)" + FUNCTION_ARGUMENT_NAME + "\\s*?(ASC)\\s*?"); + private static final Pattern DAY_PATTERN_ASC_NULLS_FIRST = Pattern.compile("\\s*?(?i:day)" + FUNCTION_ARGUMENT_NAME + "\\s*?(ASC)\\s*?(NULLS\\s*?FIRST)\\s*?"); + private static final Pattern DAY_PATTERN_ASC_NULLS_LAST = Pattern.compile("\\s*?(?i:day)" + FUNCTION_ARGUMENT_NAME + "\\s*?(ASC)\\s*?(NULLS\\s*?LAST)\\s*?"); + private static final Pattern DAY_PATTERN_DESC = Pattern.compile("\\s*?(?i:day)" + FUNCTION_ARGUMENT_NAME + "\\s*?(DESC)\\s*?"); + private static final Pattern DAY_PATTERN_DESC_NULLS_FIRST = Pattern.compile("\\s*?(?i:day)" + FUNCTION_ARGUMENT_NAME + "\\s*?(DESC)\\s*?(NULLS\\s*?FIRST)\\s*?"); + private static final Pattern DAY_PATTERN_DESC_NULLS_LAST = 
Pattern.compile("\\s*?(?i:day)" + FUNCTION_ARGUMENT_NAME + "\\s*?(DESC)\\s*?(NULLS\\s*?LAST)\\s*?"); + + // HOUR patterns + private static final Pattern HOUR_PATTERN = Pattern.compile("\\s*?(?i:hour)" + FUNCTION_ARGUMENT_NAME); + private static final Pattern HOUR_PATTERN_ASC = Pattern.compile("\\s*?(?i:hour)" + FUNCTION_ARGUMENT_NAME + "\\s*?(ASC)\\s*?"); + private static final Pattern HOUR_PATTERN_ASC_NULLS_FIRST = Pattern.compile("\\s*?(?i:hour)" + FUNCTION_ARGUMENT_NAME + "\\s*?(ASC)\\s*?(NULLS\\s*?FIRST)\\s*?"); + private static final Pattern HOUR_PATTERN_ASC_NULLS_LAST = Pattern.compile("\\s*?(?i:hour)" + FUNCTION_ARGUMENT_NAME + "\\s*?(ASC)\\s*?(NULLS\\s*?LAST)\\s*?"); + private static final Pattern HOUR_PATTERN_DESC = Pattern.compile("\\s*?(?i:hour)" + FUNCTION_ARGUMENT_NAME + "\\s*?(DESC)\\s*?"); + private static final Pattern HOUR_PATTERN_DESC_NULLS_FIRST = Pattern.compile("\\s*?(?i:hour)" + FUNCTION_ARGUMENT_NAME + "\\s*?(DESC)\\s*?(NULLS\\s*?FIRST)\\s*?"); + private static final Pattern HOUR_PATTERN_DESC_NULLS_LAST = Pattern.compile("\\s*?(?i:hour)" + FUNCTION_ARGUMENT_NAME + "\\s*?(DESC)\\s*?(NULLS\\s*?LAST)\\s*?"); + + // Truncate + private static final Pattern ICEBERG_TRUNCATE_PATTERN = Pattern.compile("truncate\\[(\\d+)]"); + private static final Pattern TRUNCATE_PATTERN = Pattern.compile("\\s*?(?i:truncate)" + FUNCTION_ARGUMENT_NAME_AND_INT); + private static final Pattern TRUNCATE_ASC_PATTERN = Pattern.compile("\\s*?(?i:truncate)" + FUNCTION_ARGUMENT_NAME_AND_INT + "\\s*?(ASC)\\s*?"); + private static final Pattern TRUNCATE_ASC_NULLS_FIRST_PATTERN = Pattern.compile("\\s*?(?i:truncate)" + FUNCTION_ARGUMENT_NAME_AND_INT + "\\s*?(ASC)\\s*?(NULLS\\s*?FIRST)\\s*?"); + private static final Pattern TRUNCATE_ASC_NULLS_LAST_PATTERN = Pattern.compile("\\s*?(?i:truncate)" + FUNCTION_ARGUMENT_NAME_AND_INT + "\\s*?(ASC)\\s*?(NULLS\\s*?LAST)\\s*?"); + private static final Pattern TRUNCATE_DESC_PATTERN = Pattern.compile("\\s*?(?i:truncate)" + 
FUNCTION_ARGUMENT_NAME_AND_INT + "\\s*?(DESC)\\s*?"); + private static final Pattern TRUNCATE_DESC_NULLS_FIRST_PATTERN = Pattern.compile("\\s*?(?i:truncate)" + FUNCTION_ARGUMENT_NAME_AND_INT + "\\s*?(DESC)\\s*?(NULLS\\s*?FIRST)\\s*?"); + private static final Pattern TRUNCATE_DESC_NULLS_LAST_PATTERN = Pattern.compile("\\s*?(?i:truncate)" + FUNCTION_ARGUMENT_NAME_AND_INT + "\\s*?(DESC)\\s*?(NULLS\\s*?LAST)\\s*?"); + + // Bucket: bucket(column, count) with an optional ASC/DESC and NULLS FIRST/LAST suffix; ICEBERG_BUCKET_PATTERN matches the bucket[count] form instead + private static final Pattern ICEBERG_BUCKET_PATTERN = Pattern.compile("\\s*?(?i:bucket)\\[(\\d+)]"); + private static final Pattern BUCKET_PATTERN = Pattern.compile("\\s*?(?i:bucket)" + FUNCTION_ARGUMENT_NAME_AND_INT); + private static final Pattern BUCKET_ASC_PATTERN = Pattern.compile("\\s*?(?i:bucket)" + FUNCTION_ARGUMENT_NAME_AND_INT + "\\s*?(ASC)\\s*?"); + private static final Pattern BUCKET_ASC_NULLS_FIRST_PATTERN = Pattern.compile("\\s*?(?i:bucket)" + FUNCTION_ARGUMENT_NAME_AND_INT + "\\s*?(ASC)\\s*?(NULLS\\s*?FIRST)\\s*?"); + private static final Pattern BUCKET_ASC_NULLS_LAST_PATTERN = Pattern.compile("\\s*?(?i:bucket)" + FUNCTION_ARGUMENT_NAME_AND_INT + "\\s*?(ASC)\\s*?(NULLS\\s*?LAST)\\s*?"); + private static final Pattern BUCKET_DESC_PATTERN = Pattern.compile("\\s*?(?i:bucket)" + FUNCTION_ARGUMENT_NAME_AND_INT + "\\s*?(DESC)\\s*?"); + private static final Pattern BUCKET_DESC_NULLS_FIRST_PATTERN = Pattern.compile("\\s*?(?i:bucket)" + FUNCTION_ARGUMENT_NAME_AND_INT + "\\s*?(DESC)\\s*?(NULLS\\s*?FIRST)\\s*?"); + private static final Pattern BUCKET_DESC_NULLS_LAST_PATTERN = Pattern.compile("\\s*?(?i:bucket)" + FUNCTION_ARGUMENT_NAME_AND_INT + "\\s*?(DESC)\\s*?(NULLS\\s*?LAST)\\s*?"); + + // identity: a bare or double-quoted column name with an optional ASC/DESC and NULLS FIRST/LAST suffix + private static final Pattern IDENTITY_PATTERN = Pattern.compile(IDENTIFIER); + private static final Pattern IDENTITY_ASC_PATTERN = Pattern.compile(IDENTIFIER + "\\s*?(ASC)\\s*?"); + private static final Pattern IDENTITY_ASC_NULLS_FIRST_PATTERN = Pattern.compile(IDENTIFIER + "\\s*?(ASC)\\s*?(NULLS\\s*?FIRST)\\s*?"); + private static 
final Pattern IDENTITY_ASC_NULLS_LAST_PATTERN = Pattern.compile(IDENTIFIER + "\\s*?(ASC)\\s*?(NULLS\\s*?LAST)\\s*?"); + private static final Pattern IDENTITY_DESC_PATTERN = Pattern.compile(IDENTIFIER + "\\s*?(DESC)\\s*?"); + private static final Pattern IDENTITY_DESC_NULLS_FIRST_PATTERN = Pattern.compile(IDENTIFIER + "\\s*?(DESC)\\s*?(NULLS\\s*?FIRST)\\s*?"); + private static final Pattern IDENTITY_DESC_NULLS_LAST_PATTERN = Pattern.compile(IDENTIFIER + "\\s*?(DESC)\\s*?(NULLS\\s*?LAST)\\s*?"); + + private SortFields() {} + + public static SortOrder parseSortFields(Schema schema, List fields) + { + SortOrder.Builder builder = SortOrder.builderFor(schema); + for (String field : fields) { + parseSortField(builder, field); + } + return builder.build(); + } + + public static void parseSortField(SortOrder.Builder builder, String field) + { + boolean matched = false || + tryMatchYear(builder, field) || + tryMatchMonth(builder, field) || + tryMatchDay(builder, field) || + tryMatchHour(builder, field) || + tryMatchBucket(builder, field) || + tryMatchTruncate(builder, field) || + tryMatchWithoutTransform(builder, field); + + if (!matched) { + throw new IllegalArgumentException("Invalid sort field declaration: " + field); + } + } + + private static boolean tryMatchWithoutTransform(SortOrder.Builder builder, String field) + { + return tryMatch(field, IDENTITY_ASC_NULLS_FIRST_PATTERN, match -> builder.asc(fromIdentifier(match.group(1).trim()), NullOrder.NULLS_FIRST)) || + tryMatch(field, IDENTITY_ASC_NULLS_LAST_PATTERN, match -> builder.asc(fromIdentifier(match.group(1).trim()), NullOrder.NULLS_LAST)) || + tryMatch(field, IDENTITY_ASC_PATTERN, match -> builder.asc(fromIdentifier(match.group(1).trim()))) || + tryMatch(field, IDENTITY_DESC_NULLS_FIRST_PATTERN, match -> builder.desc(fromIdentifier(match.group(1).trim()), NullOrder.NULLS_FIRST)) || + tryMatch(field, IDENTITY_DESC_NULLS_LAST_PATTERN, match -> builder.desc(fromIdentifier(match.group(1).trim()), NullOrder.NULLS_LAST)) 
|| + tryMatch(field, IDENTITY_DESC_PATTERN, match -> builder.desc(fromIdentifier(match.group(1).trim()))) || + tryMatch(field, IDENTITY_PATTERN, match -> builder.asc(fromIdentifier(match.group(1).trim()))) || + false; + } + + private static boolean tryMatchBucket(SortOrder.Builder builder, String field) + { + return tryMatch(field, BUCKET_ASC_NULLS_FIRST_PATTERN, match -> builder.asc(Expressions.bucket(fromIdentifier(match.group(1).trim()), parseInt(match.group(2).trim())), NullOrder.NULLS_FIRST)) || + tryMatch(field, BUCKET_ASC_NULLS_LAST_PATTERN, match -> builder.asc(Expressions.bucket(fromIdentifier(match.group(1).trim()), parseInt(match.group(2).trim())), NullOrder.NULLS_LAST)) || + tryMatch(field, BUCKET_ASC_PATTERN, match -> builder.asc(Expressions.bucket(fromIdentifier(match.group(1).trim()), parseInt(match.group(2).trim())))) || + tryMatch(field, BUCKET_DESC_NULLS_FIRST_PATTERN, match -> builder.desc(Expressions.bucket(fromIdentifier(match.group(1).trim()), parseInt(match.group(2).trim())), NullOrder.NULLS_FIRST)) || + tryMatch(field, BUCKET_DESC_NULLS_LAST_PATTERN, match -> builder.desc(Expressions.bucket(fromIdentifier(match.group(1).trim()), parseInt(match.group(2).trim())), NullOrder.NULLS_LAST)) || + tryMatch(field, BUCKET_DESC_PATTERN, match -> builder.desc(Expressions.bucket(fromIdentifier(match.group(1).trim()), parseInt(match.group(2).trim())))) || + tryMatch(field, BUCKET_PATTERN, match -> builder.asc(Expressions.bucket(fromIdentifier(match.group(1).trim()), parseInt(match.group(2).trim())))) || + false; + } + + private static boolean tryMatchTruncate(SortOrder.Builder builder, String field) + { + return tryMatch(field, TRUNCATE_ASC_NULLS_FIRST_PATTERN, match -> builder.asc(Expressions.truncate(fromIdentifier(match.group(1).trim()), parseInt(match.group(2).trim())), NullOrder.NULLS_FIRST)) || + tryMatch(field, TRUNCATE_ASC_NULLS_LAST_PATTERN, match -> builder.asc(Expressions.truncate(fromIdentifier(match.group(1).trim()), 
parseInt(match.group(2).trim())), NullOrder.NULLS_LAST)) || + tryMatch(field, TRUNCATE_ASC_PATTERN, match -> builder.asc(Expressions.truncate(fromIdentifier(match.group(1).trim()), parseInt(match.group(2).trim())))) || + tryMatch(field, TRUNCATE_DESC_NULLS_FIRST_PATTERN, match -> builder.desc(Expressions.truncate(fromIdentifier(match.group(1).trim()), parseInt(match.group(2).trim())), NullOrder.NULLS_FIRST)) || + tryMatch(field, TRUNCATE_DESC_NULLS_LAST_PATTERN, match -> builder.desc(Expressions.truncate(fromIdentifier(match.group(1).trim()), parseInt(match.group(2).trim())), NullOrder.NULLS_LAST)) || + tryMatch(field, TRUNCATE_DESC_PATTERN, match -> builder.desc(Expressions.truncate(fromIdentifier(match.group(1).trim()), parseInt(match.group(2).trim())))) || + tryMatch(field, TRUNCATE_PATTERN, match -> builder.asc(Expressions.truncate(fromIdentifier(match.group(1).trim()), parseInt(match.group(2).trim())))) || + false; + } + + private static boolean tryMatchHour(SortOrder.Builder builder, String field) + { + return tryMatch(field, HOUR_PATTERN_ASC_NULLS_FIRST, match -> builder.asc(Expressions.hour(fromIdentifier(match.group(1).trim())), NullOrder.NULLS_FIRST)) || + tryMatch(field, HOUR_PATTERN_ASC_NULLS_LAST, match -> builder.asc(Expressions.hour(fromIdentifier(match.group(1).trim())), NullOrder.NULLS_LAST)) || + tryMatch(field, HOUR_PATTERN_ASC, match -> builder.asc(Expressions.hour(fromIdentifier(match.group(1).trim())))) || + tryMatch(field, HOUR_PATTERN_DESC_NULLS_FIRST, match -> builder.desc(Expressions.hour(fromIdentifier(match.group(1).trim())), NullOrder.NULLS_FIRST)) || + tryMatch(field, HOUR_PATTERN_DESC_NULLS_LAST, match -> builder.desc(Expressions.hour(fromIdentifier(match.group(1).trim())), NullOrder.NULLS_LAST)) || + tryMatch(field, HOUR_PATTERN_DESC, match -> builder.desc(Expressions.hour(fromIdentifier(match.group(1).trim())))) || + tryMatch(field, HOUR_PATTERN, match -> builder.asc(Expressions.hour(fromIdentifier(match.group(1).trim())))) || + 
false; + } + + private static boolean tryMatchDay(SortOrder.Builder builder, String field) + { + return tryMatch(field, DAY_PATTERN_ASC_NULLS_FIRST, match -> builder.asc(Expressions.day(fromIdentifier(match.group(1).trim())), NullOrder.NULLS_FIRST)) || + tryMatch(field, DAY_PATTERN_ASC_NULLS_LAST, match -> builder.asc(Expressions.day(fromIdentifier(match.group(1).trim())), NullOrder.NULLS_LAST)) || + tryMatch(field, DAY_PATTERN_ASC, match -> builder.asc(Expressions.day(fromIdentifier(match.group(1).trim())))) || + tryMatch(field, DAY_PATTERN_DESC_NULLS_FIRST, match -> builder.desc(Expressions.day(fromIdentifier(match.group(1).trim())), NullOrder.NULLS_FIRST)) || + tryMatch(field, DAY_PATTERN_DESC_NULLS_LAST, match -> builder.desc(Expressions.day(fromIdentifier(match.group(1).trim())), NullOrder.NULLS_LAST)) || + tryMatch(field, DAY_PATTERN_DESC, match -> builder.desc(Expressions.day(fromIdentifier(match.group(1).trim())))) || + tryMatch(field, DAY_PATTERN, match -> builder.asc(Expressions.day(fromIdentifier(match.group(1).trim())))) || + false; + } + + private static boolean tryMatchMonth(SortOrder.Builder builder, String field) + { + return tryMatch(field, MONTH_PATTERN_ASC_NULLS_FIRST, match -> builder.asc(Expressions.month(fromIdentifier(match.group(1).trim())), NullOrder.NULLS_FIRST)) || + tryMatch(field, MONTH_PATTERN_ASC_NULLS_LAST, match -> builder.asc(Expressions.month(fromIdentifier(match.group(1).trim())), NullOrder.NULLS_LAST)) || + tryMatch(field, MONTH_PATTERN_ASC, match -> builder.asc(Expressions.month(fromIdentifier(match.group(1).trim())))) || + tryMatch(field, MONTH_PATTERN_DESC_NULLS_FIRST, match -> builder.desc(Expressions.month(fromIdentifier(match.group(1).trim())), NullOrder.NULLS_FIRST)) || + tryMatch(field, MONTH_PATTERN_DESC_NULLS_LAST, match -> builder.desc(Expressions.month(fromIdentifier(match.group(1).trim())), NullOrder.NULLS_LAST)) || + tryMatch(field, MONTH_PATTERN_DESC, match -> 
builder.desc(Expressions.month(fromIdentifier(match.group(1).trim())))) || + tryMatch(field, MONTH_PATTERN, match -> builder.asc(Expressions.month(fromIdentifier(match.group(1).trim())))) || + false; + } + + private static boolean tryMatchYear(SortOrder.Builder builder, String field) + { + return tryMatch(field, YEAR_PATTERN_ASC_NULLS_FIRST, match -> builder.asc(Expressions.year(fromIdentifier(match.group(1).trim())), NullOrder.NULLS_FIRST)) || + tryMatch(field, YEAR_PATTERN_ASC_NULLS_LAST, match -> builder.asc(Expressions.year(fromIdentifier(match.group(1).trim())), NullOrder.NULLS_LAST)) || + tryMatch(field, YEAR_PATTERN_ASC, match -> builder.asc(Expressions.year(fromIdentifier(match.group(1).trim())))) || + tryMatch(field, YEAR_PATTERN_DESC_NULLS_FIRST, match -> builder.desc(Expressions.year(fromIdentifier(match.group(1).trim())), NullOrder.NULLS_FIRST)) || + tryMatch(field, YEAR_PATTERN_DESC_NULLS_LAST, match -> builder.desc(Expressions.year(fromIdentifier(match.group(1).trim())), NullOrder.NULLS_LAST)) || + tryMatch(field, YEAR_PATTERN_DESC, match -> builder.desc(Expressions.year(fromIdentifier(match.group(1).trim())))) || + tryMatch(field, YEAR_PATTERN, match -> builder.asc(Expressions.year(fromIdentifier(match.group(1).trim())))) || + false; + } + + private static boolean tryMatch(String value, Pattern pattern, Consumer match) + { + Matcher matcher = pattern.matcher(value); + if (matcher.matches()) { + match.accept(matcher.toMatchResult()); + return true; + } + return false; + } + + public static List toSortFields(SortOrder spec) + { + return spec.fields().stream() + .map(field -> toSortField(spec, field)) + .collect(toImmutableList()); + } + + private static String toSortField(SortOrder spec, SortField field) + { + String name = toIdentifier(spec.schema().findColumnName(field.sourceId())); + String transform = field.transform().toString(); + String sortDirection = field.direction().toString(); + String nullOrder = field.nullOrder().toString(); + String 
suffix = format("%s %s", sortDirection, nullOrder); + + switch (transform) { + case "identity": + return format("%s %s", name, suffix); + case "year": + case "month": + case "day": + case "hour": + return format("%s(%s) %s", transform, name, suffix); + } + + Matcher matcher = ICEBERG_BUCKET_PATTERN.matcher(transform); + if (matcher.matches()) { + return format("bucket(%s, %s) %s", name, matcher.group(1), suffix); + } + + matcher = ICEBERG_TRUNCATE_PATTERN.matcher(transform); + if (matcher.matches()) { + return format("truncate(%s, %s) %s", name, matcher.group(1), suffix); + } + + throw new UnsupportedOperationException("Unsupported sort transform: " + field); // reached when the transform string is none of identity/year/month/day/hour/bucket[N]/truncate[N] + } +} diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/AbstractTrinoCatalog.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/AbstractTrinoCatalog.java index fd4bfbde91c9..36d73d42e2af 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/AbstractTrinoCatalog.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/AbstractTrinoCatalog.java @@ -35,6 +35,7 @@ import net.jodah.failsafe.RetryPolicy; import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.Schema; +import org.apache.iceberg.SortOrder; import org.apache.iceberg.Table; import org.apache.iceberg.TableMetadata; import org.apache.iceberg.TableOperations; @@ -165,11 +166,12 @@ protected Transaction newCreateTableTransaction( SchemaTableName schemaTableName, Schema schema, PartitionSpec partitionSpec, + SortOrder sortOrder, String location, Map properties, Optional owner) { - TableMetadata metadata = newTableMetadata(schema, partitionSpec, location, properties); + TableMetadata metadata = newTableMetadata(schema, partitionSpec, sortOrder, location, properties); TableOperations ops = tableOperationsProvider.createTableOperations( this, session, diff --git 
a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/TrinoCatalog.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/TrinoCatalog.java index ad3307005517..b9147b190098 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/TrinoCatalog.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/TrinoCatalog.java @@ -23,6 +23,7 @@ import io.trino.spi.security.TrinoPrincipal; import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.Schema; +import org.apache.iceberg.SortOrder; import org.apache.iceberg.Table; import org.apache.iceberg.Transaction; @@ -68,6 +69,7 @@ Transaction newCreateTableTransaction( SchemaTableName schemaTableName, Schema schema, PartitionSpec partitionSpec, + SortOrder sortOrder, String location, Map properties); diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/TrinoGlueCatalog.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/TrinoGlueCatalog.java index e9b4174a6a37..544966dd2349 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/TrinoGlueCatalog.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/TrinoGlueCatalog.java @@ -60,6 +60,7 @@ import org.apache.iceberg.BaseTable; import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.Schema; +import org.apache.iceberg.SortOrder; import org.apache.iceberg.Table; import org.apache.iceberg.TableMetadata; import org.apache.iceberg.TableMetadataParser; @@ -330,6 +331,7 @@ public Transaction newCreateTableTransaction( SchemaTableName schemaTableName, Schema schema, PartitionSpec partitionSpec, + SortOrder sortOrder, String location, Map properties) { @@ -338,6 +340,7 @@ public Transaction newCreateTableTransaction( schemaTableName, schema, partitionSpec, + sortOrder, location, properties, Optional.of(session.getUser())); diff --git 
a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/hms/TrinoHiveCatalog.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/hms/TrinoHiveCatalog.java index ffe9a7782c7a..35c3012a1df0 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/hms/TrinoHiveCatalog.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/hms/TrinoHiveCatalog.java @@ -45,6 +45,7 @@ import org.apache.iceberg.BaseTable; import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.Schema; +import org.apache.iceberg.SortOrder; import org.apache.iceberg.Table; import org.apache.iceberg.TableMetadata; import org.apache.iceberg.Transaction; @@ -229,6 +230,7 @@ public Transaction newCreateTableTransaction( SchemaTableName schemaTableName, Schema schema, PartitionSpec partitionSpec, + SortOrder sortOrder, String location, Map properties) { @@ -237,6 +239,7 @@ public Transaction newCreateTableTransaction( schemaTableName, schema, partitionSpec, + sortOrder, location, properties, isUsingSystemSecurity ? 
Optional.empty() : Optional.of(session.getUser())); diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/BaseIcebergConnectorTest.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/BaseIcebergConnectorTest.java index 98b71d8323b0..7145532cde89 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/BaseIcebergConnectorTest.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/BaseIcebergConnectorTest.java @@ -1035,6 +1035,219 @@ public void testCreatePartitionedTableWithQuotedIdentifierCasing(String columnNa } } + @Test + public void testCreateSortedWithPartitionTable() + { + assertUpdate("" + + "CREATE TABLE test_sorted_with_partition_table (" + + " a_boolean boolean, " + + " an_integer integer, " + + " a_bigint bigint, " + + " a_real real, " + + " a_double double, " + + " a_short_decimal decimal(5,2), " + + " a_long_decimal decimal(38,20), " + + " a_varchar varchar, " + + " \"a quoted, field\" varchar, " + + " a_varbinary varbinary, " + + " a_date date, " + + " a_time time(6), " + + " a_timestamp timestamp(6), " + + " a_timestamptz timestamp(6) with time zone, " + + " a_uuid uuid, " + + " a_row row(id integer , vc varchar), " + + " an_array array(varchar), " + + " a_map map(integer, varchar) " + + ") " + + "WITH (" + + "partitioning = ARRAY[" + + " 'a_boolean', " + + " 'an_integer', " + + " 'a_bigint', " + + " 'a_real', " + + " 'a_double', " + + " 'a_short_decimal', " + + " 'a_long_decimal', " + + " 'a_varchar', " + + " 'a_varbinary', " + + " 'a_date', " + + " 'a_time', " + + " 'a_timestamp', " + + " 'a_timestamptz', " + + " 'a_uuid' " + + // Note: partitioning on non-primitive columns is not allowed in Iceberg + " ], " + + "sorted_by = ARRAY[" + + " 'a_boolean', " + + " 'an_integer', " + + " 'bucket( an_integer , 10)', " + + " 'a_bigint', " + + " 'a_real', " + + " 'a_double', " + + " 'a_short_decimal', " + + " 'a_long_decimal', " + + " 'a_varchar', " + + " '\"a quoted, field\"', " + + " 
'truncate(\"a quoted, field\", 5)', " + + " 'truncate(a_varchar , 5)', " + + " 'truncate(a_varchar, 5)', " + + " 'a_varbinary', " + + " 'a_date', " + + " ' year( a_date ) ', " + + " 'month( a_date)', " + + " 'day(a_date)', " + + " 'hour( a_date)', " + + " 'a_time', " + + " 'a_timestamp', " + + " 'a_timestamptz', " + + " 'a_uuid' " + + " ]" + + ")"); + + assertQueryReturnsEmptyResult("SELECT * FROM test_sorted_with_partition_table"); + + dropTable("test_sorted_with_partition_table"); + } + + @Test + public void testCreateSortedTable() + { + assertUpdate("" + + "CREATE TABLE test_sorted_table (" + + " a_boolean boolean, " + + " an_integer integer, " + + " a_bigint bigint, " + + " a_real real, " + + " a_double double, " + + " a_short_decimal decimal(5,2), " + + " a_long_decimal decimal(38,20), " + + " a_varchar varchar, " + + " a_varbinary varbinary, " + + " a_date date, " + + " a_time time(6), " + + " a_timestamp timestamp(6), " + + " a_timestamptz timestamp(6) with time zone, " + + " a_uuid uuid, " + + " a_row row(id integer , vc varchar), " + + " an_array array(varchar), " + + " a_map map(integer, varchar) " + + ") " + + "WITH (" + + "sorted_by = ARRAY[" + + " 'a_boolean', " + + " 'an_integer', " + + " 'bucket(an_integer, 10)', " + + " 'a_bigint', " + + " 'a_real', " + + " 'a_double', " + + " 'a_short_decimal', " + + " 'a_long_decimal', " + + " 'a_varchar', " + + " 'truncate(a_varchar, 5)', " + + " 'a_varbinary', " + + " 'a_date', " + + " 'year(a_date)', " + + " ' month(a_date ) ', " + + " 'day(a_date)', " + + " 'hour(a_date)', " + + " 'a_time', " + + " 'a_timestamp', " + + " 'a_timestamptz', " + + " 'a_uuid' " + + " ]" + + ")"); + + assertQueryReturnsEmptyResult("SELECT * FROM test_sorted_table"); + + dropTable("test_sorted_table"); + } + + @Test + public void testCreateBlankSortedTable() + { + assertUpdate("" + + "CREATE TABLE test_blank_sorted_table (" + + " a_boolean boolean, " + + " an_integer integer, " + + " a_bigint bigint, " + + " a_real real, " + + " 
a_double double, " + + " a_short_decimal decimal(5,2), " + + " a_long_decimal decimal(38,20), " + + " a_varchar varchar, " + + " a_varbinary varbinary, " + + " a_date date, " + + " a_time time(6), " + + " a_timestamp timestamp(6), " + + " a_timestamptz timestamp(6) with time zone, " + + " a_uuid uuid, " + + " a_row row(id integer , vc varchar), " + + " an_array array(varchar), " + + " a_map map(integer, varchar) " + + ") " + + "WITH (" + + "partitioning = ARRAY[" + + " 'a_boolean', " + + " 'an_integer', " + + " 'a_bigint', " + + " 'a_real', " + + " 'a_double', " + + " 'a_short_decimal', " + + " 'a_long_decimal', " + + " 'a_varchar', " + + " 'a_varbinary', " + + " 'a_date', " + + " 'a_time', " + + " 'a_timestamp', " + + " 'a_timestamptz', " + + " 'a_uuid' " + + // Note: partitioning on non-primitive columns is not allowed in Iceberg + " ], " + + "sorted_by = ARRAY[]" + + ")"); + + assertQueryReturnsEmptyResult("SELECT * FROM test_blank_sorted_table"); + + dropTable("test_blank_sorted_table"); + } + + @DataProvider(name = "sortedTableWithQuotedIdentifierCasing") + public static Object[][] sortedTableWithQuotedIdentifierCasing() + { + return new Object[][] { + {"x", "x", true}, + {"X", "x", true}, + {"\"x\"", "x", true}, + {"\"X\"", "x", true}, + {"x", "\"x\"", true}, + {"X", "\"x\"", true}, + {"\"x\"", "\"x\"", true}, + {"\"X\"", "\"x\"", true}, + {"x", "X", true}, + {"X", "X", true}, + {"\"x\"", "X", true}, + {"\"X\"", "X", true}, + {"x", "\"X\"", false}, + {"X", "\"X\"", false}, + {"\"x\"", "\"X\"", false}, + {"\"X\"", "\"X\"", false}, + }; + } + + @Test(dataProvider = "sortedTableWithQuotedIdentifierCasing") + public void testCreateSortedTableWithQuotedIdentifierCasing(String columnName, String sortField, boolean success) + { + String tableName = "sorting_" + randomTableSuffix(); + @Language("SQL") String sql = format("CREATE TABLE %s (%s bigint) WITH (sorted_by = ARRAY['%s'])", tableName, columnName, sortField); + if (success) { + 
assertThat(query(sql)).matches("VALUES (true)"); + dropTable(tableName); + } + else { + assertQueryFails(sql, "Unable to parse sorting value"); + } + } + @Test public void testTableComments() { diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/BaseTrinoCatalogTest.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/BaseTrinoCatalogTest.java index 4c8f949c2500..a98967d1aa91 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/BaseTrinoCatalogTest.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/BaseTrinoCatalogTest.java @@ -22,9 +22,12 @@ import io.trino.spi.security.PrincipalType; import io.trino.spi.security.TrinoPrincipal; import io.trino.spi.type.VarcharType; +import org.apache.iceberg.NullOrder; import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.Schema; +import org.apache.iceberg.SortOrder; import org.apache.iceberg.Table; +import org.apache.iceberg.expressions.Expressions; import org.apache.iceberg.types.Types; import org.testng.annotations.Test; @@ -80,6 +83,7 @@ public void testCreateTable() schemaTableName, new Schema(Types.NestedField.of(1, true, "col1", Types.LongType.get())), PartitionSpec.unpartitioned(), + SortOrder.unsorted(), tmpDirectory.toAbsolutePath().toString(), ImmutableMap.of()) .commitTransaction(); @@ -92,6 +96,77 @@ public void testCreateTable() assertEquals(icebergTable.schema().columns().get(0).name(), "col1"); assertEquals(icebergTable.schema().columns().get(0).type(), Types.LongType.get()); assertEquals(icebergTable.location(), tmpDirectory.toAbsolutePath().toString()); + assertEquals(icebergTable.sortOrder().isUnsorted(), true); + assertEquals(icebergTable.properties(), ImmutableMap.of()); + + catalog.dropTable(SESSION, schemaTableName); + assertThat(catalog.listTables(SESSION, Optional.of(namespace))).doesNotContain(schemaTableName); + assertThat(catalog.listTables(SESSION, Optional.empty())).doesNotContain(schemaTableName); + } + finally { + 
try { + catalog.dropNamespace(SESSION, namespace); + } + catch (Exception e) { + LOG.warn("Failed to clean up namespace: %s", namespace); + } + } + } + + @Test + public void testCreateWithSortTable() + throws IOException + { + TrinoCatalog catalog = createTrinoCatalog(false); + Path tmpDirectory = Files.createTempDirectory("iceberg_catalog_test_create_sort_table_"); + tmpDirectory.toFile().deleteOnExit(); + + String namespace = "test_create_sort_table_" + randomTableSuffix(); + String table = "tableName"; + SchemaTableName schemaTableName = new SchemaTableName(namespace, table); + try { + catalog.createNamespace(SESSION, namespace, ImmutableMap.of(), new TrinoPrincipal(PrincipalType.USER, SESSION.getUser())); + Schema tableSchema = new Schema(Types.NestedField.of(1, true, "col1", Types.LongType.get()), + Types.NestedField.of(2, true, "col2", Types.StringType.get()), + Types.NestedField.of(3, true, "col3", Types.DateType.get()), + Types.NestedField.of(4, true, "col4", Types.StringType.get())); + + SortOrder sortOrder = SortOrder.builderFor(tableSchema) + .asc("col1") + .desc("col2", NullOrder.NULLS_FIRST) + .desc("col3") + .desc(Expressions.year("col3"), NullOrder.NULLS_LAST) + .desc(Expressions.month("col3"), NullOrder.NULLS_FIRST) + .asc(Expressions.day("col3"), NullOrder.NULLS_FIRST) + .asc(Expressions.hour("col3"), NullOrder.NULLS_FIRST) + .desc(Expressions.bucket("col2", 10), NullOrder.NULLS_FIRST) + .desc(Expressions.truncate("col4", 5), NullOrder.NULLS_FIRST).build(); + catalog.newCreateTableTransaction( + SESSION, + schemaTableName, + tableSchema, + PartitionSpec.unpartitioned(), + sortOrder, + tmpDirectory.toAbsolutePath().toString(), + ImmutableMap.of()) + .commitTransaction(); + assertThat(catalog.listTables(SESSION, Optional.of(namespace))).contains(schemaTableName); + assertThat(catalog.listTables(SESSION, Optional.empty())).contains(schemaTableName); + + Table icebergTable = catalog.loadTable(SESSION, schemaTableName); + 
assertEquals(icebergTable.name(), quotedTableName(schemaTableName)); + assertEquals(icebergTable.schema().columns().size(), 4); + assertEquals(icebergTable.schema().columns().get(0).name(), "col1"); + assertEquals(icebergTable.schema().columns().get(0).type(), Types.LongType.get()); + assertEquals(icebergTable.schema().columns().get(1).name(), "col2"); + assertEquals(icebergTable.schema().columns().get(1).type(), Types.StringType.get()); + assertEquals(icebergTable.location(), tmpDirectory.toAbsolutePath().toString()); + assertEquals(icebergTable.schema().columns().get(2).name(), "col3"); + assertEquals(icebergTable.schema().columns().get(2).type(), Types.DateType.get()); + assertEquals(icebergTable.schema().columns().get(3).name(), "col4"); + assertEquals(icebergTable.schema().columns().get(3).type(), Types.StringType.get()); + assertEquals(icebergTable.location(), tmpDirectory.toAbsolutePath().toString()); + assertEquals(icebergTable.sortOrder(), sortOrder); assertEquals(icebergTable.properties(), ImmutableMap.of()); catalog.dropTable(SESSION, schemaTableName); @@ -129,6 +204,7 @@ public void testRenameTable() sourceSchemaTableName, new Schema(Types.NestedField.of(1, true, "col1", Types.LongType.get())), PartitionSpec.unpartitioned(), + SortOrder.unsorted(), tmpDirectory.toAbsolutePath().toString(), ImmutableMap.of()) .commitTransaction(); diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestSortFields.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestSortFields.java new file mode 100644 index 000000000000..007ef5dd7dc1 --- /dev/null +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestSortFields.java @@ -0,0 +1,270 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.iceberg; + +import org.apache.iceberg.NullOrder; +import org.apache.iceberg.Schema; +import org.apache.iceberg.SortOrder; +import org.apache.iceberg.exceptions.ValidationException; +import org.apache.iceberg.expressions.Expressions; +import org.apache.iceberg.types.Types.DoubleType; +import org.apache.iceberg.types.Types.ListType; +import org.apache.iceberg.types.Types.LongType; +import org.apache.iceberg.types.Types.NestedField; +import org.apache.iceberg.types.Types.StringType; +import org.apache.iceberg.types.Types.TimestampType; +import org.assertj.core.api.AbstractThrowableAssert; +import org.testng.annotations.Test; + +import java.util.Locale; +import java.util.function.Consumer; + +import static com.google.common.collect.Iterables.getOnlyElement; +import static io.trino.plugin.iceberg.IcebergUtil.fromIdentifier; +import static io.trino.plugin.iceberg.IcebergUtil.toIdentifier; +import static io.trino.plugin.iceberg.SortFields.parseSortField; +import static io.trino.plugin.iceberg.SortFields.toSortFields; +import static io.trino.testing.assertions.Assert.assertEquals; +import static org.assertj.core.api.AssertionsForClassTypes.assertThatThrownBy; + +public class TestSortFields +{ + @Test + public void testParse() + { + assertParse("order_key", sortedOrder(builder -> builder.asc("order_key"))); + assertParse("order_key ASC", sortedOrder(builder -> builder.asc("order_key"))); + assertParse("order_key ASC NULLS FIRST", sortedOrder(builder -> builder.asc("order_key"))); + assertParse("order_key ASC NULLS FIRST", 
sortedOrder(builder -> builder.asc("order_key", NullOrder.NULLS_FIRST))); + assertParse("order_key ASC NULLS LAST", sortedOrder(builder -> builder.asc("order_key", NullOrder.NULLS_LAST))); + assertParse("order_key DESC", sortedOrder(builder -> builder.desc("order_key"))); + assertParse("order_key DESC NULLS FIRST", sortedOrder(builder -> builder.desc("order_key", NullOrder.NULLS_FIRST))); + assertParse("order_key DESC NULLS LAST", sortedOrder(builder -> builder.desc("order_key", NullOrder.NULLS_LAST))); + assertParse("order_key DESC NULLS LAST", sortedOrder(builder -> builder.desc("order_key"))); + + assertParse("comment", sortedOrder(builder -> builder.asc("comment"))); + assertParse("\"comment\"", sortedOrder(builder -> builder.asc("comment"))); + assertParse("\"\"\"another\"\" \"\"quoted\"\" \"\"field\"\"\"", sortedOrder(builder -> builder.asc("\"another\" \"quoted\" \"field\""))); + assertParse("\"\"\"another\"\" \"\"quoted\"\" \"\"field\"\"\" ASC NULLS FIRST ", sortedOrder(builder -> builder.asc("\"another\" \"quoted\" \"field\""))); + assertParse("\"\"\"another\"\" \"\"quoted\"\" \"\"field\"\"\" ASC NULLS LAST ", sortedOrder(builder -> builder.asc("\"another\" \"quoted\" \"field\"", NullOrder.NULLS_LAST))); + assertParse("\"\"\"another\"\" \"\"quoted\"\" \"\"field\"\"\" DESC NULLS FIRST", sortedOrder(builder -> builder.desc("\"another\" \"quoted\" \"field\"", NullOrder.NULLS_FIRST))); + assertParse(" comment ", sortedOrder(builder -> builder.asc("comment"))); + assertParse("comment ASC", sortedOrder(builder -> builder.asc("comment"))); + assertParse(" comment ASC ", sortedOrder(builder -> builder.asc("comment"))); + assertParse("comment ASC NULLS FIRST", sortedOrder(builder -> builder.asc("comment"))); + assertParse(" comment ASC NULLS FIRST ", sortedOrder(builder -> builder.asc("comment"))); + assertParse("comment ASC NULLS FIRST", sortedOrder(builder -> builder.asc("comment", NullOrder.NULLS_FIRST))); + assertParse(" comment ASC NULLS FIRST ", 
sortedOrder(builder -> builder.asc("comment", NullOrder.NULLS_FIRST))); + assertParse("comment ASC NULLS FIRST", sortedOrder(builder -> builder.asc("comment", NullOrder.NULLS_FIRST))); + assertParse(" comment ASC NULLS FIRST ", sortedOrder(builder -> builder.asc("comment", NullOrder.NULLS_FIRST))); + assertParse("comment ASC NULLS LAST", sortedOrder(builder -> builder.asc("comment", NullOrder.NULLS_LAST))); + assertParse(" comment ASC NULLS LAST ", sortedOrder(builder -> builder.asc("comment", NullOrder.NULLS_LAST))); + assertParse("comment DESC", sortedOrder(builder -> builder.desc("comment"))); + assertParse(" comment DESC ", sortedOrder(builder -> builder.desc("comment"))); + assertParse("comment DESC NULLS FIRST", sortedOrder(builder -> builder.desc("comment", NullOrder.NULLS_FIRST))); + assertParse(" comment DESC NULLS FIRST ", sortedOrder(builder -> builder.desc("comment", NullOrder.NULLS_FIRST))); + assertParse("comment DESC NULLS LAST", sortedOrder(builder -> builder.desc("comment", NullOrder.NULLS_LAST))); + assertParse(" comment DESC NULLS LAST ", sortedOrder(builder -> builder.desc("comment", NullOrder.NULLS_LAST))); + assertParse("comment DESC NULLS LAST", sortedOrder(builder -> builder.desc("comment"))); + assertParse(" comment DESC NULLS LAST ", sortedOrder(builder -> builder.desc("comment"))); + + assertParse("year(ts)", sortedOrder(builder -> builder.asc(Expressions.year("ts")))); + assertParse("YEAR(ts)", sortedOrder(builder -> builder.asc(Expressions.year("ts"))), "year(ts) ASC NULLS FIRST"); + assertParse("YeaR(TS)", sortedOrder(builder -> builder.asc(Expressions.year("ts")))); + assertParse("yEAR(TS)", sortedOrder(builder -> builder.asc(Expressions.year("ts")))); + assertParse(" year( ts )", sortedOrder(builder -> builder.asc(Expressions.year("ts")))); + assertParse("year(\"quoted ts\")", sortedOrder(builder -> builder.asc(Expressions.year("quoted ts")))); + assertParse("year(ts) ASC", sortedOrder(builder -> 
builder.asc(Expressions.year("ts")))); + assertParse(" year( ts ) ASC ", sortedOrder(builder -> builder.asc(Expressions.year("ts")))); + assertParse("year(\"quoted ts\") ASC", sortedOrder(builder -> builder.asc(Expressions.year("quoted ts")))); + assertParse("year(ts) ASC NULLS FIRST", sortedOrder(builder -> builder.asc(Expressions.year("ts")))); + assertParse(" year( ts ) ASC NULLS FIRST ", sortedOrder(builder -> builder.asc(Expressions.year("ts")))); + assertParse("year(ts) ASC NULLS LAST", sortedOrder(builder -> builder.asc(Expressions.year("ts"), NullOrder.NULLS_LAST))); + assertParse("year(\"quoted ts\") ASC NULLS LAST", sortedOrder(builder -> builder.asc(Expressions.year("quoted ts"), NullOrder.NULLS_LAST))); + assertParse(" year( ts ) ASC NULLS LAST ", sortedOrder(builder -> builder.asc(Expressions.year("ts"), NullOrder.NULLS_LAST))); + assertParse(" year( ts ) DESC ", sortedOrder(builder -> builder.desc(Expressions.year("ts")))); + assertParse("year(ts) DESC", sortedOrder(builder -> builder.desc(Expressions.year("ts")))); + assertParse(" year(ts) DESC ", sortedOrder(builder -> builder.desc(Expressions.year("ts")))); + assertParse("year(ts) DESC NULLS FIRST", sortedOrder(builder -> builder.desc(Expressions.year("ts"), NullOrder.NULLS_FIRST))); + assertParse(" year(ts) DESC NULLS FIRST ", sortedOrder(builder -> builder.desc(Expressions.year("ts"), NullOrder.NULLS_FIRST))); + assertParse("year(ts) DESC NULLS LAST", sortedOrder(builder -> builder.desc(Expressions.year("ts"), NullOrder.NULLS_LAST))); + assertParse(" year( ts ) DESC NULLS LAST ", sortedOrder(builder -> builder.desc(Expressions.year("ts"), NullOrder.NULLS_LAST))); + + assertParse("month(ts)", sortedOrder(builder -> builder.asc(Expressions.month("ts")))); + assertParse("MONTH( ts )", sortedOrder(builder -> builder.asc(Expressions.month("ts")))); + assertParse("MonTH( ts )", sortedOrder(builder -> builder.asc(Expressions.month("ts")))); + assertParse("monTH( ts )", sortedOrder(builder -> 
builder.asc(Expressions.month("ts")))); + assertParse(" month( ts )", sortedOrder(builder -> builder.asc(Expressions.month("ts")))); + assertParse("month(ts) ASC", sortedOrder(builder -> builder.asc(Expressions.month("ts")))); + assertParse(" month( ts ) ASC ", sortedOrder(builder -> builder.asc(Expressions.month("ts")))); + assertParse("month(ts) ASC NULLS FIRST", sortedOrder(builder -> builder.asc(Expressions.month("ts")))); + assertParse(" month( ts ) ASC NULLS FIRST", sortedOrder(builder -> builder.asc(Expressions.month("ts")))); + assertParse("month(ts) ASC NULLS LAST", sortedOrder(builder -> builder.asc(Expressions.month("ts"), NullOrder.NULLS_LAST))); + assertParse(" month( ts ) ASC NULLS LAST ", sortedOrder(builder -> builder.asc(Expressions.month("ts"), NullOrder.NULLS_LAST))); + assertParse("month(ts) DESC", sortedOrder(builder -> builder.desc(Expressions.month("ts")))); + assertParse(" month( ts ) DESC ", sortedOrder(builder -> builder.desc(Expressions.month("ts")))); + assertParse("month(ts) DESC NULLS FIRST", sortedOrder(builder -> builder.desc(Expressions.month("ts"), NullOrder.NULLS_FIRST))); + assertParse(" month( ts ) DESC NULLS FIRST ", sortedOrder(builder -> builder.desc(Expressions.month("ts"), NullOrder.NULLS_FIRST))); + assertParse("month(ts) DESC NULLS LAST", sortedOrder(builder -> builder.desc(Expressions.month("ts"), NullOrder.NULLS_LAST))); + assertParse(" month( ts ) DESC NULLS LAST ", sortedOrder(builder -> builder.desc(Expressions.month("ts"), NullOrder.NULLS_LAST))); + + assertParse("day(ts)", sortedOrder(builder -> builder.asc(Expressions.day("ts")))); + assertParse("DAY(ts)", sortedOrder(builder -> builder.asc(Expressions.day("ts")))); + assertParse("DaY(ts)", sortedOrder(builder -> builder.asc(Expressions.day("ts")))); + assertParse("daY(ts)", sortedOrder(builder -> builder.asc(Expressions.day("ts")))); + assertParse(" day( ts )", sortedOrder(builder -> builder.asc(Expressions.day("ts")))); + assertParse("day(ts) ASC", 
sortedOrder(builder -> builder.asc(Expressions.day("ts")))); + assertParse(" day( ts ) ASC ", sortedOrder(builder -> builder.asc(Expressions.day("ts")))); + assertParse("day(ts) ASC NULLS FIRST", sortedOrder(builder -> builder.asc(Expressions.day("ts")))); + assertParse(" day( ts ) ASC NULLS FIRST ", sortedOrder(builder -> builder.asc(Expressions.day("ts")))); + assertParse("day(ts) ASC NULLS LAST", sortedOrder(builder -> builder.asc(Expressions.day("ts"), NullOrder.NULLS_LAST))); + assertParse(" day( ts ) ASC NULLS LAST ", sortedOrder(builder -> builder.asc(Expressions.day("ts"), NullOrder.NULLS_LAST))); + assertParse("day(ts) DESC", sortedOrder(builder -> builder.desc(Expressions.day("ts")))); + assertParse(" day( ts ) DESC ", sortedOrder(builder -> builder.desc(Expressions.day("ts")))); + assertParse("day(ts) DESC NULLS FIRST", sortedOrder(builder -> builder.desc(Expressions.day("ts"), NullOrder.NULLS_FIRST))); + assertParse(" day( ts ) DESC NULLS FIRST ", sortedOrder(builder -> builder.desc(Expressions.day("ts"), NullOrder.NULLS_FIRST))); + assertParse("day(ts) DESC NULLS LAST", sortedOrder(builder -> builder.desc(Expressions.day("ts"), NullOrder.NULLS_LAST))); + assertParse(" day( ts ) DESC NULLS LAST ", sortedOrder(builder -> builder.desc(Expressions.day("ts"), NullOrder.NULLS_LAST))); + + assertParse("hour(ts)", sortedOrder(builder -> builder.asc(Expressions.hour("ts")))); + assertParse("HOUR(ts)", sortedOrder(builder -> builder.asc(Expressions.hour("ts")))); + assertParse("HouR(ts)", sortedOrder(builder -> builder.asc(Expressions.hour("ts")))); + assertParse("houR(ts)", sortedOrder(builder -> builder.asc(Expressions.hour("ts")))); + assertParse(" hour( ts )", sortedOrder(builder -> builder.asc(Expressions.hour("ts")))); + assertParse("hour(ts) ASC", sortedOrder(builder -> builder.asc(Expressions.hour("ts")))); + assertParse(" hour( ts ) ASC ", sortedOrder(builder -> builder.asc(Expressions.hour("ts")))); + assertParse("hour(ts) ASC NULLS FIRST", 
sortedOrder(builder -> builder.asc(Expressions.hour("ts")))); + assertParse(" hour( ts ) ASC NULLS FIRST ", sortedOrder(builder -> builder.asc(Expressions.hour("ts")))); + assertParse("hour(ts) ASC NULLS LAST", sortedOrder(builder -> builder.asc(Expressions.hour("ts"), NullOrder.NULLS_LAST))); + assertParse(" hour( ts ) ASC NULLS LAST ", sortedOrder(builder -> builder.asc(Expressions.hour("ts"), NullOrder.NULLS_LAST))); + assertParse("hour(ts) DESC", sortedOrder(builder -> builder.desc(Expressions.hour("ts")))); + assertParse(" hour( ts ) DESC ", sortedOrder(builder -> builder.desc(Expressions.hour("ts")))); + assertParse("hour(ts) DESC NULLS FIRST", sortedOrder(builder -> builder.desc(Expressions.hour("ts"), NullOrder.NULLS_FIRST))); + assertParse(" hour( ts ) DESC NULLS FIRST ", sortedOrder(builder -> builder.desc(Expressions.hour("ts"), NullOrder.NULLS_FIRST))); + assertParse("hour(ts) DESC NULLS LAST", sortedOrder(builder -> builder.desc(Expressions.hour("ts"), NullOrder.NULLS_LAST))); + assertParse(" hour( ts ) DESC NULLS LAST ", sortedOrder(builder -> builder.desc(Expressions.hour("ts"), NullOrder.NULLS_LAST))); + + assertParse("bucket(order_key,42)", sortedOrder(builder -> builder.asc(Expressions.bucket("order_key", 42)))); + assertParse("BUCKET(order_key, 42)", sortedOrder(builder -> builder.asc(Expressions.bucket("order_key", 42)))); + assertParse("BUckeT(order_key, 42)", sortedOrder(builder -> builder.asc(Expressions.bucket("order_key", 42)))); + assertParse("buckET(order_key, 42 )", sortedOrder(builder -> builder.asc(Expressions.bucket("order_key", 42)))); + assertParse(" bucket( order_key , 42 )", sortedOrder(builder -> builder.asc(Expressions.bucket("order_key", 42)))); + assertParse("bucket(order_key, 42) ASC", sortedOrder(builder -> builder.asc(Expressions.bucket("order_key", 42)))); + assertParse(" bucket( order_key , 42) ASC ", sortedOrder(builder -> builder.asc(Expressions.bucket("order_key", 42)))); + assertParse("bucket(order_key, 42) ASC NULLS 
FIRST", sortedOrder(builder -> builder.asc(Expressions.bucket("order_key", 42), NullOrder.NULLS_FIRST))); + assertParse(" bucket( order_key , 42) ASC NULLS FIRST ", sortedOrder(builder -> builder.asc(Expressions.bucket("order_key", 42), NullOrder.NULLS_FIRST))); + assertParse("bucket(order_key, 42) ASC NULLS LAST", sortedOrder(builder -> builder.asc(Expressions.bucket("order_key", 42), NullOrder.NULLS_LAST))); + assertParse(" bucket( order_key , 42) ASC NULLS LAST", sortedOrder(builder -> builder.asc(Expressions.bucket("order_key", 42), NullOrder.NULLS_LAST))); + assertParse("bucket(order_key, 42) DESC", sortedOrder(builder -> builder.desc(Expressions.bucket("order_key", 42), NullOrder.NULLS_LAST))); + assertParse(" bucket( order_key , 42) DESC ", sortedOrder(builder -> builder.desc(Expressions.bucket("order_key", 42), NullOrder.NULLS_LAST))); + assertParse("bucket(order_key, 42) DESC NULLS FIRST", sortedOrder(builder -> builder.desc(Expressions.bucket("order_key", 42), NullOrder.NULLS_FIRST))); + assertParse(" bucket( order_key , 42) DESC NULLS FIRST ", sortedOrder(builder -> builder.desc(Expressions.bucket("order_key", 42), NullOrder.NULLS_FIRST))); + assertParse("bucket(order_key, 42) DESC NULLS LAST", sortedOrder(builder -> builder.desc(Expressions.bucket("order_key", 42), NullOrder.NULLS_LAST))); + assertParse(" bucket( order_key , 42) DESC NULLS LAST ", sortedOrder(builder -> builder.desc(Expressions.bucket("order_key", 42), NullOrder.NULLS_LAST))); + + assertParse("truncate(comment, 10)", sortedOrder(builder -> builder.asc(Expressions.truncate("comment", 10)))); + assertParse("TRUNCATE(comment, 10)", sortedOrder(builder -> builder.asc(Expressions.truncate("comment", 10)))); + assertParse("TRuncaTE(comment, 10)", sortedOrder(builder -> builder.asc(Expressions.truncate("comment", 10)))); + assertParse("truncaTE(comment, 10)", sortedOrder(builder -> builder.asc(Expressions.truncate("comment", 10)))); + assertParse(" truncate( comment , 10)", sortedOrder(builder 
-> builder.asc(Expressions.truncate("comment", 10)))); + assertParse("truncate(comment, 10) ASC", sortedOrder(builder -> builder.asc(Expressions.truncate("comment", 10)))); + assertParse(" truncate( comment , 10) ASC ", sortedOrder(builder -> builder.asc(Expressions.truncate("comment", 10)))); + assertParse("truncate(comment, 10) ASC NULLS FIRST", sortedOrder(builder -> builder.asc(Expressions.truncate("comment", 10), NullOrder.NULLS_FIRST))); + assertParse(" truncate( comment , 10) ASC NULLS FIRST ", sortedOrder(builder -> builder.asc(Expressions.truncate("comment", 10), NullOrder.NULLS_FIRST))); + assertParse("truncate(comment, 10) ASC NULLS LAST", sortedOrder(builder -> builder.asc(Expressions.truncate("comment", 10), NullOrder.NULLS_LAST))); + assertParse(" truncate( comment , 10) ASC NULLS LAST ", sortedOrder(builder -> builder.asc(Expressions.truncate("comment", 10), NullOrder.NULLS_LAST))); + assertParse("truncate(comment, 10) DESC", sortedOrder(builder -> builder.desc(Expressions.truncate("comment", 10), NullOrder.NULLS_LAST))); + assertParse(" truncate( comment , 10) DESC ", sortedOrder(builder -> builder.desc(Expressions.truncate("comment", 10), NullOrder.NULLS_LAST))); + assertParse("truncate(comment, 10) DESC NULLS FIRST", sortedOrder(builder -> builder.desc(Expressions.truncate("comment", 10), NullOrder.NULLS_FIRST))); + assertParse(" truncate( comment , 10) DESC NULLS FIRST ", sortedOrder(builder -> builder.desc(Expressions.truncate("comment", 10), NullOrder.NULLS_FIRST))); + assertParse("truncate(comment, 10) DESC NULLS LAST", sortedOrder(builder -> builder.desc(Expressions.truncate("comment", 10), NullOrder.NULLS_LAST))); + assertParse(" truncate( comment , 10) DESC NULLS LAST ", sortedOrder(builder -> builder.desc(Expressions.truncate("comment", 10), NullOrder.NULLS_LAST))); + assertParse("truncate(\"quoted field\", 5) DESC NULLS FIRST", sortedOrder(builder -> builder.desc(Expressions.truncate("quoted field", 5), NullOrder.NULLS_FIRST))); + 
assertParse("truncate(\"\"\"another\"\" \"\"quoted\"\" \"\"field\"\"\", 5) DESC NULLS FIRST", + sortedOrder(builder -> builder.desc(Expressions.truncate("\"another\" \"quoted\" \"field\"", 5), NullOrder.NULLS_FIRST)), + "truncate(\"\"\"another\"\" \"\"quoted\"\" \"\"field\"\"\", 5) DESC NULLS FIRST"); + assertInvalid("bucket()", "Invalid sort field declaration: bucket()"); + assertInvalid("abc", "Cannot find field 'abc' in struct: struct<1: order_key: required long, 2: ts: required timestamp, 3: price: required double, 4: comment: optional string, 5: notes: optional list, 7: quoted field: optional string, 8: quoted ts: optional timestamp, 9: \"another\" \"quoted\" \"field\": optional string>"); + assertInvalid("notes", "Cannot sort by non-primitive source field: list"); + assertInvalid("bucket(price, 42)", "Cannot bind: bucket[42] cannot transform double values from 'price'"); + assertInvalid("bucket(notes, 88)", "Cannot bind: bucket[88] cannot transform list values from 'notes'"); + assertInvalid("truncate(ts, 13)", "Cannot truncate type: timestamp"); + } + + private static void assertParse(String value, SortOrder expected, String canonicalRepresentation) + { + assertParse(value, expected); + assertEquals(getOnlyElement(toSortFields(expected)), canonicalRepresentation); + } + + private static void assertParse(String value, SortOrder expected) + { + assertEquals(expected.fields().size(), 1); + assertEquals(parseField(value), expected); + } + + private static void assertInvalid(String value, String message) + { + AbstractThrowableAssert throwableAssert = assertThatThrownBy(() -> parseField(value)) + .isInstanceOfAny( + IllegalArgumentException.class, + UnsupportedOperationException.class, + ValidationException.class); + + throwableAssert.hasMessage(message); + } + + private static SortOrder parseField(String value) + { + return sortedOrder(builder -> parseSortField(builder, value)); + } + + private static SortOrder sortedOrder(Consumer consumer) + { + Schema schema 
= new Schema( + NestedField.required(1, "order_key", LongType.get()), + NestedField.required(2, "ts", TimestampType.withoutZone()), + NestedField.required(3, "price", DoubleType.get()), + NestedField.optional(4, "comment", StringType.get()), + NestedField.optional(5, "notes", ListType.ofRequired(6, StringType.get())), + NestedField.optional(7, "quoted field", StringType.get()), + NestedField.optional(8, "quoted ts", TimestampType.withoutZone()), + NestedField.optional(9, "\"another\" \"quoted\" \"field\"", StringType.get())); + + SortOrder.Builder builder = SortOrder.builderFor(schema); + consumer.accept(builder); + return builder.build(); + } + + @Test + public void testFromIdentifier() + { + assertEquals(fromIdentifier("test"), "test"); + assertEquals(fromIdentifier("TEST"), "test"); + assertEquals(fromIdentifier("TEST"), "TEST".toLowerCase(Locale.ROOT)); + assertEquals(fromIdentifier("\" test\""), " test"); + assertEquals(fromIdentifier("\"20days\""), "20days"); + assertEquals(fromIdentifier("\"\"\"another\"\" \"\"quoted\"\" \"\"field\"\"\""), "\"another\" \"quoted\" \"field\""); + } + + @Test + public void testToIdentifier() + { + assertEquals(toIdentifier("test"), "test"); + assertEquals(toIdentifier("TEST"), "test".toUpperCase(Locale.ROOT)); + assertEquals(toIdentifier(" test"), " test"); + assertEquals(toIdentifier("20days"), "\"20days\""); + assertEquals(toIdentifier("\"another\" \"quoted\" \"field\""), "\"\"\"another\"\" \"\"quoted\"\" \"\"field\"\"\""); + } +}