Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
connector.name=iceberg
hive.metastore.uri=thrift://hadoop-master:9083
# TODO: Remove this config to test default read behavior once Spark writer is fixed. See https://github.com/trinodb/trino/issues/6369 for details
iceberg.use-file-size-from-metadata=false
Comment thread
lxynov marked this conversation as resolved.
Outdated
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

import io.trino.tempto.ProductTest;
import io.trino.tempto.query.QueryResult;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import java.sql.Date;
Expand Down Expand Up @@ -42,10 +43,16 @@ public class TestSparkCompatibility
private static final String SPARK_CATALOG = "iceberg_test";
private static final String PRESTO_CATALOG = "iceberg";

@Test(groups = {ICEBERG, PROFILE_SPECIFIC_TESTS})
public void testPrestoReadingSparkData()
@DataProvider(name = "storage_formats")
public static Object[][] storageFormats()
{
String baseTableName = "test_presto_reading_primitive_types";
return new String[][] {{"ORC"}, {"PARQUET"}};
}

@Test(dataProvider = "storage_formats", groups = {ICEBERG, PROFILE_SPECIFIC_TESTS})
public void testPrestoReadingSparkData(String format)
{
String baseTableName = "test_presto_reading_primitive_types_" + format;
String sparkTableName = sparkTableName(baseTableName);

String sparkTableDefinition =
Expand All @@ -58,7 +65,7 @@ public void testPrestoReadingSparkData()
", _boolean BOOLEAN" +
", _timestamp TIMESTAMP" +
", _date DATE" +
") USING ICEBERG";
") USING ICEBERG TBLPROPERTIES ('write.format.default' = '" + format + "')";
onSpark().executeQuery(format(sparkTableDefinition, sparkTableName));

String values = "VALUES (" +
Expand Down Expand Up @@ -94,10 +101,10 @@ public void testPrestoReadingSparkData()
onSpark().executeQuery("DROP TABLE " + sparkTableName);
}

@Test(groups = {ICEBERG, PROFILE_SPECIFIC_TESTS})
public void testSparkReadingPrestoData()
@Test(dataProvider = "storage_formats", groups = {ICEBERG, PROFILE_SPECIFIC_TESTS})
public void testSparkReadingPrestoData(String format)
{
String baseTableName = "test_spark_reading_primitive_types";
String baseTableName = "test_spark_reading_primitive_types_" + format;
String prestoTableName = prestoTableName(baseTableName);

String prestoTableDefinition =
Expand All @@ -110,7 +117,7 @@ public void testSparkReadingPrestoData()
", _boolean BOOLEAN" +
//", _timestamp TIMESTAMP" +
", _date DATE" +
") WITH (format = 'ORC')";
") WITH (format = '" + format + "')";
onPresto().executeQuery(format(prestoTableDefinition, prestoTableName));

String values = "VALUES (" +
Expand Down Expand Up @@ -161,12 +168,12 @@ public void testPrestoCreatesSparkDrops()
onSpark().executeQuery("DROP TABLE " + sparkTableName(baseTableName));
}

@Test(groups = {ICEBERG, PROFILE_SPECIFIC_TESTS})
public void testSparkReadsPrestoPartitionedTable()
@Test(dataProvider = "storage_formats", groups = {ICEBERG, PROFILE_SPECIFIC_TESTS})
public void testSparkReadsPrestoPartitionedTable(String format)
{
String baseTableName = "test_spark_reads_presto_partitioned_table";
String baseTableName = "test_spark_reads_presto_partitioned_table_" + format;
String prestoTableName = prestoTableName(baseTableName);
onPresto().executeQuery(format("CREATE TABLE %s (_string VARCHAR, _bigint BIGINT) WITH (partitioning = ARRAY['_string'])", prestoTableName));
onPresto().executeQuery(format("CREATE TABLE %s (_string VARCHAR, _bigint BIGINT) WITH (partitioning = ARRAY['_string'], format = '" + format + "')", prestoTableName));
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
onPresto().executeQuery(format("CREATE TABLE %s (_string VARCHAR, _bigint BIGINT) WITH (partitioning = ARRAY['_string'], format = '" + format + "')", prestoTableName));
onPresto().executeQuery(format("CREATE TABLE %s (_string VARCHAR, _bigint BIGINT) WITH (partitioning = ARRAY['_string'], format = '%s')", prestoTableName, format));

sorry if it was confusing, meant a change like the above in #6699 (comment)

onPresto().executeQuery(format("INSERT INTO %s VALUES ('a', 1001), ('b', 1002), ('c', 1003)", prestoTableName));

Row row = row("b", 1002);
Expand All @@ -178,12 +185,13 @@ public void testSparkReadsPrestoPartitionedTable()
onPresto().executeQuery("DROP TABLE " + prestoTableName);
}

@Test(groups = {ICEBERG, PROFILE_SPECIFIC_TESTS})
public void testPrestoReadsSparkPartitionedTable()
@Test(dataProvider = "storage_formats", groups = {ICEBERG, PROFILE_SPECIFIC_TESTS})
public void testPrestoReadsSparkPartitionedTable(String format)
{
String baseTableName = "test_spark_reads_presto_partitioned_table";
String baseTableName = "test_presto_reads_spark_partitioned_table_" + format;
String sparkTableName = sparkTableName(baseTableName);
onSpark().executeQuery(format("CREATE TABLE %s (_string STRING, _bigint BIGINT) USING ICEBERG PARTITIONED BY (_string)", sparkTableName));
onSpark().executeQuery(format("CREATE TABLE %s (_string STRING, _bigint BIGINT) USING ICEBERG PARTITIONED BY (_string)" +
"TBLPROPERTIES ('write.format.default' = '" + format + "')", sparkTableName));
onSpark().executeQuery(format("INSERT INTO %s VALUES ('a', 1001), ('b', 1002), ('c', 1003)", sparkTableName));

Row row = row("b", 1002);
Expand All @@ -196,10 +204,10 @@ public void testPrestoReadsSparkPartitionedTable()
onSpark().executeQuery("DROP TABLE " + sparkTableName);
}

@Test(groups = {ICEBERG, PROFILE_SPECIFIC_TESTS})
public void testPrestoReadingCompositeSparkData()
@Test(dataProvider = "storage_formats", groups = {ICEBERG, PROFILE_SPECIFIC_TESTS})
public void testPrestoReadingCompositeSparkData(String format)
{
String baseTableName = "test_presto_reading_spark_composites";
String baseTableName = "test_presto_reading_spark_composites_" + format;
String sparkTableName = sparkTableName(baseTableName);

String sparkTableDefinition = "" +
Expand All @@ -208,7 +216,8 @@ public void testPrestoReadingCompositeSparkData()
" info MAP<STRING, INT>,\n" +
" pets ARRAY<STRING>,\n" +
" user_info STRUCT<name:STRING, surname:STRING, age:INT, gender:STRING>)" +
" USING ICEBERG";
" USING ICEBERG" +
" TBLPROPERTIES ('write.format.default' = '" + format + "')";
onSpark().executeQuery(format(sparkTableDefinition, sparkTableName));

String insert = "" +
Expand All @@ -222,20 +231,23 @@ public void testPrestoReadingCompositeSparkData()
QueryResult prestoResult = onPresto().executeQuery(prestoSelect);
Row row = row("Doc213", 28, "Cat", "Claus");
assertThat(prestoResult).containsOnly(row);

onSpark().executeQuery("DROP TABLE " + sparkTableName);
}

@Test(groups = {ICEBERG, PROFILE_SPECIFIC_TESTS})
public void testSparkReadingCompositePrestoData()
@Test(dataProvider = "storage_formats", groups = {ICEBERG, PROFILE_SPECIFIC_TESTS})
public void testSparkReadingCompositePrestoData(String format)
{
String baseTableName = "test_spark_reading_presto_composites";
String baseTableName = "test_spark_reading_presto_composites_" + format;
String prestoTableName = prestoTableName(baseTableName);

String prestoTableDefinition = "" +
"CREATE TABLE %s (" +
" doc_id VARCHAR,\n" +
" info MAP(VARCHAR, INTEGER),\n" +
" pets ARRAY(VARCHAR),\n" +
" user_info ROW(name VARCHAR, surname VARCHAR, age INTEGER, gender VARCHAR))";
" user_info ROW(name VARCHAR, surname VARCHAR, age INTEGER, gender VARCHAR))" +
" WITH (format = '" + format + "')";
onPresto().executeQuery(format(prestoTableDefinition, prestoTableName));

String insert = "INSERT INTO %s VALUES('Doc213', MAP(ARRAY['age', 'children'], ARRAY[28, 3]), ARRAY['Dog', 'Cat', 'Pig'], ROW('Santa', 'Claus', 1000, 'MALE'))";
Expand All @@ -246,12 +258,14 @@ public void testSparkReadingCompositePrestoData()
QueryResult sparkResult = onSpark().executeQuery(sparkSelect);
Row row = row("Doc213", 28, "Cat", "Claus");
assertThat(sparkResult).containsOnly(row);

onPresto().executeQuery("DROP TABLE " + prestoTableName);
}

@Test(groups = {ICEBERG, PROFILE_SPECIFIC_TESTS})
public void testPrestoReadingNestedSparkData()
@Test(dataProvider = "storage_formats", groups = {ICEBERG, PROFILE_SPECIFIC_TESTS})
public void testPrestoReadingNestedSparkData(String format)
{
String baseTableName = "test_presto_reading_nested_spark_data";
String baseTableName = "test_presto_reading_nested_spark_data_" + format;
String sparkTableName = sparkTableName(baseTableName);

String sparkTableDefinition = "" +
Expand All @@ -260,7 +274,8 @@ public void testPrestoReadingNestedSparkData()
", nested_map MAP<STRING, ARRAY<STRUCT<sname: STRING, snumber: INT>>>\n" +
", nested_array ARRAY<MAP<STRING, ARRAY<STRUCT<mname: STRING, mnumber: INT>>>>\n" +
", nested_struct STRUCT<name:STRING, complicated: ARRAY<MAP<STRING, ARRAY<STRUCT<mname: STRING, mnumber: INT>>>>>)\n" +
" USING ICEBERG";
" USING ICEBERG" +
" TBLPROPERTIES ('write.format.default' = '" + format + "')";
onSpark().executeQuery(format(sparkTableDefinition, sparkTableName));

String insert = "" +
Expand Down Expand Up @@ -302,20 +317,23 @@ public void testPrestoReadingNestedSparkData()

QueryResult prestoResult = onPresto().executeQuery(prestoSelect + prestoTableName);
assertThat(prestoResult).containsOnly(row);

onSpark().executeQuery("DROP TABLE " + sparkTableName);
}

@Test(groups = {ICEBERG, PROFILE_SPECIFIC_TESTS})
public void testSparkReadingNestedPrestoData()
@Test(dataProvider = "storage_formats", groups = {ICEBERG, PROFILE_SPECIFIC_TESTS})
public void testSparkReadingNestedPrestoData(String format)
{
String baseTableName = "test_spark_reading_nested_presto_data";
String baseTableName = "test_spark_reading_nested_presto_data_" + format;
String prestoTableName = prestoTableName(baseTableName);

String prestoTableDefinition = "" +
"CREATE TABLE %s (\n" +
" doc_id VARCHAR\n" +
", nested_map MAP(VARCHAR, ARRAY(ROW(sname VARCHAR, snumber INT)))\n" +
", nested_array ARRAY(MAP(VARCHAR, ARRAY(ROW(mname VARCHAR, mnumber INT))))\n" +
", nested_struct ROW(name VARCHAR, complicated ARRAY(MAP(VARCHAR, ARRAY(ROW(mname VARCHAR, mnumber INT))))))";
", nested_struct ROW(name VARCHAR, complicated ARRAY(MAP(VARCHAR, ARRAY(ROW(mname VARCHAR, mnumber INT))))))" +
" WITH (format = '" + format + "')";
onPresto().executeQuery(format(prestoTableDefinition, prestoTableName));

String insert = "" +
Expand Down Expand Up @@ -357,6 +375,8 @@ public void testSparkReadingNestedPrestoData()

QueryResult sparkResult = onSpark().executeQuery(sparkSelect + sparkTableName);
assertThat(sparkResult).containsOnly(row);

onPresto().executeQuery("DROP TABLE " + prestoTableName);
}

private static String sparkTableName(String tableName)
Expand Down