Skip to content

Commit 726fbd7

Browse files
pettyjamesmarhimondr
authored andcommitted
Add test for recursive caching directory listings
1 parent 7d0c344 commit 726fbd7

File tree

2 files changed

+125
-7
lines changed

2 files changed

+125
-7
lines changed

plugin/trino-hive/src/test/java/io/trino/plugin/hive/fs/BaseCachingDirectoryListerTest.java

Lines changed: 30 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import com.google.common.collect.ImmutableList;
1717
import com.google.common.collect.ImmutableMap;
1818
import io.trino.plugin.hive.HiveQueryRunner;
19+
import io.trino.plugin.hive.metastore.PrincipalPrivileges;
1920
import io.trino.plugin.hive.metastore.Table;
2021
import io.trino.plugin.hive.metastore.file.FileHiveMetastore;
2122
import io.trino.testing.AbstractTestQueryFramework;
@@ -25,7 +26,9 @@
2526

2627
import java.nio.file.Path;
2728
import java.util.List;
29+
import java.util.Map;
2830
import java.util.NoSuchElementException;
31+
import java.util.Optional;
2932

3033
import static com.google.common.io.MoreFiles.deleteRecursively;
3134
import static com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE;
@@ -44,13 +47,18 @@ public abstract class BaseCachingDirectoryListerTest<C extends DirectoryLister>
4447
@Override
4548
protected QueryRunner createQueryRunner()
4649
throws Exception
50+
{
51+
return createQueryRunner(ImmutableMap.of("hive.allow-register-partition-procedure", "true"));
52+
}
53+
54+
protected QueryRunner createQueryRunner(Map<String, String> properties)
55+
throws Exception
4756
{
4857
Path temporaryMetastoreDirectory = createTempDirectory(null);
4958
closeAfterClass(() -> deleteRecursively(temporaryMetastoreDirectory, ALLOW_INSECURE));
5059
directoryLister = createDirectoryLister();
5160
return HiveQueryRunner.builder()
52-
.setHiveProperties(ImmutableMap.of(
53-
"hive.allow-register-partition-procedure", "true"))
61+
.setHiveProperties(properties)
5462
.setMetastore(distributedQueryRunner -> fileHiveMetastore = createTestingFileHiveMetastore(temporaryMetastoreDirectory.toFile()))
5563
.setDirectoryLister(directoryLister)
5664
.build();
@@ -324,17 +332,32 @@ public void testDropPartitionedTable()
324332
assertThat(isCached(tableGroup3PartitionLocation)).isFalse();
325333
}
326334

327-
private org.apache.hadoop.fs.Path getTableLocation(String schemaName, String tableName)
335+
protected Optional<Table> getTable(String schemaName, String tableName)
336+
{
337+
return fileHiveMetastore.getTable(schemaName, tableName);
338+
}
339+
340+
protected void createTable(Table table, PrincipalPrivileges principalPrivileges)
341+
{
342+
fileHiveMetastore.createTable(table, principalPrivileges);
343+
}
344+
345+
protected void dropTable(String schemaName, String tableName, boolean deleteData)
346+
{
347+
fileHiveMetastore.dropTable(schemaName, tableName, deleteData);
348+
}
349+
350+
protected org.apache.hadoop.fs.Path getTableLocation(String schemaName, String tableName)
328351
{
329-
return fileHiveMetastore.getTable(schemaName, tableName)
352+
return getTable(schemaName, tableName)
330353
.map(table -> table.getStorage().getLocation())
331354
.map(tableLocation -> new org.apache.hadoop.fs.Path(tableLocation))
332355
.orElseThrow(() -> new NoSuchElementException(format("The table %s.%s could not be found", schemaName, tableName)));
333356
}
334357

335-
private org.apache.hadoop.fs.Path getPartitionLocation(String schemaName, String tableName, List<String> partitionValues)
358+
protected org.apache.hadoop.fs.Path getPartitionLocation(String schemaName, String tableName, List<String> partitionValues)
336359
{
337-
Table table = fileHiveMetastore.getTable(schemaName, tableName)
360+
Table table = getTable(schemaName, tableName)
338361
.orElseThrow(() -> new NoSuchElementException(format("The table %s.%s could not be found", schemaName, tableName)));
339362

340363
return fileHiveMetastore.getPartition(table, partitionValues)
@@ -343,7 +366,7 @@ private org.apache.hadoop.fs.Path getPartitionLocation(String schemaName, String
343366
.orElseThrow(() -> new NoSuchElementException(format("The partition %s from the table %s.%s could not be found", partitionValues, schemaName, tableName)));
344367
}
345368

346-
private boolean isCached(org.apache.hadoop.fs.Path path)
369+
protected boolean isCached(org.apache.hadoop.fs.Path path)
347370
{
348371
return isCached(directoryLister, path);
349372
}
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
/*
2+
* Licensed under the Apache License, Version 2.0 (the "License");
3+
* you may not use this file except in compliance with the License.
4+
* You may obtain a copy of the License at
5+
*
6+
* http://www.apache.org/licenses/LICENSE-2.0
7+
*
8+
* Unless required by applicable law or agreed to in writing, software
9+
* distributed under the License is distributed on an "AS IS" BASIS,
10+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
* See the License for the specific language governing permissions and
12+
* limitations under the License.
13+
*/
14+
package io.trino.plugin.hive.fs;
15+
16+
import com.google.common.collect.ImmutableList;
17+
import com.google.common.collect.ImmutableMap;
18+
import io.airlift.units.Duration;
19+
import io.trino.plugin.hive.metastore.MetastoreUtil;
20+
import io.trino.plugin.hive.metastore.Table;
21+
import io.trino.testing.QueryRunner;
22+
import org.apache.hadoop.fs.Path;
23+
import org.testng.annotations.Test;
24+
25+
import java.util.List;
26+
import java.util.NoSuchElementException;
27+
28+
import static io.trino.plugin.hive.HiveQueryRunner.TPCH_SCHEMA;
29+
import static io.trino.plugin.hive.metastore.PrincipalPrivileges.NO_PRIVILEGES;
30+
import static java.lang.String.format;
31+
import static org.testng.Assert.assertFalse;
32+
import static org.testng.Assert.assertTrue;
33+
34+
// some tests may invalidate the whole cache affecting therefore other concurrent tests
35+
@Test(singleThreaded = true)
36+
public class TestCachingDirectoryListerRecursiveFilesOnly
37+
extends BaseCachingDirectoryListerTest<CachingDirectoryLister>
38+
{
39+
@Override
40+
protected CachingDirectoryLister createDirectoryLister()
41+
{
42+
return new CachingDirectoryLister(Duration.valueOf("5m"), 1_000_000L, List.of("tpch.*"));
43+
}
44+
45+
@Override
46+
protected QueryRunner createQueryRunner()
47+
throws Exception
48+
{
49+
return createQueryRunner(ImmutableMap.of(
50+
"hive.allow-register-partition-procedure", "true",
51+
"hive.recursive-directories", "true"));
52+
}
53+
54+
@Override
55+
protected boolean isCached(CachingDirectoryLister directoryLister, Path path)
56+
{
57+
return directoryLister.isCached(new DirectoryListingCacheKey(path, true));
58+
}
59+
60+
@Test
61+
public void testRecursiveDirectories()
62+
{
63+
// Create partitioned table to force files to be inserted in sub-partition paths
64+
assertUpdate("CREATE TABLE recursive_directories (clicks bigint, day date, country varchar) WITH (format = 'ORC', partitioned_by = ARRAY['day', 'country'])");
65+
assertUpdate("INSERT INTO recursive_directories VALUES (1000, DATE '2022-02-01', 'US'), (2000, DATE '2022-02-01', 'US'), (4000, DATE '2022-02-02', 'US'), (1500, DATE '2022-02-01', 'AT'), (2500, DATE '2022-02-02', 'AT')", 5);
66+
67+
// Replace the partitioned table a new unpartitioned table with the same root location, leaving the data in place
68+
Table partitionedTable = getTable(TPCH_SCHEMA, "recursive_directories")
69+
.orElseThrow(() -> new NoSuchElementException(format("Failed to read table %s.%s", TPCH_SCHEMA, "recursive_directories")));
70+
// Must not delete the data files when dropping the partitioned table
71+
dropTable(TPCH_SCHEMA, "recursive_directories", false);
72+
// Must create the table directly to bypass check that the target directory already exists
73+
Table testTable = Table.builder(partitionedTable)
74+
.setPartitionColumns(ImmutableList.of())
75+
.build();
76+
createTable(testTable, testTable.getOwner().map(MetastoreUtil::buildInitialPrivilegeSet).orElse(NO_PRIVILEGES));
77+
78+
// Execute a query on the new table to pull the listing into the cache
79+
assertQuery("SELECT sum(clicks) FROM recursive_directories", "VALUES (11000)");
80+
81+
Path tableLocation = getTableLocation(TPCH_SCHEMA, "recursive_directories");
82+
assertTrue(isCached(tableLocation));
83+
84+
// Insert should invalidate cache, even at the root directory path
85+
assertUpdate("INSERT INTO recursive_directories VALUES (1000)", 1);
86+
assertFalse(isCached(tableLocation));
87+
88+
// Results should include the new insert which is at the table location root for the unpartitioned table
89+
assertQuery("SELECT sum(clicks) FROM recursive_directories", "VALUES (12000)");
90+
91+
assertUpdate("DROP TABLE recursive_directories");
92+
93+
assertFalse(isCached(tableLocation));
94+
}
95+
}

0 commit comments

Comments
 (0)