Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
*/
package io.trino.plugin.hive.metastore.thrift;

import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import io.airlift.log.Logger;
import io.airlift.units.Duration;
Expand Down Expand Up @@ -74,6 +75,24 @@ public static RetryDriver retry()
return new RetryDriver();
}

/**
* Check if the exception indicates an access control failure.
* These exceptions should not be retried as they represent permanent
* authorization failures.
* For example: MetaException wrapping AccessControlException from HDFS
* permission denied errors.
*/
private static boolean isAccessControlException(Exception exception)
{
// Check the exception message and cause chain for AccessControlException
// e.g. io.trino.hive.thrift.metastore.MetaException:
// org.apache.hadoop.security.AccessControlException: Permission denied: ...
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this could be simplified to:

Throwables.getCausalChain(exception)
                .stream()
                .map(Throwable::toString)
                .anyMatch(message -> message != null && message.contains("AccessControlException"));

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Simplified :)

return Throwables.getCausalChain(exception)
.stream()
.map(Throwable::toString)
.anyMatch(message -> message != null && message.contains("AccessControlException"));
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just an idea : maybe we could use the class to check if it is a AccessControlException, also since the class is marked deprecated, so could use the SecurityException instead?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You mean do this instead of contains?

  • err.getClass().getName().equals("org.apache.hadoop.fs.permission.AccessControlException")) {

Seems like it is superseded by "org.apache.hadoop.security.AccessControlException"
Nonetheless, depending on Hive version, any of the class name could occur, so we could check for both class

}

public final RetryDriver maxAttempts(int maxAttempts)
{
return new RetryDriver(maxAttempts, minSleepTime, maxSleepTime, scaleFactor, maxRetryTime, stopOnExceptions);
Expand Down Expand Up @@ -128,6 +147,11 @@ public <V> V run(String callableName, Callable<V> callable)
throw e;
}
}
// Do not retry on access control exceptions - these are permanent failures
if (isAccessControlException(e)) {
addSuppressed(e, suppressedExceptions);
throw e;
}
if (attempt >= maxAttempts || Duration.nanosSince(startTime).compareTo(maxRetryTime) >= 0) {
addSuppressed(e, suppressedExceptions);
throw e;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.trino.plugin.hive.metastore.thrift;

import io.airlift.units.Duration;
import io.trino.hive.thrift.metastore.MetaException;
import org.junit.jupiter.api.Test;

import java.util.concurrent.atomic.AtomicInteger;

import static java.util.concurrent.TimeUnit.MILLISECONDS;
import static java.util.concurrent.TimeUnit.SECONDS;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;

class TestRetryDriver
{
@Test
void testSuccessfulCall()
throws Exception
{
AtomicInteger attempts = new AtomicInteger(0);
String result = RetryDriver.retry().maxAttempts(3).run("test", () -> {
attempts.incrementAndGet();
return "success";
});

assertThat(result).isEqualTo("success");
assertThat(attempts.get()).isEqualTo(1);
}

@Test
void testRetryOnFailure()
throws Exception
{
AtomicInteger attempts = new AtomicInteger(0);
String result = RetryDriver.retry()
.maxAttempts(3)
.exponentialBackoff(new Duration(1, MILLISECONDS), new Duration(10, MILLISECONDS),
new Duration(1, SECONDS), 2.0)
.run("test", () -> {
if (attempts.incrementAndGet() < 3) {
throw new RuntimeException("Temporary failure");
}
return "success";
});

assertThat(result).isEqualTo("success");
assertThat(attempts.get()).isEqualTo(3);
}

@Test
void testStopOnSpecificException()
{
AtomicInteger attempts = new AtomicInteger(0);

assertThatThrownBy(
() -> RetryDriver.retry().maxAttempts(5).stopOn(IllegalArgumentException.class).run("test", () -> {
attempts.incrementAndGet();
throw new IllegalArgumentException("Stop immediately");
})).isInstanceOf(IllegalArgumentException.class).hasMessage("Stop immediately");

// Should stop on first attempt without retrying
assertThat(attempts.get()).isEqualTo(1);
}

@Test
void testDoNotRetryOnAccessControlException()
{
AtomicInteger attempts = new AtomicInteger(0);

// Simulate MetaException wrapping AccessControlException (as seen in HDFS
// permission denied errors)
assertThatThrownBy(() -> RetryDriver.retry()
.maxAttempts(5)
.exponentialBackoff(new Duration(1, MILLISECONDS), new Duration(10, MILLISECONDS),
new Duration(1, SECONDS), 2.0)
.run("test", () -> {
attempts.incrementAndGet();
throw new MetaException(
"org.apache.hadoop.security.AccessControlException: Permission denied: user=testuser, access=EXECUTE, inode=\"/user/hive\"");
})).isInstanceOf(MetaException.class).hasMessageContaining("AccessControlException");

// Should stop on first attempt without retrying
assertThat(attempts.get()).isEqualTo(1);
}

@Test
void testDoNotRetryOnNestedAccessControlException()
{
AtomicInteger attempts = new AtomicInteger(0);

// Simulate exception with AccessControlException in the cause chain
Exception accessControlCause = new RuntimeException(
"org.apache.hadoop.security.AccessControlException: Permission denied");
Exception wrappedException = new RuntimeException("Wrapper exception", accessControlCause);

assertThatThrownBy(() -> RetryDriver.retry()
.maxAttempts(5)
.exponentialBackoff(new Duration(1, MILLISECONDS), new Duration(10, MILLISECONDS),
new Duration(1, SECONDS), 2.0)
.run("test", () -> {
attempts.incrementAndGet();
throw wrappedException;
})).isInstanceOf(RuntimeException.class).hasMessageContaining("Wrapper exception");

// Should stop on first attempt without retrying
assertThat(attempts.get()).isEqualTo(1);
}

@Test
void testRetryOnOtherMetaException()
throws Exception
{
AtomicInteger attempts = new AtomicInteger(0);

// MetaException without AccessControlException should be retried
String result = RetryDriver.retry()
.maxAttempts(3)
.exponentialBackoff(new Duration(1, MILLISECONDS), new Duration(10, MILLISECONDS),
new Duration(1, SECONDS), 2.0)
.run("test", () -> {
if (attempts.incrementAndGet() < 3) {
throw new MetaException("Temporary metastore error");
}
return "success";
});

assertThat(result).isEqualTo("success");
assertThat(attempts.get()).isEqualTo(3);
}

@Test
void testMaxAttemptsExceeded()
{
AtomicInteger attempts = new AtomicInteger(0);

assertThatThrownBy(() -> RetryDriver.retry()
.maxAttempts(3)
.exponentialBackoff(new Duration(1, MILLISECONDS), new Duration(10, MILLISECONDS),
new Duration(1, SECONDS), 2.0)
.run("test", () -> {
attempts.incrementAndGet();
throw new RuntimeException("Always fails");
})).isInstanceOf(RuntimeException.class).hasMessage("Always fails");

assertThat(attempts.get()).isEqualTo(3);
}
}