Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/changelog/142330.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
area: ES|QL
issues:
- 139883
pr: 142330
summary: Log compute engine server failures as ERROR
type: bug
Original file line number Diff line number Diff line change
Expand Up @@ -201,12 +201,12 @@ SubscribableListener<Void> run(TimeValue maxTime, int maxIterations, LongSupplie
RestStatus status = ExceptionsHelper.status(e);
// HTTP 4xx status codes (400-499) indicate client/user errors: malformed requests, authentication
// failures, cancellations, etc. These are expected user actions, not system failures, so log at
// DEBUG level to reduce noise. HTTP 5xx status codes (500+) indicate server errors: internal
// failures, timeouts, etc. These are system failures that need attention, so log at WARN level.
// DEBUG level to reduce noise. HTTP 5xx status codes (500+) indicate internal server failures and
// potential bugs, so log at ERROR.
if (status.getStatus() >= 400 && status.getStatus() < 500) {
LOGGER.debug(Strings.format("User error running driver [%s]", shortDescription), e);
} else {
LOGGER.warn(Strings.format("Error running driver [%s]", shortDescription), e);
LOGGER.error(Strings.format("Error running driver [%s]", shortDescription), e);
}
throw e;
} finally {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,13 @@

package org.elasticsearch.compute.operator;

import org.apache.logging.log4j.Level;
import org.elasticsearch.ElasticsearchStatusException;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.ActionRunnable;
import org.elasticsearch.action.support.PlainActionFuture;
import org.elasticsearch.common.breaker.CircuitBreaker;
import org.elasticsearch.common.breaker.CircuitBreakingException;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.util.MockBigArrays;
Expand All @@ -31,11 +34,16 @@
import org.elasticsearch.compute.test.TestDriverFactory;
import org.elasticsearch.compute.test.TestResultPageSinkOperator;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.tasks.TaskCancelledException;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.test.MockLog;
import org.elasticsearch.test.junit.annotations.TestLogging;
import org.elasticsearch.threadpool.FixedExecutorBuilder;
import org.elasticsearch.threadpool.TestThreadPool;
import org.elasticsearch.threadpool.ThreadPool;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
Expand Down Expand Up @@ -247,6 +255,59 @@ public void testUnchangedStatus() {
}
}

public void testServerErrorsAreLoggedAsError() {
assertFailureLogged(
new IllegalStateException("simulated compute bug"),
IllegalStateException.class,
Level.ERROR,
"*Error running driver [test-task]*"
);
assertFailureLogged(
new ArrayIndexOutOfBoundsException("simulated broken invariant"),
ArrayIndexOutOfBoundsException.class,
Level.ERROR,
"*Error running driver [test-task]*"
);
assertFailureLogged(
new RuntimeException(new IOException("io failure")),
RuntimeException.class,
Level.ERROR,
"*Error running driver [test-task]*"
);
}

@TestLogging(reason = "assert DEBUG user-error logging in Driver", value = "org.elasticsearch.compute.operator.Driver:DEBUG")
public void testClientErrorsAreLoggedAsDebug() {
assertFailureLogged(
new ElasticsearchStatusException("bad request", RestStatus.BAD_REQUEST),
ElasticsearchStatusException.class,
Level.DEBUG,
"*User error running driver [test-task]*"
);
assertFailureLogged(
new IllegalArgumentException("bad argument"),
IllegalArgumentException.class,
Level.DEBUG,
"*User error running driver [test-task]*"
);
assertFailureLogged(
new CircuitBreakingException("too many bytes", CircuitBreaker.Durability.PERMANENT),
CircuitBreakingException.class,
Level.DEBUG,
"*User error running driver [test-task]*"
);
}

@TestLogging(reason = "assert DEBUG cancellation logging in Driver", value = "org.elasticsearch.compute.operator.Driver:DEBUG")
public void testTaskCancellationIsLoggedAsDebug() {
assertFailureLogged(
new TaskCancelledException("cancelled"),
TaskCancelledException.class,
Level.DEBUG,
"*Cancelling running driver [test-task]*"
);
}

private static Driver createDriver(
long startEpoch,
long startNanos,
Expand All @@ -272,6 +333,51 @@ private static Driver createDriver(
);
}

private static Driver createFailingDriver(DriverContext driverContext, RuntimeException failure) {
return TestDriverFactory.create(
driverContext,
new CannedSourceOperator(List.of(new Page(driverContext.blockFactory().newConstantIntBlockWith(1, 1))).iterator()),
List.of(new AbstractPageMappingOperator() {
@Override
protected Page process(Page page) {
throw failure;
}

@Override
public String toString() {
return "failing_operator";
}
}),
new TestResultPageSinkOperator(page -> fail("sink should not receive output"))
);
}

private void assertFailureLogged(
RuntimeException failure,
Class<? extends RuntimeException> expectedExceptionClass,
Level expectedLogLevel,
String expectedMessagePattern
) {
Driver driver = createFailingDriver(driverContext(), failure);
try {
MockLog.assertThatLogger(
() -> expectThrows(
expectedExceptionClass,
() -> driver.run(TimeValue.timeValueDays(1), Integer.MAX_VALUE, System::nanoTime)
),
Driver.class,
new MockLog.SeenEventExpectation(
"expected log event for " + failure.getClass().getSimpleName(),
Driver.class.getCanonicalName(),
expectedLogLevel,
expectedMessagePattern
)
);
} finally {
driver.close();
}
}

static class NowSupplier implements LongSupplier {
private final long startNanos;
private final long waitTime;
Expand Down