Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
db77af8
HBASE-25373 Remove HTrace completely in code base and try to make use…
Apache9 Dec 9, 2020
da67fad
HBASE-25401 Add trace support for async call in rpc client (#2790)
Apache9 Dec 23, 2020
a7ba387
HBASE-25424 Find a way to config OpenTelemetry tracing without direct…
Apache9 Dec 24, 2020
92fd790
HBASE-23898 Add trace support for simple apis in async client (#2813)
Apache9 Dec 30, 2020
8704b90
HBASE-25454 Add trace support for connection registry (#2828)
Apache9 Jan 1, 2021
b2c76d1
HBASE-25481 Add host and port attribute when tracing rpc call at clie…
Apache9 Jan 8, 2021
b4239a4
HBASE-25455 Add trace support for HRegion read/write operation (#2861)
Apache9 Jan 18, 2021
acc8b0e
HBASE-25484 Add trace support for WAL sync (#2892)
Apache9 Jan 22, 2021
4a6e539
HBASE-25535 Set span kind to CLIENT in AbstractRpcClient (#2907)
Apache9 Jan 28, 2021
fcda8b6
HBASE-25591 Upgrade opentelemetry to 0.17.1 (#2971)
Apache9 Feb 26, 2021
17e4fd9
HBASE-25617 Revisit the span names (#2998)
Apache9 Mar 1, 2021
a854b22
HBASE-25616 Upgrade opentelemetry to 1.0.0 (#3034)
Apache9 Mar 10, 2021
1f79571
HBASE-25723 Temporarily remove the trace support for RegionScanner.ne…
Apache9 Apr 3, 2021
2a8da1f
HBASE-25732 Change the command line argument for tracing after upgrad…
Apache9 Apr 7, 2021
29480e2
HBASE-25733 Upgrade opentelemetry to 1.0.1 (#3122)
Apache9 Apr 7, 2021
d7b490a
HBASE-23762 Add documentation on how to enable and view tracing with …
Apache9 Apr 9, 2021
c12e0a7
HBASE-25778 The tracinig implementation for AsyncConnectionImpl.getHb…
Apache9 Apr 16, 2021
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions bin/hbase
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,11 @@ add_jdk11_deps_to_classpath() {
done
}

enable_trace() {
agent_jar=$(find lib/trace -type f -name "opentelemetry-javaagent-*")
HBASE_OPTS="$HBASE_OPTS -javaagent:$agent_jar $HBASE_TRACE_OPTS"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It'll just fail if two classes w/ same prefix... that is fine I think and shouldn't happen usually.

}

#Add the development env class path stuff
if $in_dev_env; then
add_maven_deps_to_classpath "cached_classpath.txt"
Expand Down Expand Up @@ -772,6 +777,11 @@ elif [ "${DEBUG}" = "true" ]; then
echo "JDK11 jars skipped from classpath."
fi

if [[ -n "${HBASE_TRACE_OPTS}" ]]; then
echo "Attach opentelemetry agent to enable trace"
enable_trace
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This trick looks good. Where will you document it? Maybe if there was an 'hbase trace' command and if you ran it, you'd get output telling you to populate HBASE_TRACE_OPTs giving examples... perhaps that would be enough to get folks going? Would also advertise the new facility exits.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In hbase-env.sh, uncomment the last line and modify the options. Will fill this in the ref guide.

fi

# Have JVM dump heap if we run out of memory. Files will be 'launch directory'
# and are named like the following: java_pid21612.hprof. Apparently it doesn't
# 'cost' to have this flag enabled. Its a 1.6 flag only. See:
Expand Down
5 changes: 5 additions & 0 deletions conf/hbase-env.cmd
Original file line number Diff line number Diff line change
Expand Up @@ -81,3 +81,8 @@ set HBASE_OPTS=%HBASE_OPTS% "-XX:+UseConcMarkSweepGC" "-Djava.net.preferIPv4Stac

@rem Tell HBase whether it should manage it's own instance of ZooKeeper or not.
@rem set HBASE_MANAGES_ZK=true

@rem Uncomment to enable trace, you can change the options to use other exporters such as jaeger or
@rem zipkin. See https://github.com/open-telemetry/opentelemetry-java-instrumentation on how to
@rem configure exporters and other components through system properties.
@rem set HBASE_TRACE_OPTS="-Dotel.resource.attributes=service.name=HBase -Dotel.traces.exporter=logging otel.metrics.exporter=none"
5 changes: 5 additions & 0 deletions conf/hbase-env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -142,3 +142,8 @@
# Override text processing tools for use by these launch scripts.
# export GREP="${GREP-grep}"
# export SED="${SED-sed}"

# Uncomment to enable trace, you can change the options to use other exporters such as jaeger or
# zipkin. See https://github.com/open-telemetry/opentelemetry-java-instrumentation on how to
# configure exporters and other components through system properties.
# export HBASE_TRACE_OPTS="-Dotel.resource.attributes=service.name=HBase -Dotel.traces.exporter=logging otel.metrics.exporter=none"
6 changes: 6 additions & 0 deletions hbase-assembly/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -347,5 +347,11 @@
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-1.2-api</artifactId>
</dependency>
<!-- Include OpenTelemetry agent -->
<dependency>
<groupId>io.opentelemetry.javaagent</groupId>
<artifactId>opentelemetry-javaagent</artifactId>
<classifier>all</classifier>
</dependency>
</dependencies>
</project>
9 changes: 8 additions & 1 deletion hbase-assembly/src/main/assembly/hadoop-three-compat.xml
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@
<exclude>org.apache.yetus:audience-annotations</exclude>
<exclude>org.slf4j:*</exclude>
<exclude>org.apache.logging.log4j:*</exclude>
<exclude>io.opentelemetry.javaagent:*</exclude>
</excludes>
</dependencySet>
</dependencySets>
Expand Down Expand Up @@ -221,7 +222,7 @@
<outputDirectory>lib/jdk11</outputDirectory>
<useTransitiveDependencies>true</useTransitiveDependencies>
<includes>
<include>com.sun.activation:javax.activation</include>
<include>com.sun.activation:javax.activation</include>
<!-- The following artifacts are transitive dependencies of com.sun.xml.ws:jaxws-ri:pom
They are needed to be included in lib/jdk11 to be added to classpath during
Java 11 runtime
Expand Down Expand Up @@ -253,6 +254,12 @@
<include>jakarta.jws:jakarta.jws-api</include>
</includes>
</dependencySet>
<dependencySet>
<outputDirectory>lib/trace</outputDirectory>
<includes>
<include>io.opentelemetry.javaagent:*</include>
</includes>
</dependencySet>
</dependencySets>

</assembly>
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,9 @@

import java.io.File;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseCommonTestingUtility;
import org.apache.hadoop.hbase.trace.TraceUtil;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand Down Expand Up @@ -99,7 +97,6 @@ protected static void startMiniDFSCluster(int servers) throws IOException {

Configuration conf = UTIL.getConfiguration();

TraceUtil.initTracer(conf);
CLUSTER = new MiniDFSCluster.Builder(conf).numDataNodes(servers).build();
CLUSTER.waitClusterUp();
}
Expand Down
14 changes: 12 additions & 2 deletions hbase-client/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,8 @@
<artifactId>zookeeper</artifactId>
</dependency>
<dependency>
<groupId>org.apache.htrace</groupId>
<artifactId>htrace-core4</artifactId>
<groupId>io.opentelemetry</groupId>
<artifactId>opentelemetry-api</artifactId>
</dependency>
<dependency>
<groupId>org.jruby.jcodings</groupId>
Expand All @@ -144,6 +144,16 @@
<groupId>org.jruby.joni</groupId>
<artifactId>joni</artifactId>
</dependency>
<dependency>
<groupId>io.opentelemetry</groupId>
<artifactId>opentelemetry-sdk</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>io.opentelemetry</groupId>
<artifactId>opentelemetry-sdk-testing</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>jcl-over-slf4j</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,8 @@ public interface AsyncConnection extends Closeable {
/**
* Retrieve an {@link AsyncTable} implementation for accessing a table.
* <p>
* The returned instance will use default configs. Use {@link #getTableBuilder(TableName)} if
* you want to customize some configs.
* The returned instance will use default configs. Use {@link #getTableBuilder(TableName)} if you
* want to customize some configs.
* <p>
* This method no longer checks table existence. An exception will be thrown if the table does not
* exist only when the first operation is attempted.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import static org.apache.hadoop.hbase.client.NonceGenerator.CLIENT_NONCES_ENABLED_KEY;
import static org.apache.hadoop.hbase.util.FutureUtils.addListener;

import io.opentelemetry.api.trace.Span;
import java.io.IOException;
import java.net.SocketAddress;
import java.util.Optional;
Expand All @@ -38,6 +39,7 @@
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.Supplier;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.AuthUtil;
Expand All @@ -51,6 +53,7 @@
import org.apache.hadoop.hbase.ipc.RpcClientFactory;
import org.apache.hadoop.hbase.ipc.RpcControllerFactory;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.hbase.trace.TraceUtil;
import org.apache.hadoop.hbase.util.ConcurrentMapUtils;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.security.UserGroupInformation;
Expand Down Expand Up @@ -121,7 +124,7 @@ class AsyncConnectionImpl implements AsyncConnection {
private volatile ConnectionOverAsyncConnection conn;

public AsyncConnectionImpl(Configuration conf, ConnectionRegistry registry, String clusterId,
SocketAddress localAddress, User user) {
SocketAddress localAddress, User user) {
this.conf = conf;
this.user = user;

Expand All @@ -135,8 +138,8 @@ public AsyncConnectionImpl(Configuration conf, ConnectionRegistry registry, Stri
} else {
this.metrics = Optional.empty();
}
this.rpcClient = RpcClientFactory.createClient(
conf, clusterId, localAddress, metrics.orElse(null));
this.rpcClient =
RpcClientFactory.createClient(conf, clusterId, localAddress, metrics.orElse(null));
this.rpcControllerFactory = RpcControllerFactory.instantiate(conf);
this.rpcTimeout =
(int) Math.min(Integer.MAX_VALUE, TimeUnit.NANOSECONDS.toMillis(connConf.getRpcTimeoutNs()));
Expand All @@ -159,14 +162,13 @@ public AsyncConnectionImpl(Configuration conf, ConnectionRegistry registry, Stri
LOG.warn("{} is true, but {} is not set", STATUS_PUBLISHED, STATUS_LISTENER_CLASS);
} else {
try {
listener = new ClusterStatusListener(
new ClusterStatusListener.DeadServerHandler() {
@Override
public void newDead(ServerName sn) {
locator.clearCache(sn);
rpcClient.cancelConnections(sn);
}
}, conf, listenerClass);
listener = new ClusterStatusListener(new ClusterStatusListener.DeadServerHandler() {
@Override
public void newDead(ServerName sn) {
locator.clearCache(sn);
rpcClient.cancelConnections(sn);
}
}, conf, listenerClass);
} catch (IOException e) {
LOG.warn("Failed create of ClusterStatusListener, not a critical, ignoring...", e);
}
Expand Down Expand Up @@ -206,28 +208,30 @@ public boolean isClosed() {

@Override
public void close() {
if (!closed.compareAndSet(false, true)) {
return;
}
LOG.info("Connection has been closed by {}.", Thread.currentThread().getName());
if(LOG.isDebugEnabled()){
logCallStack(Thread.currentThread().getStackTrace());
}
IOUtils.closeQuietly(clusterStatusListener,
e -> LOG.warn("failed to close clusterStatusListener", e));
IOUtils.closeQuietly(rpcClient, e -> LOG.warn("failed to close rpcClient", e));
IOUtils.closeQuietly(registry, e -> LOG.warn("failed to close registry", e));
synchronized (this) {
if (choreService != null) {
choreService.shutdown();
choreService = null;
TraceUtil.trace(() -> {
if (!closed.compareAndSet(false, true)) {
return;
}
}
metrics.ifPresent(MetricsConnection::shutdown);
ConnectionOverAsyncConnection c = this.conn;
if (c != null) {
c.closePool();
}
LOG.info("Connection has been closed by {}.", Thread.currentThread().getName());
if (LOG.isDebugEnabled()) {
logCallStack(Thread.currentThread().getStackTrace());
}
IOUtils.closeQuietly(clusterStatusListener,
e -> LOG.warn("failed to close clusterStatusListener", e));
IOUtils.closeQuietly(rpcClient, e -> LOG.warn("failed to close rpcClient", e));
IOUtils.closeQuietly(registry, e -> LOG.warn("failed to close registry", e));
synchronized (this) {
if (choreService != null) {
choreService.shutdown();
choreService = null;
}
}
metrics.ifPresent(MetricsConnection::shutdown);
ConnectionOverAsyncConnection c = this.conn;
if (c != null) {
c.closePool();
}
}, "AsyncConnection.close");
}

private void logCallStack(StackTraceElement[] stackTraceElements) {
Expand Down Expand Up @@ -341,7 +345,7 @@ public AsyncTable<AdvancedScanResultConsumer> build() {

@Override
public AsyncTableBuilder<ScanResultConsumer> getTableBuilder(TableName tableName,
ExecutorService pool) {
ExecutorService pool) {
return new AsyncTableBuilderBase<ScanResultConsumer>(tableName, connConf) {

@Override
Expand Down Expand Up @@ -382,7 +386,7 @@ public AsyncBufferedMutatorBuilder getBufferedMutatorBuilder(TableName tableName

@Override
public AsyncBufferedMutatorBuilder getBufferedMutatorBuilder(TableName tableName,
ExecutorService pool) {
ExecutorService pool) {
return new AsyncBufferedMutatorBuilderImpl(connConf, getTableBuilder(tableName, pool),
RETRY_TIMER);
}
Expand All @@ -404,30 +408,39 @@ public Connection toConnection() {
return c;
}

private Hbck getHbckInternal(ServerName masterServer) {
Span.current().setAttribute(TraceUtil.SERVER_NAME_KEY, masterServer.getServerName());
// we will not create a new connection when creating a new protobuf stub, and for hbck there
// will be no performance consideration, so for simplification we will create a new stub every
// time instead of caching the stub here.
return new HBaseHbck(MasterProtos.HbckService.newBlockingStub(
rpcClient.createBlockingRpcChannel(masterServer, user, rpcTimeout)), rpcControllerFactory);
}

@Override
public CompletableFuture<Hbck> getHbck() {
CompletableFuture<Hbck> future = new CompletableFuture<>();
addListener(registry.getActiveMaster(), (sn, error) -> {
if (error != null) {
future.completeExceptionally(error);
} else {
try {
future.complete(getHbck(sn));
} catch (IOException e) {
future.completeExceptionally(e);
return TraceUtil.tracedFuture(() -> {
CompletableFuture<Hbck> future = new CompletableFuture<>();
addListener(registry.getActiveMaster(), (sn, error) -> {
if (error != null) {
future.completeExceptionally(error);
} else {
future.complete(getHbckInternal(sn));
}
}
});
return future;
});
return future;
}, "AsyncConnection.getHbck");
}

@Override
public Hbck getHbck(ServerName masterServer) throws IOException {
// we will not create a new connection when creating a new protobuf stub, and for hbck there
// will be no performance consideration, so for simplification we will create a new stub every
// time instead of caching the stub here.
return new HBaseHbck(MasterProtos.HbckService.newBlockingStub(
rpcClient.createBlockingRpcChannel(masterServer, user, rpcTimeout)), rpcControllerFactory);
public Hbck getHbck(ServerName masterServer) {
return TraceUtil.trace(new Supplier<Hbck>() {

@Override
public Hbck get() {
return getHbckInternal(masterServer);
}
}, "AsyncConnection.getHbck");
}

Optional<MetricsConnection> getConnectionMetrics() {
Expand Down
Loading