Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 1 addition & 15 deletions plugin/trino-hive/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -175,21 +175,7 @@
<dependency>
<groupId>com.google.cloud.bigdataoss</groupId>
<artifactId>gcs-connector</artifactId>
</dependency>

<dependency>
<groupId>com.google.cloud.bigdataoss</groupId>
<artifactId>gcsio</artifactId>
</dependency>

<dependency>
<groupId>com.google.cloud.bigdataoss</groupId>
<artifactId>util</artifactId>
</dependency>

<dependency>
<groupId>com.google.cloud.bigdataoss</groupId>
<artifactId>util-hadoop</artifactId>
<classifier>shaded</classifier>
</dependency>

<dependency>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,13 @@
*/
package io.trino.plugin.hive.gcs;

import com.google.cloud.hadoop.gcsio.GoogleCloudStorageFileSystem;
import io.trino.hdfs.DynamicConfigurationProvider;
import io.trino.hdfs.HdfsContext;
import org.apache.hadoop.conf.Configuration;

import java.net.URI;

import static com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem.SCHEME;
import static io.trino.hdfs.DynamicConfigurationProvider.setCacheKey;
import static io.trino.plugin.hive.gcs.GcsAccessTokenProvider.GCS_ACCESS_TOKEN_CONF;

Expand All @@ -31,7 +31,7 @@ public class GcsConfigurationProvider
@Override
public void updateConfiguration(Configuration configuration, HdfsContext context, URI uri)
{
if (!uri.getScheme().equals(GoogleCloudStorageFileSystem.SCHEME)) {
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This import may have been easier to read than new static import.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i agree, "SCHEME" is not a good name to import statically

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@elonazoulay let's leave this file as it was before this PR.

if (!uri.getScheme().equals(SCHEME)) {
return;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,16 @@
package io.trino.plugin.hive.gcs;

import com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem;
import com.google.cloud.hadoop.util.AccessTokenProvider;
import io.trino.hdfs.ConfigurationInitializer;
import org.apache.hadoop.conf.Configuration;

import javax.inject.Inject;

import static com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemBase.AUTHENTICATION_PREFIX;
import static com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemConfiguration.AUTH_SERVICE_ACCOUNT_ENABLE;
import static com.google.cloud.hadoop.util.AccessTokenProviderClassFromConfigFactory.ACCESS_TOKEN_PROVIDER_IMPL_SUFFIX;
import static com.google.cloud.hadoop.util.EntriesCredentialConfiguration.JSON_KEYFILE_SUFFIX;
import static com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemConfiguration.GCS_CONFIG_PREFIX;
import static com.google.cloud.hadoop.fs.gcs.HadoopCredentialConfiguration.ACCESS_TOKEN_PROVIDER_IMPL_SUFFIX;
import static com.google.cloud.hadoop.fs.gcs.HadoopCredentialConfiguration.ENABLE_SERVICE_ACCOUNTS_SUFFIX;
import static com.google.cloud.hadoop.fs.gcs.HadoopCredentialConfiguration.SERVICE_ACCOUNT_JSON_KEYFILE_SUFFIX;

public class GoogleGcsConfigurationInitializer
implements ConfigurationInitializer
Expand All @@ -44,13 +45,13 @@ public void initializeConfiguration(Configuration config)

if (useGcsAccessToken) {
// use oauth token to authenticate with Google Cloud Storage
config.set(AUTH_SERVICE_ACCOUNT_ENABLE.getKey(), "false");
config.set(AUTHENTICATION_PREFIX + ACCESS_TOKEN_PROVIDER_IMPL_SUFFIX, GcsAccessTokenProvider.class.getName());
config.setBoolean(GCS_CONFIG_PREFIX + ENABLE_SERVICE_ACCOUNTS_SUFFIX.getKey(), false);
config.setClass(GCS_CONFIG_PREFIX + ACCESS_TOKEN_PROVIDER_IMPL_SUFFIX.getKey(), GcsAccessTokenProvider.class, AccessTokenProvider.class);
}
else if (jsonKeyFilePath != null) {
// use service account key file
config.set(AUTH_SERVICE_ACCOUNT_ENABLE.getKey(), "true");
config.set(AUTHENTICATION_PREFIX + JSON_KEYFILE_SUFFIX, jsonKeyFilePath);
config.setBoolean(GCS_CONFIG_PREFIX + ENABLE_SERVICE_ACCOUNTS_SUFFIX.getKey(), true);
config.set(GCS_CONFIG_PREFIX + SERVICE_ACCOUNT_JSON_KEYFILE_SUFFIX.getKey(), jsonKeyFilePath);
}
}
}
73 changes: 42 additions & 31 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@
<dep.oracle.version>19.3.0.0</dep.oracle.version>
<dep.drift.version>1.14</dep.drift.version>
<dep.tempto.version>189</dep.tempto.version>
<dep.gcs.version>2.0.0</dep.gcs.version>
<dep.gcs.version>2.2.8</dep.gcs.version>
<dep.errorprone.version>2.15.0</dep.errorprone.version>
<dep.testcontainers.version>1.17.3</dep.testcontainers.version>
<dep.duct-tape.version>1.0.8</dep.duct-tape.version>
Expand Down Expand Up @@ -1114,42 +1114,15 @@
<groupId>com.google.cloud.bigdataoss</groupId>
<artifactId>gcs-connector</artifactId>
<version>hadoop2-${dep.gcs.version}</version>
<classifier>shaded</classifier>
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why did you opt for shading?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it is because we can have issues with other dependencies without shading.

<exclusions>
<exclusion>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
</exclusions>
</dependency>

<dependency>
<groupId>com.google.cloud.bigdataoss</groupId>
<artifactId>gcsio</artifactId>
<version>${dep.gcs.version}</version>
<exclusions>
<exclusion>
<groupId>com.google.code.gson</groupId>
<artifactId>gson</artifactId>
<groupId>*</groupId>
<artifactId>*</artifactId>
</exclusion>
</exclusions>
</dependency>

<dependency>
<groupId>com.google.cloud.bigdataoss</groupId>
<artifactId>util</artifactId>
<version>${dep.gcs.version}</version>
</dependency>

<dependency>
<groupId>com.google.cloud.bigdataoss</groupId>
<artifactId>util-hadoop</artifactId>
<version>hadoop2-${dep.gcs.version}</version>
</dependency>

<dependency>
<groupId>com.google.errorprone</groupId>
<artifactId>error_prone_annotations</artifactId>
Expand Down Expand Up @@ -1951,6 +1924,44 @@
<useNativeGit>true</useNativeGit>
</configuration>
</plugin>
<plugin>
<groupId>org.basepom.maven</groupId>
<artifactId>duplicate-finder-maven-plugin</artifactId>
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What it takes to fix issues instead of ignoring them? Can you please list issues that got without this?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here are the issues:

[WARNING] Found duplicate (but equal) resources in [com.google.cloud.bigdataoss:gcs-connector:hadoop2-2.2.8:jar:shaded, org.apache.httpcomponents:httpclient:4.5.13]:
[WARNING]   mozilla/public-suffix-list.txt
[WARNING] Found duplicate classes/resources in compile classpath.
[WARNING] Found duplicate (but equal) resources in [com.google.cloud.bigdataoss:gcs-connector:hadoop2-2.2.8:jar:shaded, org.apache.httpcomponents:httpclient:4.5.13]:
[WARNING]   mozilla/public-suffix-list.txt
[WARNING] Found duplicate classes/resources in runtime classpath.
[WARNING] Found duplicate (but equal) resources in [com.google.cloud.bigdataoss:gcs-connector:hadoop2-2.2.8:jar:shaded, org.apache.httpcomponents:httpclient:4.5.13]:
[WARNING]   mozilla/public-suffix-list.txt
[WARNING] Found duplicate classes/resources in test classpath.

Copy link
Copy Markdown
Member Author

@elonazoulay elonazoulay Sep 22, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Second warning is from the trino-faulttolerant-tests module:

[WARNING] Found duplicate and different resources in [com.google.api:gax:2.17.0, com.google.cloud.bigdataoss:gcs-connector:hadoop2-2.2.8:jar:shaded]:
[WARNING]   dependencies.properties
[WARNING] Found duplicate classes/resources in test classpath.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for the update.

<configuration>
<exceptions combine.children="append">
<exception>
<conflictingDependencies>
<dependency>
<groupId>com.google.cloud.bigdataoss</groupId>
<artifactId>gcs-connector</artifactId>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
</dependency>
</conflictingDependencies>
<resources>
<resource>mozilla/public-suffix-list.txt</resource>
</resources>
</exception>
<exception>
<conflictingDependencies>
<dependency>
<groupId>com.google.api</groupId>
<artifactId>gax</artifactId>
</dependency>
<dependency>
<groupId>com.google.cloud.bigdataoss</groupId>
<artifactId>gcs-connector</artifactId>
</dependency>
</conflictingDependencies>
<resources>
<resource>dependencies.properties</resource>
Comment on lines 1949 to 1959
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

both are Google-provided. why do they conflict?

cc @medb

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

lmk if there's a better way to handle it.
This is the error without including the conflicting dependencies:

[INFO] --- duplicate-finder-maven-plugin:1.5.0:check (default) @ trino-hive ---
[INFO] Checking compile classpath
[INFO] Checking runtime classpath
[INFO] Checking test classpath
[WARNING] Found duplicate and different resources in [com.google.api:gax:2.17.0, com.google.cloud.bigdataoss:gcs-connector:hadoop2-2.2.6:jar:shaded]:
[WARNING]   dependencies.properties
[WARNING] Found duplicate classes/resources in test classpath.
[INFO] ------------------------------------------------------------------------
[INFO] BUILD FAILURE
[INFO] ------------------------------------------------------------------------
[INFO] Total time:  41.005 s
[INFO] Finished at: 2022-05-26T06:03:10-07:00
[INFO] ------------------------------------------------------------------------
[ERROR] Failed to execute goal org.basepom.maven:duplicate-finder-maven-plugin:1.5.0:check (default) on project trino-hive: Found duplicate classes/resources! -> [Help 1]
[ERROR] 
[ERROR] To see the full stack trace of the errors, re-run Maven with the -e switch.
[ERROR] Re-run Maven using the -X switch to enable full debug logging.
[ERROR] 
[ERROR] For more information about the errors and possible solutions, please read the following articles:
[ERROR] [Help 1] http://cwiki.apache.org/confluence/display/MAVEN/MojoExecutionException

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is probably because GCS connector also depends on GAX and during shading packages GAX's dependencies.properties file, not sure what its purpose, but if not necessary we can exclude it from the shaded jar.

</resources>
</exception>
</exceptions>
</configuration>
</plugin>
</plugins>
</pluginManagement>

Expand Down