Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,14 @@

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.FileVisitOption;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Objects;
Expand All @@ -65,6 +70,52 @@ public class KafkaConnectUtils {

private static final Logger LOG = LogManager.getLogger(KafkaConnectUtils.class);
private static final String HOODIE_CONF_PREFIX = "hoodie.";
public static final String HADOOP_CONF_DIR = "HADOOP_CONF_DIR";
public static final String HADOOP_HOME = "HADOOP_HOME";
private static final List<Path> DEFAULT_HADOOP_CONF_FILES;

static {
  // Eagerly resolve the default Hadoop XML config files from the standard
  // environment variables; HADOOP_CONF_DIR takes precedence over HADOOP_HOME.
  DEFAULT_HADOOP_CONF_FILES = new ArrayList<>();
  try {
    String hadoopConfigPath = System.getenv(HADOOP_CONF_DIR);
    String hadoopHomePath = System.getenv(HADOOP_HOME);
    DEFAULT_HADOOP_CONF_FILES.addAll(getHadoopConfigFiles(hadoopConfigPath, hadoopHomePath));
    if (!DEFAULT_HADOOP_CONF_FILES.isEmpty()) {
      LOG.info(String.format("Found Hadoop default config files %s", DEFAULT_HADOOP_CONF_FILES));
    }
  } catch (IOException e) {
    // Best-effort: a missing/unreadable Hadoop environment must not prevent
    // class loading; users can still configure paths explicitly.
    LOG.error("An error occurred while getting the default Hadoop configuration. "
        + "Please use hadoop.conf.dir or hadoop.home to configure Hadoop environment variables", e);
  }
}

/**
 * Resolves Hadoop XML configuration files from the given locations.
 * The explicit config directory wins; the home directory (under
 * {@code etc/hadoop}) is only consulted when the former yields nothing.
 *
 * @param hadoopConfigPath value of HADOOP_CONF_DIR (may be null/empty)
 * @param hadoopHomePath   value of HADOOP_HOME (may be null/empty)
 * @return the discovered XML config files; empty when none are found
 * @throws IOException if walking a directory tree fails
 */
public static List<Path> getHadoopConfigFiles(String hadoopConfigPath, String hadoopHomePath)
    throws IOException {
  if (!StringUtils.isNullOrEmpty(hadoopConfigPath)) {
    List<Path> fromConfDir = walkTreeForXml(Paths.get(hadoopConfigPath));
    if (!fromConfDir.isEmpty()) {
      return fromConfDir;
    }
  }
  if (!StringUtils.isNullOrEmpty(hadoopHomePath)) {
    return walkTreeForXml(Paths.get(hadoopHomePath, "etc", "hadoop"));
  }
  return new ArrayList<>();
}

/**
 * Recursively collects regular {@code .xml} files under {@code basePath},
 * following symbolic links.
 *
 * @param basePath directory to search; a non-existent path yields an empty list
 * @return mutable list of matching file paths
 * @throws IOException if the file tree cannot be traversed
 */
private static List<Path> walkTreeForXml(Path basePath) throws IOException {
  if (Files.notExists(basePath)) {
    return new ArrayList<>();
  }
  // Files.walk returns a lazily-populated stream backed by open directory
  // handles; it must be closed, so collect inside try-with-resources.
  try (java.util.stream.Stream<Path> stream = Files.walk(basePath, FileVisitOption.FOLLOW_LINKS)) {
    return stream
        .filter(path -> path.toFile().isFile())
        .filter(path -> path.toString().endsWith(".xml"))
        .collect(Collectors.toList());
  }
}

public static int getLatestNumPartitions(String bootstrapServers, String topicName) {
Properties props = new Properties();
Expand All @@ -89,6 +140,23 @@ public static int getLatestNumPartitions(String bootstrapServers, String topicNa
*/
public static Configuration getDefaultHadoopConf(KafkaConnectConfigs connectConfigs) {
Configuration hadoopConf = new Configuration();

// add hadoop config files
if (!StringUtils.isNullOrEmpty(connectConfigs.getHadoopConfDir())
|| !StringUtils.isNullOrEmpty(connectConfigs.getHadoopConfHome())) {
try {
List<Path> configFiles = getHadoopConfigFiles(connectConfigs.getHadoopConfDir(),
connectConfigs.getHadoopConfHome());
configFiles.forEach(f ->
hadoopConf.addResource(new org.apache.hadoop.fs.Path(f.toAbsolutePath().toUri())));
} catch (Exception e) {
throw new HoodieException("Failed to read hadoop configuration!", e);
}
} else {
DEFAULT_HADOOP_CONF_FILES.forEach(f ->
hadoopConf.addResource(new org.apache.hadoop.fs.Path(f.toAbsolutePath().toUri())));
}

connectConfigs.getProps().keySet().stream().filter(prop -> {
// In order to prevent printing unnecessary warn logs, here filter out the hoodie
// configuration items before passing to hadoop/hive configs
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,17 @@ public class KafkaConnectConfigs extends HoodieConfig {
.defaultValue(true)
.withDocumentation("Commit even when some records failed to be written");

// Reference https://docs.confluent.io/kafka-connect-hdfs/current/configuration_options.html#hdfs
public static final ConfigProperty<String> HADOOP_CONF_DIR = ConfigProperty
.key("hadoop.conf.dir")
.noDefaultValue()
.withDocumentation("The Hadoop configuration directory.");

public static final ConfigProperty<String> HADOOP_HOME = ConfigProperty
.key("hadoop.home")
.noDefaultValue()
.withDocumentation("The Hadoop home directory.");

protected KafkaConnectConfigs() {
super();
}
Expand Down Expand Up @@ -145,6 +156,14 @@ public Boolean allowCommitOnErrors() {
return getBoolean(ALLOW_COMMIT_ON_ERRORS);
}

/**
 * Returns the value of {@code hadoop.conf.dir} from this config
 * (no default is declared for the property).
 */
public String getHadoopConfDir() {
  return getString(HADOOP_CONF_DIR);
}

/**
 * Returns the value of {@code hadoop.home} from this config
 * (no default is declared for the property).
 */
public String getHadoopConfHome() {
  return getString(HADOOP_HOME);
}

public static class Builder {

protected final KafkaConnectConfigs connectConfigs = new KafkaConnectConfigs();
Expand Down Expand Up @@ -185,6 +204,16 @@ public Builder withProperties(Properties properties) {
return this;
}

/**
 * Sets the Hadoop configuration directory ({@code hadoop.conf.dir}).
 */
public Builder withHadoopConfDir(String hadoopConfDir) {
  // Pass the string through directly: String.valueOf(null) would store the
  // literal "null", which defeats the isNullOrEmpty guard downstream.
  connectConfigs.setValue(HADOOP_CONF_DIR, hadoopConfDir);
  return this;
}

/**
 * Sets the Hadoop home directory ({@code hadoop.home}).
 */
public Builder withHadoopHome(String hadoopHome) {
  connectConfigs.setValue(HADOOP_HOME, hadoopHome);
  return this;
}

protected void setDefaults() {
// Check for mandatory properties
connectConfigs.setDefaults(KafkaConnectConfigs.class.getName());
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hudi.connect;

import org.apache.hudi.connect.utils.KafkaConnectUtils;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;

import org.apache.hudi.connect.writers.KafkaConnectConfigs;
import org.junit.jupiter.api.Test;

import java.nio.file.Path;
import java.util.List;

public class TestHdfsConfiguration {

  /**
   * Returns true iff every discovered file is one of the two expected
   * Hadoop config files. Does not mutate the input list.
   */
  private boolean checkFiles(List<Path> paths) {
    return paths.stream()
        .map(p -> p.getFileName().toString())
        .allMatch(name -> name.equals("core-site.xml") || name.equals("hdfs-site.xml"));
  }

  @Test
  public void testHadoopConfigEnvs() throws Exception {
    List<Path> paths = KafkaConnectUtils.getHadoopConfigFiles(
        "src/test/resources/hadoop_conf", "");
    // JUnit convention: expected value first, actual second.
    assertEquals(2, paths.size());
    assertTrue(checkFiles(paths));
  }

  @Test
  public void testHadoopHomeEnvs() throws Exception {
    List<Path> paths = KafkaConnectUtils.getHadoopConfigFiles(
        "", "src/test/resources/hadoop_home");
    assertEquals(2, paths.size());
    assertTrue(checkFiles(paths));
  }

  @Test
  public void testKafkaConfig() throws Exception {
    KafkaConnectConfigs connectConfigs = KafkaConnectConfigs.newBuilder()
        .withHadoopHome("src/test/resources/hadoop_home")
        .build();
    List<Path> paths = KafkaConnectUtils.getHadoopConfigFiles(
        connectConfigs.getHadoopConfDir(),
        connectConfigs.getHadoopConfHome()
    );
    assertEquals(2, paths.size());
    assertTrue(checkFiles(paths));
  }
}
33 changes: 33 additions & 0 deletions hudi-kafka-connect/src/test/resources/hadoop_conf/core-site.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->

<configuration>

<property>
<name>fs.defaultFS</name>
<value>hdfs://test-hudi-path:9000</value>
<description>The name of the default file system. A URI whose
scheme and authority determine the FileSystem implementation. The
uri's scheme determines the config property (fs.SCHEME.impl) naming
the FileSystem implementation class. The uri's authority is used to
determine the host, port, etc. for a filesystem.</description>
</property>

</configuration>
30 changes: 30 additions & 0 deletions hudi-kafka-connect/src/test/resources/hadoop_conf/hdfs-site.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<configuration>

<property>
<name>dfs.namenode.http-address</name>
<value>http://test-hudi-path:50070</value>
<description>
The address and the base port where the dfs namenode web ui will listen on.
</description>
</property>

</configuration>
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->

<configuration>

<property>
<name>fs.defaultFS</name>
<value>hdfs://test-hudi-path:9000</value>
<description>The name of the default file system. A URI whose
scheme and authority determine the FileSystem implementation. The
uri's scheme determines the config property (fs.SCHEME.impl) naming
the FileSystem implementation class. The uri's authority is used to
determine the host, port, etc. for a filesystem.</description>
</property>

</configuration>
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<configuration>

<property>
<name>dfs.namenode.http-address</name>
<value>http://test-hudi-path:50070</value>
<description>
The address and the base port where the dfs namenode web ui will listen on.
</description>
</property>

</configuration>