-
Notifications
You must be signed in to change notification settings - Fork 3k
Kafka Connect: Commit coordination #10351
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,98 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one | ||
| * or more contributor license agreements. See the NOTICE file | ||
| * distributed with this work for additional information | ||
| * regarding copyright ownership. The ASF licenses this file | ||
| * to you under the Apache License, Version 2.0 (the | ||
| * "License"); you may not use this file except in compliance | ||
| * with the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, | ||
| * software distributed under the License is distributed on an | ||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| * KIND, either express or implied. See the License for the | ||
| * specific language governing permissions and limitations | ||
| * under the License. | ||
| */ | ||
| package org.apache.iceberg.connect; | ||
|
|
||
| import java.io.IOException; | ||
| import java.net.URL; | ||
| import java.nio.file.Files; | ||
| import java.nio.file.Path; | ||
| import java.nio.file.Paths; | ||
| import java.util.List; | ||
| import org.apache.iceberg.CatalogUtil; | ||
| import org.apache.iceberg.catalog.Catalog; | ||
| import org.apache.iceberg.common.DynClasses; | ||
| import org.apache.iceberg.common.DynConstructors; | ||
| import org.apache.iceberg.common.DynMethods; | ||
| import org.apache.iceberg.common.DynMethods.BoundMethod; | ||
| import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; | ||
| import org.slf4j.Logger; | ||
| import org.slf4j.LoggerFactory; | ||
|
|
||
| class CatalogUtils { | ||
|
|
||
| private static final Logger LOG = LoggerFactory.getLogger(CatalogUtils.class.getName()); | ||
| private static final List<String> HADOOP_CONF_FILES = | ||
| ImmutableList.of("core-site.xml", "hdfs-site.xml", "hive-site.xml"); | ||
|
|
||
| static Catalog loadCatalog(IcebergSinkConfig config) { | ||
| return CatalogUtil.buildIcebergCatalog( | ||
| config.catalogName(), config.catalogProps(), loadHadoopConfig(config)); | ||
| } | ||
|
|
||
| // use reflection here to avoid requiring Hadoop as a dependency | ||
| private static Object loadHadoopConfig(IcebergSinkConfig config) { | ||
| Class<?> configClass = | ||
| DynClasses.builder() | ||
| .impl("org.apache.hadoop.hdfs.HdfsConfiguration") | ||
| .impl("org.apache.hadoop.conf.Configuration") | ||
| .orNull() | ||
| .build(); | ||
|
|
||
| if (configClass == null) { | ||
| LOG.info("Hadoop not found on classpath, not creating Hadoop config"); | ||
| return null; | ||
| } | ||
|
|
||
| try { | ||
| Object result = DynConstructors.builder().hiddenImpl(configClass).build().newInstance(); | ||
| BoundMethod addResourceMethod = | ||
| DynMethods.builder("addResource").impl(configClass, URL.class).build(result); | ||
| BoundMethod setMethod = | ||
| DynMethods.builder("set").impl(configClass, String.class, String.class).build(result); | ||
|
|
||
| // load any config files in the specified config directory | ||
| String hadoopConfDir = config.hadoopConfDir(); | ||
| if (hadoopConfDir != null) { | ||
| HADOOP_CONF_FILES.forEach( | ||
| confFile -> { | ||
| Path path = Paths.get(hadoopConfDir, confFile); | ||
| if (Files.exists(path)) { | ||
| try { | ||
| addResourceMethod.invoke(path.toUri().toURL()); | ||
| } catch (IOException e) { | ||
| LOG.warn("Error adding Hadoop resource {}, resource was not added", path, e); | ||
| } | ||
| } | ||
| }); | ||
| } | ||
|
|
||
| // set any Hadoop properties specified in the sink config | ||
| config.hadoopProps().forEach(setMethod::invoke); | ||
|
|
||
| LOG.info("Hadoop config initialized: {}", configClass.getName()); | ||
| return result; | ||
| } catch (Exception e) { | ||
| LOG.warn( | ||
| "Hadoop found on classpath but could not create config, proceeding without config", e); | ||
| } | ||
| return null; | ||
| } | ||
|
|
||
| private CatalogUtils() {} | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,32 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one | ||
| * or more contributor license agreements. See the NOTICE file | ||
| * distributed with this work for additional information | ||
| * regarding copyright ownership. The ASF licenses this file | ||
| * to you under the Apache License, Version 2.0 (the | ||
| * "License"); you may not use this file except in compliance | ||
| * with the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, | ||
| * software distributed under the License is distributed on an | ||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| * KIND, either express or implied. See the License for the | ||
| * specific language governing permissions and limitations | ||
| * under the License. | ||
| */ | ||
| package org.apache.iceberg.connect; | ||
|
|
||
| import java.util.Collection; | ||
| import org.apache.iceberg.catalog.Catalog; | ||
| import org.apache.kafka.connect.sink.SinkRecord; | ||
| import org.apache.kafka.connect.sink.SinkTaskContext; | ||
|
|
||
/**
 * A committer receives the sink records for a task and coordinates committing them to Iceberg
 * tables. Implementations are created via {@code CommitterFactory} and driven by
 * {@code IcebergSinkTask}.
 */
public interface Committer {

  /**
   * Starts the committer for a task.
   *
   * @param catalog the Iceberg catalog to commit against
   * @param config the sink connector configuration
   * @param context the Kafka Connect task context for the owning task
   */
  void start(Catalog catalog, IcebergSinkConfig config, SinkTaskContext context);

  /** Stops the committer and releases any resources it holds. */
  void stop();

  /**
   * Processes a batch of sink records. Callers may pass {@code null} — IcebergSinkTask.flush()
   * invokes {@code save(null)} — presumably to trigger processing without new records; confirm
   * the null contract against the implementation.
   *
   * @param sinkRecords the records to process, or {@code null}
   */
  void save(Collection<SinkRecord> sinkRecords);
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,29 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one | ||
| * or more contributor license agreements. See the NOTICE file | ||
| * distributed with this work for additional information | ||
| * regarding copyright ownership. The ASF licenses this file | ||
| * to you under the Apache License, Version 2.0 (the | ||
| * "License"); you may not use this file except in compliance | ||
| * with the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, | ||
| * software distributed under the License is distributed on an | ||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| * KIND, either express or implied. See the License for the | ||
| * specific language governing permissions and limitations | ||
| * under the License. | ||
| */ | ||
| package org.apache.iceberg.connect; | ||
|
|
||
| import org.apache.iceberg.connect.channel.CommitterImpl; | ||
|
|
||
/** Factory for creating the {@link Committer} used by a sink task. */
class CommitterFactory {
  /**
   * Creates a new committer. The {@code config} parameter is currently unused — presumably
   * reserved for selecting among committer implementations in the future; TODO confirm.
   *
   * @param config the sink connector configuration
   * @return a new {@link Committer} instance
   */
  static Committer createCommitter(IcebergSinkConfig config) {
    return new CommitterImpl();
  }

  private CommitterFactory() {}
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,107 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one | ||
| * or more contributor license agreements. See the NOTICE file | ||
| * distributed with this work for additional information | ||
| * regarding copyright ownership. The ASF licenses this file | ||
| * to you under the Apache License, Version 2.0 (the | ||
| * "License"); you may not use this file except in compliance | ||
| * with the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, | ||
| * software distributed under the License is distributed on an | ||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| * KIND, either express or implied. See the License for the | ||
| * specific language governing permissions and limitations | ||
| * under the License. | ||
| */ | ||
| package org.apache.iceberg.connect; | ||
|
|
||
| import java.util.Collection; | ||
| import java.util.Map; | ||
| import org.apache.iceberg.catalog.Catalog; | ||
| import org.apache.iceberg.relocated.com.google.common.base.Preconditions; | ||
| import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; | ||
| import org.apache.kafka.clients.consumer.OffsetAndMetadata; | ||
| import org.apache.kafka.common.TopicPartition; | ||
| import org.apache.kafka.connect.sink.SinkRecord; | ||
| import org.apache.kafka.connect.sink.SinkTask; | ||
| import org.slf4j.Logger; | ||
| import org.slf4j.LoggerFactory; | ||
|
|
||
| public class IcebergSinkTask extends SinkTask { | ||
|
|
||
| private static final Logger LOG = LoggerFactory.getLogger(IcebergSinkTask.class); | ||
|
|
||
| private IcebergSinkConfig config; | ||
| private Catalog catalog; | ||
Fokko marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| private Committer committer; | ||
|
|
||
| @Override | ||
| public String version() { | ||
| return IcebergSinkConfig.version(); | ||
Fokko marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| } | ||
|
|
||
| @Override | ||
| public void start(Map<String, String> props) { | ||
| this.config = new IcebergSinkConfig(props); | ||
| } | ||
|
|
||
| @Override | ||
| public void open(Collection<TopicPartition> partitions) { | ||
| Preconditions.checkArgument(catalog == null, "Catalog already open"); | ||
| Preconditions.checkArgument(committer == null, "Committer already open"); | ||
|
|
||
| catalog = CatalogUtils.loadCatalog(config); | ||
Fokko marked this conversation as resolved.
Show resolved
Hide resolved
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I'm also worried we might have a bug here. The Committer implementation uses this
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That's not my understanding,
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I did read the javadoc and interpreted it differently but I can totally see how you reached your interpretation as well. The reason I'm inclined towards my interpretation is because I know I could still be wrong so feel free to test things out or ask in the kafka community.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks, it does seem like you're right. I changed this to get the consumer assignment instead. |
||
| committer = CommitterFactory.createCommitter(config); | ||
| committer.start(catalog, config, context); | ||
| } | ||
|
|
||
| @Override | ||
| public void close(Collection<TopicPartition> partitions) { | ||
| close(); | ||
| } | ||
|
|
||
| private void close() { | ||
| if (committer != null) { | ||
| committer.stop(); | ||
| committer = null; | ||
| } | ||
|
|
||
| if (catalog != null) { | ||
| if (catalog instanceof AutoCloseable) { | ||
| try { | ||
| ((AutoCloseable) catalog).close(); | ||
| } catch (Exception e) { | ||
| LOG.warn("An error occurred closing catalog instance, ignoring...", e); | ||
| } | ||
| } | ||
| catalog = null; | ||
| } | ||
| } | ||
|
Comment on lines
+61
to
+82
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: can you move these
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure I follow your suggestion. When the KC
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think he means just rearrange the code, keep close method just before stop.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I have this order to prevent a running committer from having reference to a closed catalog. |
||
|
|
||
| @Override | ||
| public void put(Collection<SinkRecord> sinkRecords) { | ||
| Preconditions.checkNotNull(committer, "Committer wasn't initialized"); | ||
| committer.save(sinkRecords); | ||
| } | ||
|
|
||
| @Override | ||
| public void flush(Map<TopicPartition, OffsetAndMetadata> currentOffsets) { | ||
| Preconditions.checkNotNull(committer, "Committer wasn't initialized"); | ||
| committer.save(null); | ||
| } | ||
|
|
||
| @Override | ||
| public Map<TopicPartition, OffsetAndMetadata> preCommit( | ||
| Map<TopicPartition, OffsetAndMetadata> currentOffsets) { | ||
| // offset commit is handled by the worker | ||
| return ImmutableMap.of(); | ||
| } | ||
|
|
||
| @Override | ||
| public void stop() { | ||
| close(); | ||
| } | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.