diff --git a/CMakeLists.txt b/CMakeLists.txt index 318d7aafe224..c9c0be3c2ce1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -136,7 +136,9 @@ set( ) option(VELOX_ENABLE_EXEC "Build exec." ON) option(VELOX_ENABLE_AGGREGATES "Build aggregates." ON) -option(VELOX_ENABLE_HIVE_CONNECTOR "Build Hive connector." ON) +option(VELOX_ENABLE_HIVE_CONNECTOR "Build the Hive connector." ON) +#option(VELOX_ENABLE_HIVE_NEW_CONNECTOR "Build the new Hive connector." ON) +#option(VELOX_ENABLE_ICEBERG_CONNECTOR "Build the ICEBERG connector that does NOT depend on the new Hive connector." ON) option(VELOX_ENABLE_TPCH_CONNECTOR "Build TPC-H connector." ON) option(VELOX_ENABLE_TPCDS_CONNECTOR "Build TPC-DS connector." ON) option(VELOX_ENABLE_PRESTO_FUNCTIONS "Build Presto SQL functions." ON) @@ -722,6 +724,7 @@ include_directories(.) # Adding this down here prevents warnings in dependencies from stopping the # build +set(TREAT_WARNINGS_AS_ERRORS OFF) if("${TREAT_WARNINGS_AS_ERRORS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror") endif() diff --git a/velox/connectors/CMakeLists.txt b/velox/connectors/CMakeLists.txt index 3fd17dde2d29..ee462c5cba57 100644 --- a/velox/connectors/CMakeLists.txt +++ b/velox/connectors/CMakeLists.txt @@ -17,6 +17,8 @@ velox_link_libraries(velox_connector velox_common_config velox_vector) add_subdirectory(fuzzer) +add_subdirectory(lakehouse) + if(${VELOX_ENABLE_HIVE_CONNECTOR}) add_subdirectory(hive) endif() diff --git a/velox/connectors/Connector.h b/velox/connectors/Connector.h index 3f91fa8ec4a8..026eecac6476 100644 --- a/velox/connectors/Connector.h +++ b/velox/connectors/Connector.h @@ -196,7 +196,7 @@ class DataSink { uint64_t recodeTimeNs{0}; uint64_t compressionTimeNs{0}; - common::SpillStats spillStats; + velox::common::SpillStats spillStats; bool empty() const; diff --git a/velox/connectors/lakehouse/CMakeLists.txt b/velox/connectors/lakehouse/CMakeLists.txt new file mode 100644 index 000000000000..fb99e6106a91 --- /dev/null +++ b/velox/connectors/lakehouse/CMakeLists.txt @@ -0,0 +1,17 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#add_subdirectory(storage_adapters) +add_subdirectory(iceberg) + diff --git a/velox/connectors/lakehouse/iceberg/CMakeLists.txt b/velox/connectors/lakehouse/iceberg/CMakeLists.txt new file mode 100644 index 000000000000..15a28423325f --- /dev/null +++ b/velox/connectors/lakehouse/iceberg/CMakeLists.txt @@ -0,0 +1,59 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
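The two commented-out options above are not wired to anything yet; if they are intended to gate the new directories the same way VELOX_ENABLE_HIVE_CONNECTOR gates hive/, the hookup could look like the sketch below. This is an editor's illustration only, not part of this patch, and the option name is taken verbatim from the commented line:

# In CMakeLists.txt, next to the existing connector options:
option(VELOX_ENABLE_ICEBERG_CONNECTOR
       "Build the Iceberg connector that does NOT depend on the new Hive connector." ON)

# In velox/connectors/CMakeLists.txt, mirroring the Hive guard instead of the
# unconditional add_subdirectory(lakehouse) used in this patch:
if(${VELOX_ENABLE_ICEBERG_CONNECTOR})
  add_subdirectory(lakehouse)
endif()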
+# See the License for the specific language governing permissions and +# limitations under the License. + +velox_add_library( + velox_lakehouse_iceberg_connector + ConnectorConfigBase.cpp + DataSourceBase.cpp + ConnectorSplitBase.cpp + ConnectorUtil.cpp + FileHandle.cpp + SplitReaderBase.cpp + TableHandleBase.cpp + IcebergConfig.cpp + IcebergConnector.cpp + IcebergConnectorSplit.cpp + IcebergConnectorUtil.cpp + IcebergPartitionUtil.cpp + PartitionIdGenerator.cpp + IcebergDataSource.cpp + IcebergPartitionFunction.cpp + IcebergSplitReader.cpp + IcebergTableHandle.cpp + PositionalDeleteFileReader.cpp + ) + +velox_link_libraries( + velox_lakehouse_iceberg_connector + PRIVATE velox_connector + velox_common_io + velox_dwio_catalog_fbhive + velox_buffer + velox_caching + velox_common_compression + velox_common_config + velox_dwio_common_encryption + velox_dwio_common_exception + velox_exception + velox_expression + velox_memory + velox_type_tz + Boost::regex + Folly::folly + glog::glog + protobuf::libprotobuf) + +if(${VELOX_BUILD_TESTING}) + add_subdirectory(tests) +endif() diff --git a/velox/connectors/lakehouse/iceberg/ConnectorConfigBase.cpp b/velox/connectors/lakehouse/iceberg/ConnectorConfigBase.cpp new file mode 100644 index 000000000000..4e02ebd39b7b --- /dev/null +++ b/velox/connectors/lakehouse/iceberg/ConnectorConfigBase.cpp @@ -0,0 +1,151 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ConnectorConfigBase.h" + +namespace facebook::velox::connector::lakehouse::iceberg { + +std::string ConnectorConfigBase::gcsEndpoint() const { + return config_->get(kGcsEndpoint, std::string("")); +} + +std::string ConnectorConfigBase::gcsCredentialsPath() const { + return config_->get(kGcsCredentialsPath, std::string("")); +} + +std::optional ConnectorConfigBase::gcsMaxRetryCount() const { + return static_cast>(config_->get(kGcsMaxRetryCount)); +} + +std::optional ConnectorConfigBase::gcsMaxRetryTime() const { + return static_cast>( + config_->get(kGcsMaxRetryTime)); +} + +bool ConnectorConfigBase::isOrcUseColumnNames( + const config::ConfigBase* session) const { + return session->get( + kOrcUseColumnNamesSession, config_->get(kOrcUseColumnNames, false)); +} + +bool ConnectorConfigBase::isParquetUseColumnNames( + const config::ConfigBase* session) const { + return session->get( + kParquetUseColumnNamesSession, + config_->get(kParquetUseColumnNames, false)); +} + +bool ConnectorConfigBase::isFileColumnNamesReadAsLowerCase( + const config::ConfigBase* session) const { + return session->get( + kFileColumnNamesReadAsLowerCaseSession, + config_->get(kFileColumnNamesReadAsLowerCase, false)); +} + +bool ConnectorConfigBase::isPartitionPathAsLowerCase( + const config::ConfigBase* session) const { + return session->get(kPartitionPathAsLowerCaseSession, true); +} + +bool ConnectorConfigBase::allowNullPartitionKeys( + const config::ConfigBase* session) const { + return session->get( + kAllowNullPartitionKeysSession, + config_->get(kAllowNullPartitionKeys, true)); +} + +int64_t ConnectorConfigBase::maxCoalescedBytes( + const config::ConfigBase* session) const { + return session->get( + kMaxCoalescedBytesSession, + config_->get(kMaxCoalescedBytes, 128 << 20)); // 128MB +} + +int32_t ConnectorConfigBase::maxCoalescedDistanceBytes( + const config::ConfigBase* session) const { + const auto distance = config::toCapacity( + session->get( + kMaxCoalescedDistanceSession, + config_->get(kMaxCoalescedDistance, "512kB")), + config::CapacityUnit::BYTE); + VELOX_USER_CHECK_LE( + distance, + std::numeric_limits::max(), + "The max merge distance to combine read requests must be less than 2GB." 
+ " Got {} bytes.", + distance); + return int32_t(distance); +} + +int32_t ConnectorConfigBase::prefetchRowGroups() const { + return config_->get(kPrefetchRowGroups, 1); +} + +int32_t ConnectorConfigBase::loadQuantum(const config::ConfigBase* session) const { + return session->get( + kLoadQuantumSession, config_->get(kLoadQuantum, 8 << 20)); +} + +int32_t ConnectorConfigBase::numCacheFileHandles() const { + return config_->get(kNumCacheFileHandles, 20'000); +} + +uint64_t ConnectorConfigBase::fileHandleExpirationDurationMs() const { + return config_->get(kFileHandleExpirationDurationMs, 0); +} + +bool ConnectorConfigBase::isFileHandleCacheEnabled() const { + return config_->get(kEnableFileHandleCache, true); +} + +std::string ConnectorConfigBase::writeFileCreateConfig() const { + return config_->get(kWriteFileCreateConfig, ""); +} + +uint64_t ConnectorConfigBase::footerEstimatedSize() const { + return config_->get(kFooterEstimatedSize, 256UL << 10); +} + +uint64_t ConnectorConfigBase::filePreloadThreshold() const { + return config_->get(kFilePreloadThreshold, 8UL << 20); +} + +uint8_t ConnectorConfigBase::readTimestampUnit( + const config::ConfigBase* session) const { + const auto unit = session->get( + kReadTimestampUnitSession, + config_->get(kReadTimestampUnit, 3 /*milli*/)); + VELOX_CHECK( + unit == 3 || unit == 6 /*micro*/ || unit == 9 /*nano*/, + "Invalid timestamp unit."); + return unit; +} + +bool ConnectorConfigBase::readTimestampPartitionValueAsLocalTime( + const config::ConfigBase* session) const { + return session->get( + kReadTimestampPartitionValueAsLocalTimeSession, + config_->get(kReadTimestampPartitionValueAsLocalTime, true)); +} + +bool ConnectorConfigBase::readStatsBasedFilterReorderDisabled( + const config::ConfigBase* session) const { + return session->get( + kReadStatsBasedFilterReorderDisabledSession, + config_->get(kReadStatsBasedFilterReorderDisabled, false)); +} + +} // namespace facebook::velox::connector::lakehouse::iceberg diff --git a/velox/connectors/lakehouse/iceberg/ConnectorConfigBase.h b/velox/connectors/lakehouse/iceberg/ConnectorConfigBase.h new file mode 100644 index 000000000000..21f0ea99ebe0 --- /dev/null +++ b/velox/connectors/lakehouse/iceberg/ConnectorConfigBase.h @@ -0,0 +1,214 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include "velox/common/config/Config.h" + +namespace facebook::velox::connector::lakehouse::iceberg { + +class ConnectorConfigBase { + public: + /// The GCS storage endpoint server. + static constexpr const char* kGcsEndpoint = "gcs.endpoint"; + + /// The GCS service account configuration JSON key file. + static constexpr const char* kGcsCredentialsPath = + "gcs.json-key-file-path"; + + /// The GCS maximum retry counter of transient errors. + static constexpr const char* kGcsMaxRetryCount = "gcs.max-retry-count"; + + /// The GCS maximum time allowed to retry transient errors. 
+ static constexpr const char* kGcsMaxRetryTime = "gcs.max-retry-time"; + + /// Maps table field names to file field names using names, not indices. + // TODO: remove hive_orc_use_column_names since it doesn't exist in presto, + // right now this is only used for testing. + // TODO: remove prefix + static constexpr const char* kOrcUseColumnNames = "orc.use-column-names"; + static constexpr const char* kOrcUseColumnNamesSession = + "orc_use_column_names"; + + /// Maps table field names to file field names using names, not indices. + static constexpr const char* kParquetUseColumnNames = + "parquet.use-column-names"; + static constexpr const char* kParquetUseColumnNamesSession = + "parquet_use_column_names"; + + /// Reads the source file column name as lower case. + static constexpr const char* kFileColumnNamesReadAsLowerCase = + "file-column-names-read-as-lower-case"; + static constexpr const char* kFileColumnNamesReadAsLowerCaseSession = + "file_column_names_read_as_lower_case"; + + static constexpr const char* kPartitionPathAsLowerCaseSession = + "partition_path_as_lower_case"; + + static constexpr const char* kAllowNullPartitionKeys = + "allow-null-partition-keys"; + static constexpr const char* kAllowNullPartitionKeysSession = + "allow_null_partition_keys"; + + /// The max coalesce bytes for a request. + static constexpr const char* kMaxCoalescedBytes = "max-coalesced-bytes"; + static constexpr const char* kMaxCoalescedBytesSession = + "max-coalesced-bytes"; + + /// The max merge distance to combine read requests. + /// Note: The session property name differs from the constant name for + /// backward compatibility with Presto. + static constexpr const char* kMaxCoalescedDistance = "max-coalesced-distance"; + static constexpr const char* kMaxCoalescedDistanceSession = + "orc_max_merge_distance"; + + /// The number of prefetch rowgroups + static constexpr const char* kPrefetchRowGroups = "prefetch-rowgroups"; + + /// The total size in bytes for a direct coalesce request. Up to 8MB load + /// quantum size is supported when SSD cache is enabled. + static constexpr const char* kLoadQuantum = "load-quantum"; + static constexpr const char* kLoadQuantumSession = "load-quantum"; + + /// Maximum number of entries in the file handle cache. + static constexpr const char* kNumCacheFileHandles = "num_cached_file_handles"; + + /// Expiration time in ms for a file handle in the cache. A value of 0 + /// means cache will not evict the handle after kFileHandleExprationDurationMs + /// has passed. + static constexpr const char* kFileHandleExpirationDurationMs = + "file-handle-expiration-duration-ms"; + + /// Enable file handle cache. + static constexpr const char* kEnableFileHandleCache = + "file-handle-cache-enabled"; + + /// The size in bytes to be fetched with Meta data together, used when the + /// data after meta data will be used later. Optimization to decrease small IO + /// request + static constexpr const char* kFooterEstimatedSize = "footer-estimated-size"; + + /// The threshold of file size in bytes when the whole file is fetched with + /// meta data together. Optimization to decrease the small IO requests + static constexpr const char* kFilePreloadThreshold = "file-preload-threshold"; + + /// Config used to create write files. This config is provided to underlying + /// file system through hive connector and data sink. The config is free form. + /// The form should be defined by the underlying file system. 
+ static constexpr const char* kWriteFileCreateConfig = + "write_file_create_config"; + + // The unit for reading timestamps from files. + static constexpr const char* kReadTimestampUnit = + "reader.timestamp-unit"; + static constexpr const char* kReadTimestampUnitSession = + "reader.timestamp_unit"; + + static constexpr const char* kReadTimestampPartitionValueAsLocalTime = + "reader.timestamp-partition-value-as-local-time"; + static constexpr const char* kReadTimestampPartitionValueAsLocalTimeSession = + "reader.timestamp_partition_value_as_local_time"; + + static constexpr const char* kReadStatsBasedFilterReorderDisabled = + "stats-based-filter-reorder-disabled"; + static constexpr const char* kReadStatsBasedFilterReorderDisabledSession = + "stats_based_filter_reorder_disabled"; + + template + T* as() { + static_assert(std::is_base_of_v); + return dynamic_cast(this); + } + + template + const T* as() const { + static_assert(std::is_base_of_v); + return dynamic_cast(this); + } + + std::string gcsEndpoint() const; + + std::string gcsCredentialsPath() const; + + std::optional gcsMaxRetryCount() const; + + std::optional gcsMaxRetryTime() const; + + bool isOrcUseColumnNames(const velox::config::ConfigBase* session) const; + + bool isParquetUseColumnNames(const velox::config::ConfigBase* session) const; + + bool isFileColumnNamesReadAsLowerCase( + const velox::config::ConfigBase* session) const; + + bool isPartitionPathAsLowerCase( + const velox::config::ConfigBase* session) const; + + bool allowNullPartitionKeys(const velox::config::ConfigBase* session) const; + + // bool ignoreMissingFiles(const velox::config::ConfigBase* session) const; + + int64_t maxCoalescedBytes(const velox::config::ConfigBase* session) const; + + int32_t maxCoalescedDistanceBytes( + const velox::config::ConfigBase* session) const; + + int32_t prefetchRowGroups() const; + + int32_t loadQuantum(const velox::config::ConfigBase* session) const; + + int32_t numCacheFileHandles() const; + + uint64_t fileHandleExpirationDurationMs() const; + + bool isFileHandleCacheEnabled() const; + + uint64_t fileWriterFlushThresholdBytes() const; + + // Used by spiller + std::string writeFileCreateConfig() const; + + uint64_t footerEstimatedSize() const; + + uint64_t filePreloadThreshold() const; + + // Returns the timestamp unit used when reading timestamps from files. + uint8_t readTimestampUnit(const velox::config::ConfigBase* session) const; + + // Whether to read timestamp partition value as local time. If false, read as + // UTC. + bool readTimestampPartitionValueAsLocalTime( + const velox::config::ConfigBase* session) const; + + /// Returns true if the stats based filter reorder for read is disabled. + bool readStatsBasedFilterReorderDisabled( + const velox::config::ConfigBase* session) const; + + ConnectorConfigBase(std::shared_ptr config) { + VELOX_CHECK_NOT_NULL( + config, "Config is null for IcebergConfig initialization"); + config_ = std::move(config); + // TODO: add sanity check + } + + const std::shared_ptr& config() const { + return config_; + } + + protected: + std::shared_ptr config_; +}; + +} // namespace facebook::velox::connector::lakehouse::iceberg diff --git a/velox/connectors/lakehouse/iceberg/ConnectorSplitBase.cpp b/velox/connectors/lakehouse/iceberg/ConnectorSplitBase.cpp new file mode 100644 index 000000000000..fdfac084849d --- /dev/null +++ b/velox/connectors/lakehouse/iceberg/ConnectorSplitBase.cpp @@ -0,0 +1,81 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. 
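Most getters in this class resolve a value in three steps: the session property wins, then the connector property, then a hard-coded default. The sketch below shows that resolution for maxCoalescedBytes(), assuming ConnectorConfigBase is constructed from a std::shared_ptr to config::ConfigBase and that config::ConfigBase accepts a string map (the template arguments are not visible in this hunk); configResolutionExample and the literal sizes are invented for illustration:

#include <memory>
#include <string>
#include <unordered_map>

#include "velox/common/config/Config.h"
#include "velox/connectors/lakehouse/iceberg/ConnectorConfigBase.h"

using namespace facebook::velox;
using facebook::velox::connector::lakehouse::iceberg::ConnectorConfigBase;

void configResolutionExample() {
  // Connector-level property: 64MB max coalesced bytes.
  auto connectorProps = std::make_shared<config::ConfigBase>(
      std::unordered_map<std::string, std::string>{
          {ConnectorConfigBase::kMaxCoalescedBytes, "67108864"}});
  ConnectorConfigBase config(connectorProps);

  // Session-level property: 32MB, which takes precedence.
  config::ConfigBase session(std::unordered_map<std::string, std::string>{
      {ConnectorConfigBase::kMaxCoalescedBytesSession, "33554432"}});

  // Returns 32MB. With an empty session it would return 64MB, and with both
  // maps empty the 128MB default baked into maxCoalescedBytes() applies.
  const int64_t bytes = config.maxCoalescedBytes(&session);
  (void)bytes;
}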
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ConnectorSplitBase.h" + +namespace facebook::velox::connector::lakehouse::iceberg { + +std::string ConnectorSplitBase::getFileName() const { + const auto i = filePath.rfind('/'); + return i == std::string::npos ? filePath : filePath.substr(i + 1); +} + +folly::dynamic ConnectorSplitBase::serializeBase( + const std::string& className) const { + folly::dynamic obj = folly::dynamic::object; + obj["name"] = className; + obj["connectorId"] = connectorId; + obj["splitWeight"] = splitWeight; + obj["cacheable"] = cacheable; + obj["filePath"] = filePath; + obj["fileFormat"] = dwio::common::toString(fileFormat); + obj["start"] = start; + obj["length"] = length; + + folly::dynamic serdeParametersObj = folly::dynamic::object; + for (const auto& [key, value] : serdeParameters) { + serdeParametersObj[key] = value; + } + obj["serdeParameters"] = serdeParametersObj; + + folly::dynamic storageParametersObj = folly::dynamic::object; + for (const auto& [key, value] : storageParameters) { + storageParametersObj[key] = value; + } + obj["storageParameters"] = storageParametersObj; + + folly::dynamic infoColumnsObj = folly::dynamic::object; + for (const auto& [key, value] : infoColumns) { + infoColumnsObj[key] = value; + } + obj["infoColumns"] = infoColumnsObj; + + if (properties.has_value()) { + folly::dynamic propertiesObj = folly::dynamic::object; + propertiesObj["fileSize"] = properties->fileSize.has_value() + ? folly::dynamic(properties->fileSize.value()) + : nullptr; + propertiesObj["modificationTime"] = properties->modificationTime.has_value() + ? folly::dynamic(properties->modificationTime.value()) + : nullptr; + obj["properties"] = propertiesObj; + } + + return obj; +} + +std::string ConnectorSplitBase::toStringBase(const std::string& className) const { + return fmt::format( + "{} [{}: {} {} - {}, fileFormat: {}", + className, + connectorId, + filePath, + start, + length, + fileFormat); +} + +} // namespace facebook::velox::connector::lakehouse::iceberg diff --git a/velox/connectors/lakehouse/iceberg/ConnectorSplitBase.h b/velox/connectors/lakehouse/iceberg/ConnectorSplitBase.h new file mode 100644 index 000000000000..7922ee161999 --- /dev/null +++ b/velox/connectors/lakehouse/iceberg/ConnectorSplitBase.h @@ -0,0 +1,182 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "velox/connectors/lakehouse/iceberg/FileProperties.h" +#include "velox/connectors/Connector.h" +#include "velox/dwio/common/Options.h" + +#include + +namespace facebook::velox::connector::lakehouse::iceberg { + +struct ConnectorSplitBase : public connector::ConnectorSplit { + const std::string filePath; + const dwio::common::FileFormat fileFormat; + const uint64_t start; + const uint64_t length; + + /// Mapping from partition keys to values. Values are specified as strings + /// formatted the same way as CAST(x as VARCHAR). Null values are specified as + /// std::nullopt. Date values must be formatted using ISO 8601 as YYYY-MM-DD. + /// All scalar types and date type are supported. + const std::unordered_map> + partitionKeys; + + // Parameters that are provided as the serialization options. + std::unordered_map serdeParameters; + // Parameters that are provided as the physical storage properties. + std::unordered_map storageParameters; + + /// These represent columns like $file_size, $file_modified_time that are + /// associated with the ConnectorSplit. + std::unordered_map infoColumns; + + /// These represent file properties like file size that are used while opening + /// the file handle. + std::optional properties; + + ConnectorSplitBase( + const std::string& _connectorId, + const std::string& _filePath, + dwio::common::FileFormat _fileFormat, + uint64_t _start = 0, + uint64_t _length = std::numeric_limits::max(), + const std::unordered_map>& + _partitionKeys = {}, + const std::unordered_map& _serdeParameters = {}, + const std::unordered_map& _storageParameters = + {}, + int64_t _splitWeight = 0, + bool _cacheable = true, + const std::unordered_map& _infoColumns = {}, + std::optional _properties = std::nullopt) + : ConnectorSplit(_connectorId, _splitWeight, _cacheable), + filePath(_filePath), + fileFormat(_fileFormat), + start(_start), + length(_length), + partitionKeys(_partitionKeys), + serdeParameters(_serdeParameters), + storageParameters(_storageParameters), + infoColumns(_infoColumns), + properties(_properties) {} + + template + T* as() { + static_assert(std::is_base_of_v); + return dynamic_cast(this); + } + + template + const T* as() const { + static_assert(std::is_base_of_v); + return dynamic_cast(this); + } + + std::string getFileName() const; + + protected: + + + folly::dynamic serializeBase(const std::string& className) const; + + std::string toStringBase( + const std::string& className) const; + +}; + +template +class ConnectorSplitBuilder { + public: + explicit ConnectorSplitBuilder(std::string filePath) + : filePath_(std::move(filePath)) { + infoColumns_["$path"] = filePath_; + } + + ~ConnectorSplitBuilder() = default; + + DerivedConnectorSplitBuilder& start(uint64_t start) { + start_ = start; + return static_cast(*this); + } + + DerivedConnectorSplitBuilder& length(uint64_t length) { + length_ = length; + return static_cast(*this); + } + + DerivedConnectorSplitBuilder& splitWeight(int64_t splitWeight) { + splitWeight_ = splitWeight; + return static_cast(*this); + } + + DerivedConnectorSplitBuilder& cacheable(bool cacheable) { + cacheable_ = cacheable; + return static_cast(*this); + } + + DerivedConnectorSplitBuilder& fileFormat(dwio::common::FileFormat format) { + fileFormat_ = format; + return static_cast(*this); + } + + DerivedConnectorSplitBuilder& infoColumn( + const std::string& name, + const std::string& value) { + infoColumns_.emplace(std::move(name), std::move(value)); + return static_cast(*this); + } + + 
DerivedConnectorSplitBuilder& serdeParameters( + const std::unordered_map& serdeParameters) { + serdeParameters_ = serdeParameters; + return static_cast(*this); + } + + DerivedConnectorSplitBuilder& storageParameters( + const std::unordered_map& storageParameters) { + storageParameters_ = storageParameters; + return static_cast(*this); + } + + DerivedConnectorSplitBuilder& connectorId(const std::string& connectorId) { + connectorId_ = connectorId; + return static_cast(*this); + } + + DerivedConnectorSplitBuilder& fileProperties(FileProperties fileProperties) { + fileProperties_ = fileProperties; + return static_cast(*this); + } + + protected: + const std::string filePath_; + // dwio::common::FileFormat fileFormat_{dwio::common::FileFormat::DWRF}; + dwio::common::FileFormat fileFormat_; + uint64_t start_{0}; + uint64_t length_{std::numeric_limits::max()}; + std::unordered_map serdeParameters_ = {}; + std::unordered_map storageParameters_ = {}; + std::unordered_map infoColumns_ = {}; + std::string connectorId_; + int64_t splitWeight_{0}; + bool cacheable_{true}; + std::optional fileProperties_; +}; + +} // namespace facebook::velox::connector::lakehouse::iceberg diff --git a/velox/connectors/lakehouse/iceberg/ConnectorUtil.cpp b/velox/connectors/lakehouse/iceberg/ConnectorUtil.cpp new file mode 100644 index 000000000000..0694df059c9d --- /dev/null +++ b/velox/connectors/lakehouse/iceberg/ConnectorUtil.cpp @@ -0,0 +1,707 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
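ConnectorSplitBuilder uses the curiously recurring template pattern: every setter casts *this to the derived builder type, so chained calls keep returning the most-derived builder. The sketch below shows a derived builder and its fluent use; MySplitBuilder and snapshotId are invented for illustration. Note that fileFormat_ has no default in the base class, so callers should always set it explicitly:

#include "velox/connectors/lakehouse/iceberg/ConnectorSplitBase.h"

namespace example {

using facebook::velox::connector::lakehouse::iceberg::ConnectorSplitBuilder;

// Hypothetical derived builder: inherited setters return MySplitBuilder&, so
// builder-specific setters can be mixed into the same chain.
class MySplitBuilder : public ConnectorSplitBuilder<MySplitBuilder> {
 public:
  explicit MySplitBuilder(std::string filePath)
      : ConnectorSplitBuilder<MySplitBuilder>(std::move(filePath)) {}

  MySplitBuilder& snapshotId(int64_t id) { // illustrative extra setter
    snapshotId_ = id;
    return *this;
  }

  // A real builder would also expose build() returning the concrete split.

 private:
  int64_t snapshotId_{0};
};

void builderUsage() {
  MySplitBuilder builder("s3://bucket/warehouse/tbl/data/part-0.parquet");
  builder.fileFormat(facebook::velox::dwio::common::FileFormat::PARQUET)
      .start(0)
      .length(1024)
      .snapshotId(42) // still chains after the inherited setters
      .connectorId("test-iceberg");
}

} // namespace example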
+ */ + +#include "ConnectorUtil.h" + +#include "velox/dwio/common/CachedBufferedInput.h" +#include "velox/dwio/common/DirectBufferedInput.h" +#include "velox/dwio/common/Reader.h" +#include "velox/expression/Expr.h" +#include "velox/expression/ExprToSubfieldFilter.h" + +using namespace facebook::velox; + +namespace facebook::velox::connector::lakehouse::iceberg { + +using namespace facebook::velox::common; + +namespace { + +core::CallTypedExprPtr replaceInputs( + const core::CallTypedExpr* call, + std::vector&& inputs) { + return std::make_shared( + call->type(), std::move(inputs), call->name()); +} + +bool endWith(const std::string& str, const char* suffix) { + int len = strlen(suffix); + if (str.size() < len) { + return false; + } + for (int i = 0, j = str.size() - len; i < len; ++i, ++j) { + if (str[j] != suffix[i]) { + return false; + } + } + return true; +} + +bool isNotExpr( + const core::TypedExprPtr& expr, + const core::CallTypedExpr* call, + core::ExpressionEvaluator* evaluator) { + if (!endWith(call->name(), "not")) { + return false; + } + auto exprs = evaluator->compile(expr); + VELOX_CHECK_EQ(exprs->size(), 1); + auto& compiled = exprs->expr(0); + return compiled->vectorFunction() && + compiled->vectorFunction()->getCanonicalName() == + exec::FunctionCanonicalName::kNot; +} + +double getPrestoSampleRate( + const core::TypedExprPtr& expr, + const core::CallTypedExpr* call, + core::ExpressionEvaluator* evaluator) { + if (!endWith(call->name(), "lt")) { + return -1; + } + VELOX_CHECK_EQ(call->inputs().size(), 2); + auto exprs = evaluator->compile(expr); + VELOX_CHECK_EQ(exprs->size(), 1); + auto& lt = exprs->expr(0); + if (!(lt->vectorFunction() && + lt->vectorFunction()->getCanonicalName() == + exec::FunctionCanonicalName::kLt)) { + return -1; + } + auto& rand = lt->inputs()[0]; + if (!(rand->inputs().empty() && rand->vectorFunction() && + rand->vectorFunction()->getCanonicalName() == + exec::FunctionCanonicalName::kRand)) { + return -1; + } + auto* rate = + dynamic_cast(call->inputs()[1].get()); + if (!(rate && rate->type()->kind() == TypeKind::DOUBLE)) { + return -1; + } + return std::max(0.0, std::min(1.0, rate->value().value())); +} + +template +void deduplicate(std::vector& values) { + std::sort(values.begin(), values.end()); + values.erase(std::unique(values.begin(), values.end()), values.end()); +} + +// Floating point map key subscripts are truncated toward 0 in Presto. For +// example given `a' as a map with floating point key, if user queries a[0.99], +// Presto coordinator will generate a required subfield a[0]; for a[-1.99] it +// will generate a[-1]; for anything larger than 9223372036854775807, it +// generates a[9223372036854775807]; for anything smaller than +// -9223372036854775808 it generates a[-9223372036854775808]. 
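The truncation rule described above fixes the key range a floating-point subscript should match: a subscript that truncated to 0 must match every key strictly between -1 and 1, with both bounds exclusive, which is what the helper defined next builds. A tiny standalone illustration of those bounds (floatKeyBoundsExample is an invented name and is not used by the helper):

#include <cstdio>

void floatKeyBoundsExample() {
  // a[0.99] and a[-0.99] are both truncated toward 0 by the coordinator, so
  // the reader must keep map keys k with -1 < k < 1 (exclusive on both sides).
  const double subscript = 0.0;
  const double lower = subscript - 1; // exclusive bound
  const double upper = subscript + 1; // exclusive bound
  std::printf("keep keys k with %f < k < %f\n", lower, upper);
  // 0.99 and -0.99 fall inside the range; exactly 1.0 and -1.0 do not, since
  // they truncate to 1 and -1 respectively.
}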
+template +std::unique_ptr makeFloatingPointMapKeyFilter( + const std::vector& subscripts) { + std::vector> filters; + for (auto subscript : subscripts) { + T lower = subscript; + T upper = subscript; + bool lowerUnbounded = subscript == std::numeric_limits::min(); + bool upperUnbounded = subscript == std::numeric_limits::max(); + bool lowerExclusive = false; + bool upperExclusive = false; + if (lower <= 0 && !lowerUnbounded) { + if (lower > subscript - 1) { + lower = subscript - 1; + } else { + lower = std::nextafter(lower, -std::numeric_limits::infinity()); + } + lowerExclusive = true; + } + if (upper >= 0 && !upperUnbounded) { + if (upper < subscript + 1) { + upper = subscript + 1; + } else { + upper = std::nextafter(upper, std::numeric_limits::infinity()); + } + upperExclusive = true; + } + if (lowerUnbounded && upperUnbounded) { + continue; + } + filters.push_back( + std::make_unique>( + lower, + lowerUnbounded, + lowerExclusive, + upper, + upperUnbounded, + upperExclusive, + false)); + } + if (filters.size() == 1) { + return std::move(filters[0]); + } + return std::make_unique(std::move(filters), false); +} + +inline uint8_t parseDelimiter(const std::string& delim) { + for (char const& ch : delim) { + if (!std::isdigit(ch)) { + return delim[0]; + } + } + return stoi(delim); +} + +std::unique_ptr parseSerdeParameters( + const std::unordered_map& serdeParameters, + const std::unordered_map& tableParameters) { + auto fieldIt = serdeParameters.find(dwio::common::SerDeOptions::kFieldDelim); + if (fieldIt == serdeParameters.end()) { + fieldIt = serdeParameters.find("serialization.format"); + } + auto collectionIt = + serdeParameters.find(dwio::common::SerDeOptions::kCollectionDelim); + if (collectionIt == serdeParameters.end()) { + // For collection delimiter, Hive 1.x, 2.x uses "colelction.delim", but + // Hive 3.x uses "collection.delim". + // See: https://issues.apache.org/jira/browse/HIVE-16922) + collectionIt = serdeParameters.find("colelction.delim"); + } + auto mapKeyIt = + serdeParameters.find(dwio::common::SerDeOptions::kMapKeyDelim); + + auto escapeCharIt = + serdeParameters.find(dwio::common::SerDeOptions::kEscapeChar); + + auto nullStringIt = tableParameters.find( + dwio::common::TableParameter::kSerializationNullFormat); + + if (fieldIt == serdeParameters.end() && + collectionIt == serdeParameters.end() && + mapKeyIt == serdeParameters.end() && + escapeCharIt == serdeParameters.end() && + nullStringIt == tableParameters.end()) { + return nullptr; + } + + uint8_t fieldDelim = '\1'; + uint8_t collectionDelim = '\2'; + uint8_t mapKeyDelim = '\3'; + if (fieldIt != serdeParameters.end()) { + fieldDelim = parseDelimiter(fieldIt->second); + } + if (collectionIt != serdeParameters.end()) { + collectionDelim = parseDelimiter(collectionIt->second); + } + if (mapKeyIt != serdeParameters.end()) { + mapKeyDelim = parseDelimiter(mapKeyIt->second); + } + + // If escape character is specified then we use it, unless it is empty - in + // which case we default to '\\'. + // If escape character is not specified (not in the map) we turn escaping off. 
+ // Logic is based on apache hive java code: + // https://github.com/apache/hive/blob/3f6f940af3f60cc28834268e5d7f5612e3b13c30/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySerDeParameters.java#L105-L108 + uint8_t escapeChar = '\\'; + const bool hasEscapeChar = (escapeCharIt != serdeParameters.end()); + if (hasEscapeChar) { + if (!escapeCharIt->second.empty()) { + // If delim is convertible to uint8_t then we use it as character code, + // otherwise we use the 1st character of the string. + escapeChar = folly::tryTo(escapeCharIt->second) + .value_or(escapeCharIt->second[0]); + } + } + + auto serDeOptions = hasEscapeChar + ? std::make_unique( + fieldDelim, collectionDelim, mapKeyDelim, escapeChar, true) + : std::make_unique( + fieldDelim, collectionDelim, mapKeyDelim); + if (nullStringIt != tableParameters.end()) { + serDeOptions->nullString = nullStringIt->second; + } + return serDeOptions; +} + +} // namespace + +bool isSynthesizedColumn( + const std::string& name, + const std::unordered_map>& + infoColumns) { + return infoColumns.count(name) != 0; +} + +bool isSpecialColumn( + const std::string& name, + const std::optional& specialName) { + return specialName.has_value() && name == *specialName; +} + +void processFieldSpec( + const RowTypePtr& dataColumns, + const TypePtr& outputType, + velox::common::ScanSpec& fieldSpec) { + fieldSpec.visit( + *outputType, [](const Type& type, velox::common::ScanSpec& spec) { + if (type.isMap() && !spec.isConstant()) { + auto* keys = + spec.childByName(velox::common::ScanSpec::kMapKeysFieldName); + VELOX_CHECK_NOT_NULL(keys); + keys->setFilter(std::make_shared()); + } + }); + if (dataColumns) { + auto i = dataColumns->getChildIdxIfExists(fieldSpec.fieldName()); + if (i.has_value()) { + if (dataColumns->childAt(*i)->isMap() && outputType->isRow()) { + fieldSpec.setFlatMapAsStruct(true); + } + } + } +} + +const std::string& getColumnName(const velox::common::Subfield& subfield) { + VELOX_CHECK_GT(subfield.path().size(), 0); + auto* field = + dynamic_cast(subfield.path()[0].get()); + VELOX_CHECK_NOT_NULL(field); + return field->name(); +} + +void checkColumnNameLowerCase(const std::shared_ptr& type) { + switch (type->kind()) { + case TypeKind::ARRAY: + checkColumnNameLowerCase(type->asArray().elementType()); + break; + case TypeKind::MAP: { + checkColumnNameLowerCase(type->asMap().keyType()); + checkColumnNameLowerCase(type->asMap().valueType()); + + } break; + case TypeKind::ROW: { + for (const auto& outputName : type->asRow().names()) { + VELOX_CHECK( + !std::any_of(outputName.begin(), outputName.end(), isupper)); + } + for (auto& childType : type->asRow().children()) { + checkColumnNameLowerCase(childType); + } + } break; + default: + VLOG(1) << "No need to check type lowercase mode" << type->toString(); + } +} + +void checkColumnNameLowerCase( + const velox::common::SubfieldFilters& filters, + const std::unordered_map>& + infoColumns) { + for (const auto& filterIt : filters) { + const auto name = filterIt.first.toString(); + if (isSynthesizedColumn(name, infoColumns)) { + continue; + } + const auto& path = filterIt.first.path(); + + for (int i = 0; i < path.size(); ++i) { + auto* nestedField = + dynamic_cast( + path[i].get()); + if (nestedField == nullptr) { + continue; + } + VELOX_CHECK(!std::any_of( + nestedField->name().begin(), nestedField->name().end(), isupper)); + } + } +} + +void checkColumnNameLowerCase(const core::TypedExprPtr& typeExpr) { + if (typeExpr == nullptr) { + return; + } + checkColumnNameLowerCase(typeExpr->type()); + for 
(auto& type : typeExpr->inputs()) { + checkColumnNameLowerCase(type); + } +} + +void configureReaderOptions( + const std::shared_ptr& ConnectorConfigBase, + const ConnectorQueryCtx* connectorQueryCtx, + const std::shared_ptr& tableHandle, + const std::shared_ptr& split, + dwio::common::ReaderOptions& readerOptions) { + configureReaderOptions( + ConnectorConfigBase, + connectorQueryCtx, + tableHandle->dataColumns(), + split, + tableHandle->tableParameters(), + readerOptions); +} + +void configureReaderOptions( + const std::shared_ptr& ConnectorConfigBase, + const ConnectorQueryCtx* connectorQueryCtx, + const RowTypePtr& fileSchema, + const std::shared_ptr& split, + const std::unordered_map& tableParameters, + dwio::common::ReaderOptions& readerOptions) { + auto sessionProperties = connectorQueryCtx->sessionProperties(); + + readerOptions.setLoadQuantum(ConnectorConfigBase->loadQuantum(sessionProperties)); + readerOptions.setMaxCoalesceBytes( + ConnectorConfigBase->maxCoalescedBytes(sessionProperties)); + readerOptions.setMaxCoalesceDistance( + ConnectorConfigBase->maxCoalescedDistanceBytes(sessionProperties)); + readerOptions.setFileColumnNamesReadAsLowerCase( + ConnectorConfigBase->isFileColumnNamesReadAsLowerCase(sessionProperties)); + + bool useColumnNamesForColumnMapping = false; + switch (split->fileFormat) { + case dwio::common::FileFormat::DWRF: + case dwio::common::FileFormat::ORC: { + useColumnNamesForColumnMapping = + ConnectorConfigBase->isOrcUseColumnNames(sessionProperties); + break; + } + case dwio::common::FileFormat::PARQUET: { + useColumnNamesForColumnMapping = + ConnectorConfigBase->isParquetUseColumnNames(sessionProperties); + break; + } + default: + useColumnNamesForColumnMapping = false; + } + readerOptions.setUseColumnNamesForColumnMapping( + useColumnNamesForColumnMapping); + + readerOptions.setFileSchema(fileSchema); + readerOptions.setFooterEstimatedSize(ConnectorConfigBase->footerEstimatedSize()); + readerOptions.setFilePreloadThreshold( + ConnectorConfigBase->filePreloadThreshold()); + readerOptions.setPrefetchRowGroups(ConnectorConfigBase->prefetchRowGroups()); + readerOptions.setNoCacheRetention(!split->cacheable); + + const auto& sessionTzName = connectorQueryCtx->sessionTimezone(); + if (!sessionTzName.empty()) { + const auto timezone = tz::locateZone(sessionTzName); + readerOptions.setSessionTimezone(timezone); + } + readerOptions.setAdjustTimestampToTimezone( + connectorQueryCtx->adjustTimestampToTimezone()); + readerOptions.setSelectiveNimbleReaderEnabled( + connectorQueryCtx->selectiveNimbleReaderEnabled()); + + if (readerOptions.fileFormat() != dwio::common::FileFormat::UNKNOWN) { + VELOX_CHECK( + readerOptions.fileFormat() == split->fileFormat, + "DataSource received splits of different formats: {} and {}", + dwio::common::toString(readerOptions.fileFormat()), + dwio::common::toString(split->fileFormat)); + } else { + auto serDeOptions = + parseSerdeParameters(split->serdeParameters, tableParameters); + if (serDeOptions) { + readerOptions.setSerDeOptions(*serDeOptions); + } + + readerOptions.setFileFormat(split->fileFormat); + } +} + +void configureRowReaderOptions( + const std::unordered_map& tableParameters, + const std::shared_ptr& scanSpec, + std::shared_ptr metadataFilter, + const RowTypePtr& rowType, + const std::shared_ptr& split, + const std::shared_ptr& ConnectorConfigBase, + const config::ConfigBase* sessionProperties, + dwio::common::RowReaderOptions& rowReaderOptions) { + auto skipRowsIt = + 
tableParameters.find(dwio::common::TableParameter::kSkipHeaderLineCount); + if (skipRowsIt != tableParameters.end()) { + rowReaderOptions.setSkipRows(folly::to(skipRowsIt->second)); + } + rowReaderOptions.setScanSpec(scanSpec); + rowReaderOptions.setMetadataFilter(std::move(metadataFilter)); + rowReaderOptions.setRequestedType(rowType); + rowReaderOptions.range(split->start, split->length); + if (ConnectorConfigBase && sessionProperties) { + rowReaderOptions.setTimestampPrecision( + static_cast( + ConnectorConfigBase->readTimestampUnit(sessionProperties))); + } +} + +// Recursively add subfields to scan spec. +void addSubfields( + const Type& type, + std::vector& subfields, + int level, + memory::MemoryPool* pool, + velox::common::ScanSpec& spec) { + int newSize = 0; + for (int i = 0; i < subfields.size(); ++i) { + if (level < subfields[i].subfield->path().size()) { + subfields[newSize++] = subfields[i]; + } else if (!subfields[i].filterOnly) { + spec.addAllChildFields(type); + return; + } + } + subfields.resize(newSize); + switch (type.kind()) { + case TypeKind::ROW: { + folly::F14FastMap> required; + for (auto& subfield : subfields) { + auto* element = subfield.subfield->path()[level].get(); + auto* nestedField = + dynamic_cast(element); + VELOX_CHECK( + nestedField, + "Unsupported for row subfields pruning: {}", + element->toString()); + required[nestedField->name()].push_back(subfield); + } + auto& rowType = type.asRow(); + for (int i = 0; i < rowType.size(); ++i) { + auto& childName = rowType.nameOf(i); + auto& childType = rowType.childAt(i); + auto* child = spec.addField(childName, i); + auto it = required.find(childName); + if (it == required.end()) { + child->setConstantValue( + BaseVector::createNullConstant(childType, 1, pool)); + } else { + addSubfields(*childType, it->second, level + 1, pool, *child); + } + } + break; + } + case TypeKind::MAP: { + auto& keyType = type.childAt(0); + auto* keys = spec.addMapKeyFieldRecursively(*keyType); + addSubfields( + *type.childAt(1), + subfields, + level + 1, + pool, + *spec.addMapValueField()); + if (subfields.empty()) { + return; + } + bool stringKey = keyType->isVarchar() || keyType->isVarbinary(); + std::vector stringSubscripts; + std::vector longSubscripts; + for (auto& subfield : subfields) { + auto* element = subfield.subfield->path()[level].get(); + if (dynamic_cast( + element)) { + return; + } + if (stringKey) { + auto* subscript = + dynamic_cast( + element); + VELOX_CHECK( + subscript, + "Unsupported for string map pruning: {}", + element->toString()); + stringSubscripts.push_back(subscript->index()); + } else { + auto* subscript = + dynamic_cast( + element); + VELOX_CHECK( + subscript, + "Unsupported for long map pruning: {}", + element->toString()); + longSubscripts.push_back(subscript->index()); + } + } + std::unique_ptr filter; + if (stringKey) { + deduplicate(stringSubscripts); + filter = std::make_unique( + stringSubscripts, false); + spec.setFlatMapFeatureSelection(std::move(stringSubscripts)); + } else { + deduplicate(longSubscripts); + if (keyType->isReal()) { + filter = makeFloatingPointMapKeyFilter(longSubscripts); + } else if (keyType->isDouble()) { + filter = makeFloatingPointMapKeyFilter(longSubscripts); + } else { + filter = velox::common::createBigintValues(longSubscripts, false); + } + std::vector features; + for (auto num : longSubscripts) { + features.push_back(std::to_string(num)); + } + spec.setFlatMapFeatureSelection(std::move(features)); + } + keys->setFilter(std::move(filter)); + break; + } + case 
TypeKind::ARRAY: { + addSubfields( + *type.childAt(0), + subfields, + level + 1, + pool, + *spec.addArrayElementField()); + if (subfields.empty()) { + return; + } + constexpr long kMaxIndex = std::numeric_limits::max(); + long maxIndex = -1; + for (auto& subfield : subfields) { + auto* element = subfield.subfield->path()[level].get(); + if (dynamic_cast( + element)) { + return; + } + auto* subscript = + dynamic_cast( + element); + VELOX_CHECK( + subscript, + "Unsupported for array pruning: {}", + element->toString()); + VELOX_USER_CHECK_GT( + subscript->index(), + 0, + "Non-positive array subscript cannot be push down"); + maxIndex = std::max(maxIndex, std::min(kMaxIndex, subscript->index())); + } + spec.setMaxArrayElementsCount(maxIndex); + break; + } + default: + break; + } +} + +std::unique_ptr createBufferedInput( + const FileHandle& fileHandle, + const dwio::common::ReaderOptions& readerOpts, + const ConnectorQueryCtx* connectorQueryCtx, + std::shared_ptr ioStats, + std::shared_ptr fsStats, + folly::Executor* executor) { + if (connectorQueryCtx->cache()) { + return std::make_unique( + fileHandle.file, + dwio::common::MetricsLog::voidLog(), + fileHandle.uuid, + connectorQueryCtx->cache(), + Connector::getTracker( + connectorQueryCtx->scanId(), readerOpts.loadQuantum()), + fileHandle.groupId, + ioStats, + std::move(fsStats), + executor, + readerOpts); + } + if (readerOpts.fileFormat() == dwio::common::FileFormat::NIMBLE) { + // Nimble streams (in case of single chunk) are compressed as whole and need + // to be fully fetched in order to do decompression, so there is no point to + // fetch them by quanta. Just use BufferedInput to fetch streams as whole + // to reduce memory footprint. + return std::make_unique( + fileHandle.file, + readerOpts.memoryPool(), + dwio::common::MetricsLog::voidLog(), + ioStats.get(), + fsStats.get()); + } + return std::make_unique( + fileHandle.file, + dwio::common::MetricsLog::voidLog(), + fileHandle.uuid, + Connector::getTracker( + connectorQueryCtx->scanId(), readerOpts.loadQuantum()), + fileHandle.groupId, + std::move(ioStats), + std::move(fsStats), + executor, + readerOpts); +} + +velox::core::TypedExprPtr extractFiltersFromRemainingFilter( + const core::TypedExprPtr& expr, + core::ExpressionEvaluator* evaluator, + bool negated, + SubfieldFilters& filters, + double& sampleRate) { + auto* call = dynamic_cast(expr.get()); + if (call == nullptr) { + return expr; + } + Filter* oldFilter = nullptr; + try { + Subfield subfield; + if (auto filter = exec::ExprToSubfieldFilterParser::getInstance() + ->leafCallToSubfieldFilter( + *call, subfield, evaluator, negated)) { + if (auto it = filters.find(subfield); it != filters.end()) { + oldFilter = it->second.get(); + filter = filter->mergeWith(oldFilter); + } + filters.insert_or_assign(std::move(subfield), std::move(filter)); + return nullptr; + } + } catch (const VeloxException&) { + LOG(WARNING) << "Unexpected failure when extracting filter for: " + << expr->toString(); + if (oldFilter) { + LOG(WARNING) << "Merging with " << oldFilter->toString(); + } + } + + if (isNotExpr(expr, call, evaluator)) { + auto inner = extractFiltersFromRemainingFilter( + call->inputs()[0], evaluator, !negated, filters, sampleRate); + return inner ? 
replaceInputs(call, {inner}) : nullptr; + } + + if ((call->name() == "and" && !negated) || + (call->name() == "or" && negated)) { + auto lhs = extractFiltersFromRemainingFilter( + call->inputs()[0], evaluator, negated, filters, sampleRate); + auto rhs = extractFiltersFromRemainingFilter( + call->inputs()[1], evaluator, negated, filters, sampleRate); + if (!lhs) { + return rhs; + } + if (!rhs) { + return lhs; + } + return replaceInputs(call, {lhs, rhs}); + } + if (!negated) { + double rate = getPrestoSampleRate(expr, call, evaluator); + if (rate != -1) { + sampleRate *= rate; + return nullptr; + } + } + return expr; +} + +} // namespace facebook::velox::connector::lakehouse::iceberg diff --git a/velox/connectors/lakehouse/iceberg/ConnectorUtil.h b/velox/connectors/lakehouse/iceberg/ConnectorUtil.h new file mode 100644 index 000000000000..c830cce3fa4a --- /dev/null +++ b/velox/connectors/lakehouse/iceberg/ConnectorUtil.h @@ -0,0 +1,182 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "ConnectorConfigBase.h" +#include "ConnectorSplitBase.h" +#include "FileHandle.h" +#include "TableHandleBase.h" +#include "velox/core/Expressions.h" +#include "velox/dwio/common/BufferedInput.h" +#include "velox/type/Type.h" + +#include + +namespace facebook::velox::connector::lakehouse::iceberg { + +struct SubfieldSpec { + const velox::common::Subfield* subfield; + bool filterOnly; +}; + +bool isSynthesizedColumn( + const std::string& name, + const std::unordered_map>& + infoColumns); + +bool isSpecialColumn( + const std::string& name, + const std::optional& specialName); + +void processFieldSpec( + const RowTypePtr& dataColumns, + const TypePtr& outputType, + velox::common::ScanSpec& fieldSpec); + +const std::string& getColumnName( + const velox::common::Subfield& subfield); + +void checkColumnNameLowerCase( + const std::shared_ptr& type); + +void checkColumnNameLowerCase( + const velox::common::SubfieldFilters& filters, + const std::unordered_map< + std::string, + std::shared_ptr>& + infoColumns); + +void checkColumnNameLowerCase( + const core::TypedExprPtr& typeExpr); + +void configureReaderOptions( + const std::shared_ptr< + const ConnectorConfigBase>& + ConnectorConfigBase, + const connector::ConnectorQueryCtx* connectorQueryCtx, + const std::shared_ptr< + const TableHandleBase>& tableHandle, + const std::shared_ptr< + const ConnectorSplitBase>& split, + dwio::common::ReaderOptions& readerOptions); + +void configureReaderOptions( + const std::shared_ptr< + const ConnectorConfigBase>& + ConnectorConfigBase, + const connector::ConnectorQueryCtx* connectorQueryCtx, + const RowTypePtr& fileSchema, + const std::shared_ptr< + const ConnectorSplitBase>& split, + const std::unordered_map& tableParameters, + dwio::common::ReaderOptions& readerOptions); + +void configureRowReaderOptions( + const std::unordered_map& tableParameters, + const std::shared_ptr& scanSpec, + std::shared_ptr metadataFilter, + const 
RowTypePtr& rowType, + const std::shared_ptr< + const ConnectorSplitBase>& + icebergSplit, + const std::shared_ptr< + const ConnectorConfigBase>& icebergConfig, + const config::ConfigBase* sessionProperties, + dwio::common::RowReaderOptions& rowReaderOptions); + +void configureRowReaderOptions( + const std::unordered_map& tableParameters, + const std::shared_ptr& scanSpec, + std::shared_ptr metadataFilter, + const RowTypePtr& rowType, + const std::shared_ptr< + const ConnectorSplitBase>& + icebergSplit, + const std::shared_ptr< + const ConnectorConfigBase>& icebergConfig, + const config::ConfigBase* sessionProperties, + dwio::common::RowReaderOptions& rowReaderOptions); + +void addSubfields( + const Type& type, + std::vector& subfields, + int level, + memory::MemoryPool* pool, + velox::common::ScanSpec& spec); + +template +VectorPtr newConstantFromString( + const TypePtr& type, + const std::optional& value, + vector_size_t size, + velox::memory::MemoryPool* pool, + const std::string& sessionTimezone, + bool asLocalTime, + bool isPartitionDateDaysSinceEpoch = false){ + using T = typename TypeTraits::NativeType; + if (!value.has_value()) { + return std::make_shared>(pool, size, true, type, T()); + } + + if (type->isDate()) { + int32_t days = 0; + // For Iceberg, the date partition values are already in daysSinceEpoch + // form. + if (isPartitionDateDaysSinceEpoch) { + days = folly::to(value.value()); + } else { + days = DATE()->toDays(static_cast(value.value())); + } + return std::make_shared>( + pool, size, false, type, std::move(days)); + } + + if constexpr (std::is_same_v) { + return std::make_shared>( + pool, size, false, type, StringView(value.value())); + } else { + auto copy = velox::util::Converter::tryCast(value.value()) + .thenOrThrow(folly::identity, [&](const Status& status) { + VELOX_USER_FAIL("{}", status.message()); + }); + if constexpr (kind == TypeKind::TIMESTAMP) { + if (asLocalTime) { + copy.toGMT(Timestamp::defaultTimezone()); + } + } + return std::make_shared>( + pool, size, false, type, std::move(copy)); + } +} + +std::unique_ptr +createBufferedInput( + const FileHandle& fileHandle, + const dwio::common::ReaderOptions& readerOpts, + const connector::ConnectorQueryCtx* connectorQueryCtx, + std::shared_ptr ioStats, + std::shared_ptr fsStats, + folly::Executor* executor); + +core::TypedExprPtr extractFiltersFromRemainingFilter( + const core::TypedExprPtr& expr, + core::ExpressionEvaluator* evaluator, + bool negated, + velox::common::SubfieldFilters& filters, + double& sampleRate); + +} // namespace facebook::velox::connector::lakehouse::iceberg diff --git a/velox/connectors/lakehouse/iceberg/DataSourceBase.cpp b/velox/connectors/lakehouse/iceberg/DataSourceBase.cpp new file mode 100644 index 000000000000..c8967238d6eb --- /dev/null +++ b/velox/connectors/lakehouse/iceberg/DataSourceBase.cpp @@ -0,0 +1,172 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
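newConstantFromString() turns a partition-key value, which arrives as an optional string, into a constant vector spanning the whole output batch. The usage sketch below assumes the (elided) template parameter is the TypeKind, as in the Hive connector's helper of the same name; partitionValueExamples and the literal values are invented for illustration, and the memory pool is supplied by the caller:

#include <optional>
#include <string>

#include "velox/connectors/lakehouse/iceberg/ConnectorUtil.h"
#include "velox/type/Type.h"
#include "velox/vector/ConstantVector.h"

using namespace facebook::velox;
namespace iceberg = facebook::velox::connector::lakehouse::iceberg;

// `pool` is a leaf memory pool owned by the caller.
void partitionValueExamples(memory::MemoryPool* pool) {
  // "42" becomes a BIGINT constant vector of 100 rows -- the usual way a
  // partition-key value is widened to match a scan output batch.
  auto bigintConst = iceberg::newConstantFromString<TypeKind::BIGINT>(
      BIGINT(), std::optional<std::string>("42"), /*size=*/100, pool,
      /*sessionTimezone=*/"", /*asLocalTime=*/false);

  // Iceberg date partition values are already days-since-epoch, so the last
  // flag skips the "YYYY-MM-DD" parsing path.
  auto dateConst = iceberg::newConstantFromString<TypeKind::INTEGER>(
      DATE(), std::optional<std::string>("19000"), /*size=*/100, pool,
      "", false, /*isPartitionDateDaysSinceEpoch=*/true);

  // A null partition value produces a null constant of the requested type.
  auto nullConst = iceberg::newConstantFromString<TypeKind::VARCHAR>(
      VARCHAR(), std::nullopt, /*size=*/100, pool, "", false);

  (void)bigintConst;
  (void)dateConst;
  (void)nullConst;
}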
+ */ + +#include "DataSourceBase.h" + +#include "ConnectorUtil.h" +#include "velox/dwio/common/ReaderFactory.h" + +#include +#include + +using facebook::velox::common::testutil::TestValue; + +namespace facebook::velox::connector::lakehouse::iceberg { + +// +DataSourceBase::DataSourceBase( + const RowTypePtr& outputType, + const ConnectorTableHandlePtr& tableHandle, + const connector::ColumnHandleMap& columnHandles, + FileHandleFactory* fileHandleFactory, + folly::Executor* executor, + const ConnectorQueryCtx* connectorQueryCtx, + const std::shared_ptr& connectorConfig) + : connectorQueryCtx_(connectorQueryCtx), + fileHandleFactory_(fileHandleFactory), + executor_(executor), + expressionEvaluator_(connectorQueryCtx->expressionEvaluator()), + connectorConfig_(connectorConfig), + pool_(connectorQueryCtx->memoryPool()), + outputType_(outputType) {} + +void DataSourceBase::addDynamicFilter( + column_index_t outputChannel, + const std::shared_ptr& filter) { + auto& fieldSpec = scanSpec_->getChildByChannel(outputChannel); + fieldSpec.setFilter(filter); + scanSpec_->resetCachedValues(true); + if (splitReader_) { + splitReader_->resetFilterCaches(); + } +} + +std::unordered_map +DataSourceBase::runtimeStats() { + auto res = runtimeStats_.toRuntimeMetricMap(); + res.insert( + {{"numPrefetch", RuntimeMetric(ioStats_->prefetch().count())}, + {"prefetchBytes", + RuntimeMetric( + ioStats_->prefetch().sum(), RuntimeCounter::Unit::kBytes)}, + {"totalScanTime", + RuntimeMetric(ioStats_->totalScanTime(), RuntimeCounter::Unit::kNanos)}, + {Connector::kTotalRemainingFilterTime, + RuntimeMetric( + totalRemainingFilterTime_.load(std::memory_order_relaxed), + RuntimeCounter::Unit::kNanos)}, + {"ioWaitWallNanos", + RuntimeMetric( + ioStats_->queryThreadIoLatency().sum() * 1000, + RuntimeCounter::Unit::kNanos)}, + {"maxSingleIoWaitWallNanos", + RuntimeMetric( + ioStats_->queryThreadIoLatency().max() * 1000, + RuntimeCounter::Unit::kNanos)}, + {"overreadBytes", + RuntimeMetric( + ioStats_->rawOverreadBytes(), RuntimeCounter::Unit::kBytes)}}); + if (ioStats_->read().count() > 0) { + res.insert({"numStorageRead", RuntimeMetric(ioStats_->read().count())}); + res.insert( + {"storageReadBytes", + RuntimeMetric(ioStats_->read().sum(), RuntimeCounter::Unit::kBytes)}); + } + if (ioStats_->ssdRead().count() > 0) { + res.insert({"numLocalRead", RuntimeMetric(ioStats_->ssdRead().count())}); + res.insert( + {"localReadBytes", + RuntimeMetric( + ioStats_->ssdRead().sum(), RuntimeCounter::Unit::kBytes)}); + } + if (ioStats_->ramHit().count() > 0) { + res.insert({"numRamRead", RuntimeMetric(ioStats_->ramHit().count())}); + res.insert( + {"ramReadBytes", + RuntimeMetric( + ioStats_->ramHit().sum(), RuntimeCounter::Unit::kBytes)}); + } + + const auto fsStats = fsStats_->stats(); + for (const auto& storageStats : fsStats) { + res.emplace(storageStats.first, storageStats.second); + } + return res; +} + +void DataSourceBase::setFromDataSource( + std::unique_ptr sourceUnique) { + auto source = dynamic_cast(sourceUnique.get()); + VELOX_CHECK_NOT_NULL(source, "Bad DataSource type"); + + split_ = std::move(source->split_); + runtimeStats_.skippedSplits += source->runtimeStats_.skippedSplits; + runtimeStats_.processedSplits += source->runtimeStats_.processedSplits; + runtimeStats_.skippedSplitBytes += source->runtimeStats_.skippedSplitBytes; + readerOutputType_ = std::move(source->readerOutputType_); + source->scanSpec_->moveAdaptationFrom(*scanSpec_); + scanSpec_ = std::move(source->scanSpec_); + splitReader_ = 
std::move(source->splitReader_); + splitReader_->setConnectorQueryCtx(connectorQueryCtx_); + // New io will be accounted on the stats of 'source'. Add the existing + // balance to that. + source->ioStats_->merge(*ioStats_); + ioStats_ = std::move(source->ioStats_); + source->fsStats_->merge(*fsStats_); + fsStats_ = std::move(source->fsStats_); +} + +int64_t DataSourceBase::estimatedRowSize() { + if (!splitReader_) { + return kUnknownRowSize; + } + return splitReader_->estimatedRowSize(); +} + +vector_size_t DataSourceBase::evaluateRemainingFilter( + RowVectorPtr& rowVector) { + for (auto fieldIndex : multiReferencedFields_) { + LazyVector::ensureLoadedRows( + rowVector->childAt(fieldIndex), + filterRows_, + filterLazyDecoded_, + filterLazyBaseRows_); + } + uint64_t filterTimeUs{0}; + vector_size_t rowsRemaining{0}; + { + MicrosecondTimer timer(&filterTimeUs); + expressionEvaluator_->evaluate( + remainingFilterExprSet_.get(), filterRows_, *rowVector, filterResult_); + rowsRemaining = exec::processFilterResults( + filterResult_, filterRows_, filterEvalCtx_, pool_); + } + totalRemainingFilterTime_.fetch_add( + filterTimeUs * 1000, std::memory_order_relaxed); + return rowsRemaining; +} + +bool isSpecialColumn(const std::string& name) { + return false; +} + +void DataSourceBase::resetSplit() { + split_.reset(); + splitReader_->resetSplit(); + // Keep readers around to hold adaptation. +} + +} // namespace facebook::velox::connector::lakehouse::iceberg diff --git a/velox/connectors/lakehouse/iceberg/DataSourceBase.h b/velox/connectors/lakehouse/iceberg/DataSourceBase.h new file mode 100644 index 000000000000..899caabe0c14 --- /dev/null +++ b/velox/connectors/lakehouse/iceberg/DataSourceBase.h @@ -0,0 +1,160 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include "ConnectorConfigBase.h" +#include "ConnectorSplitBase.h" +#include "ConnectorUtil.h" +#include "FileHandle.h" +#include "SplitReaderBase.h" +#include "velox/connectors/Connector.h" +#include "velox/dwio/common/ScanSpec.h" +#include "velox/exec/OperatorUtils.h" +#include "velox/expression/FieldReference.h" +#include "velox/type/Type.h" + +namespace facebook::velox::connector::lakehouse::iceberg { + +class DataSourceBase : public DataSource { + public: + DataSourceBase( + const RowTypePtr& outputType, + const ConnectorTableHandlePtr& tableHandle, + const connector::ColumnHandleMap& columnHandles, + FileHandleFactory* fileHandleFactory, + folly::Executor* executor, + const ConnectorQueryCtx* connectorQueryCtx, + const std::shared_ptr& connectorConfig); + + const velox::common::SubfieldFilters* getFilters() const override { + return &filters_; + } + + void addDynamicFilter( + column_index_t outputChannel, + const std::shared_ptr& filter) override; + + uint64_t getCompletedBytes() override { + return ioStats_->rawBytesRead(); + } + + uint64_t getCompletedRows() override { + return completedRows_; + } + + std::unordered_map runtimeStats(); + + bool allPrefetchIssued() const override { + return splitReader_ && splitReader_->allPrefetchIssued(); + } + + void setFromDataSource(std::unique_ptr sourceUnique) override; + + int64_t estimatedRowSize() override; + + protected: +// virtual std::unique_ptr createSplitReader(); + //virtual std::shared_ptr makeScanSpec() = 0; + + virtual bool isSpecialColumn(const std::string& name) const { + VELOX_UNREACHABLE(); + } + + virtual bool hasRemainingPartitionFilter() { + return false; + } + + virtual vector_size_t evaluateRemainingPartitionFilter( + RowVectorPtr& rowVector, + BufferPtr& remainingIndices) { + return rowVector->size(); + } + + // Evaluates remainingFilter_ on the specified vector. Returns number of rows + // passed. Populates filterEvalCtx_.selectedIndices and selectedBits if only + // some rows passed the filter. If none or all rows passed + // filterEvalCtx_.selectedIndices and selectedBits are not updated. + vector_size_t evaluateRemainingFilter(RowVectorPtr& rowVector); + + const RowVectorPtr& getEmptyOutput() { + if (!emptyOutput_) { + emptyOutput_ = RowVector::createEmpty(outputType_, pool_); + } + return emptyOutput_; + } + + void resetSplit(); + + const ConnectorQueryCtx* const connectorQueryCtx_; + FileHandleFactory* const fileHandleFactory_; + folly::Executor* const executor_; + core::ExpressionEvaluator* const expressionEvaluator_; + const std::shared_ptr connectorConfig_; + memory::MemoryPool* const pool_; + + // The row type for the data source output, not including filter-only columns + const RowTypePtr outputType_; + // Output type from file reader. This is different from outputType_ that it + // contains column names before assignment, and columns that only used in + // remaining filter. + RowTypePtr readerOutputType_; + folly::F14FastMap> + subfields_; + + std::shared_ptr tableHandle_; + // Column handles for the partition key columns keyed on partition key column + // name. It comes from the TableScanNode's assignments. + std::unordered_map> + partitionColumnHandles_; + // Column handles for the Split info columns keyed on their column names. 
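+  // For example, the synthetic "$path" info column that
+  // IcebergConnectorSplitBuilder attaches to every split is resolved
+  // through these handles.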
+ std::unordered_map> + infoColumns_; + // std::unordered_map> + // specialColumns_; + // SpecialColumnNames specialColumns_{}; + std::shared_ptr scanSpec_; + std::shared_ptr split_; + std::unique_ptr splitReader_; + + VectorPtr output_; + + std::shared_ptr ioStats_; + std::shared_ptr fsStats_; + dwio::common::RuntimeStatistics runtimeStats_; + std::atomic totalRemainingFilterTime_{0}; + uint64_t completedRows_ = 0; + + velox::common::SubfieldFilters filters_; + std::shared_ptr metadataFilter_; + std::shared_ptr remainingFilterExprSet_; + RowVectorPtr emptyOutput_; + + // Field indices referenced in both remaining filter and output type. These + // columns need to be materialized eagerly to avoid missing values in output. + std::vector multiReferencedFields_; + + std::shared_ptr randomSkip_; + std::vector partitions_; + + // Reusable memory for remaining filter evaluation. + VectorPtr filterResult_; + SelectivityVector filterRows_; + DecodedVector filterLazyDecoded_; + SelectivityVector filterLazyBaseRows_; + exec::FilterEvalCtx filterEvalCtx_; +}; + +} // namespace facebook::velox::connector::lakehouse::iceberg diff --git a/velox/connectors/lakehouse/iceberg/FileHandle.cpp b/velox/connectors/lakehouse/iceberg/FileHandle.cpp new file mode 100644 index 000000000000..fb2e444ee6b3 --- /dev/null +++ b/velox/connectors/lakehouse/iceberg/FileHandle.cpp @@ -0,0 +1,77 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "FileHandle.h" +#include "velox/common/base/Counters.h" +#include "velox/common/base/StatsReporter.h" + +#include +#include + +namespace facebook::velox::connector::lakehouse::iceberg { + +uint64_t FileHandleSizer::operator()(const FileHandle& fileHandle) { + // TODO: add to support variable file cache size support when the file system + // underneath supports. + return 1; +} + +namespace { +// The group tracking is at the level of the directory, i.e. Hive partition. +std::string groupName(const std::string& filename) { + const char* slash = strrchr(filename.c_str(), '/'); + return slash ? std::string(filename.data(), slash - filename.data()) + : filename; +} +} // namespace + +std::unique_ptr FileHandleGenerator::operator()( + const FileHandleKey& key, + const FileProperties* properties, + filesystems::File::IoStats* stats) { + // We have seen cases where drivers are stuck when creating file handles. + // Adding a trace here to spot this more easily in future. 
+ process::TraceContext trace("FileHandleGenerator::operator()"); + uint64_t elapsedTimeUs{0}; + std::unique_ptr fileHandle; + { + MicrosecondTimer timer(&elapsedTimeUs); + fileHandle = std::make_unique(); + filesystems::FileOptions options; + options.stats = stats; + options.tokenProvider = key.tokenProvider; + if (properties) { + options.fileSize = properties->fileSize; + options.readRangeHint = properties->readRangeHint; + options.extraFileInfo = properties->extraFileInfo; + } + const auto& filename = key.filename; + fileHandle->file = filesystems::getFileSystem(filename, properties_) + ->openFileForRead(filename, options); + fileHandle->uuid = StringIdLease(fileIds(), filename); + fileHandle->groupId = StringIdLease(fileIds(), groupName(filename)); + VLOG(1) << "Generating file handle for: " << filename + << " uuid: " << fileHandle->uuid.id(); + } + RECORD_HISTOGRAM_METRIC_VALUE( + kMetricHiveFileHandleGenerateLatencyMs, elapsedTimeUs / 1000); + // TODO: build the hash map/etc per file type -- presumably after reading + // the appropriate magic number from the file, or perhaps we include the file + // type in the file handle key. + return fileHandle; +} + +} // namespace facebook::velox::connector::lakehouse::iceberg diff --git a/velox/connectors/lakehouse/iceberg/FileHandle.h b/velox/connectors/lakehouse/iceberg/FileHandle.h new file mode 100644 index 000000000000..51f4f3ab292c --- /dev/null +++ b/velox/connectors/lakehouse/iceberg/FileHandle.h @@ -0,0 +1,133 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// A FileHandle is a File pointer plus some (optional, file-type-dependent) +// extra information for speeding up loading columnar data. For example, when +// we open a file we might build a hash map saying what region(s) on disk +// correspond to a given column in a given stripe. +// +// The FileHandle will normally be used in conjunction with a CachedFactory +// to speed up queries that hit the same files repeatedly; see the +// FileHandleCache and FileHandleFactory. + +#pragma once + +#include "FileProperties.h" +#include "velox/common/caching/CachedFactory.h" +#include "velox/common/caching/FileIds.h" +#include "velox/common/caching/StringIdMap.h" +#include "velox/common/config/Config.h" +#include "velox/common/file/File.h" + +#include + +namespace facebook::velox::connector::lakehouse::iceberg { + +// See the file comment. +struct FileHandle { + std::shared_ptr file; + + // Each time we make a new FileHandle we assign it a uuid and use that id as + // the identifier in downstream data caching structures. This saves a lot of + // memory compared to using the filename as the identifier. + StringIdLease uuid; + + // Id for the group of files this belongs to, e.g. its + // directory. Used for coarse granularity access tracking, for + // example to decide placing on SSD. + StringIdLease groupId; + + // We'll want to have a hash map here to record the identifier->byte range + // mappings. 
Different formats may have different identifiers, so we may need + // a union of maps. For example in orc you need 3 integers (I think, to be + // confirmed with xldb): the row bundle, the node, and the sequence. For the + // first diff we'll not include the map. +}; + +/// Estimates the memory usage of a FileHandle object. +struct FileHandleSizer { + uint64_t operator()(const FileHandle& a); +}; + +struct FileHandleKey { + std::string filename; + std::shared_ptr tokenProvider{nullptr}; + + bool operator==(const FileHandleKey& other) const { + if (filename != other.filename) { + return false; + } + + if (tokenProvider == other.tokenProvider) { + return true; + } + + if (!tokenProvider || !other.tokenProvider) { + return false; + } + + return tokenProvider->equals(*other.tokenProvider); + } +}; + +} // facebook::velox::connector::lakehouse::iceberg + +namespace std { +template <> +struct hash { + size_t operator()( + const facebook::velox::connector::lakehouse::iceberg::FileHandleKey& key) + const noexcept { + size_t filenameHash = std::hash()(key.filename); + return key.tokenProvider ? facebook::velox::bits::hashMix( + filenameHash, key.tokenProvider->hash()) + : filenameHash; + } +}; +} // namespace std + +namespace facebook::velox::connector::lakehouse::iceberg { + +using FileHandleCache = SimpleLRUCache; + +// Creates FileHandles via the Generator interface the CachedFactory requires. +class FileHandleGenerator { + public: + FileHandleGenerator() {} + FileHandleGenerator(std::shared_ptr properties) + : properties_(std::move(properties)) {} + std::unique_ptr operator()( + const FileHandleKey& filename, + const FileProperties* properties, + filesystems::File::IoStats* stats); + + private: + const std::shared_ptr properties_; +}; + +using FileHandleFactory = CachedFactory< + FileHandleKey, + FileHandle, + FileHandleGenerator, + FileProperties, + filesystems::File::IoStats, + FileHandleSizer>; + +using FileHandleCachedPtr = CachedPtr; + +using FileHandleCacheStats = SimpleLRUCacheStats; + +} // namespace facebook::velox::connector::lakehouse::iceberg diff --git a/velox/connectors/lakehouse/iceberg/FileProperties.h b/velox/connectors/lakehouse/iceberg/FileProperties.h new file mode 100644 index 000000000000..0092c067105e --- /dev/null +++ b/velox/connectors/lakehouse/iceberg/FileProperties.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +namespace facebook::velox::connector::lakehouse::iceberg { + +struct FileProperties { + std::optional fileSize; + // TODO: remove. Not needed for Iceberg + std::optional modificationTime; + std::optional readRangeHint{std::nullopt}; + // TODO: remove. 
Not needed for Iceberg + std::shared_ptr extraFileInfo{nullptr}; +}; + +} // namespace facebook::velox::connector::lakehouse::iceberg diff --git a/velox/connectors/lakehouse/iceberg/IcebergConfig.cpp b/velox/connectors/lakehouse/iceberg/IcebergConfig.cpp new file mode 100644 index 000000000000..b4af8d8a18c3 --- /dev/null +++ b/velox/connectors/lakehouse/iceberg/IcebergConfig.cpp @@ -0,0 +1,245 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "velox/connectors/lakehouse/iceberg/IcebergConfig.h" + +#include + +namespace facebook::velox::connector::lakehouse::iceberg { + +namespace { + +IcebergConfig::InsertExistingPartitionsBehavior +stringToInsertExistingPartitionsBehavior(const std::string& strValue) { + auto upperValue = boost::algorithm::to_upper_copy(strValue); + if (upperValue == "ERROR") { + return IcebergConfig::InsertExistingPartitionsBehavior::kError; + } + if (upperValue == "OVERWRITE") { + return IcebergConfig::InsertExistingPartitionsBehavior::kOverwrite; + } + VELOX_UNSUPPORTED( + "Unsupported insert existing partitions behavior: {}.", strValue); +} + +} // namespace + +// static +std::string IcebergConfig::insertExistingPartitionsBehaviorString( + InsertExistingPartitionsBehavior behavior) { + switch (behavior) { + case InsertExistingPartitionsBehavior::kError: + return "ERROR"; + case InsertExistingPartitionsBehavior::kOverwrite: + return "OVERWRITE"; + default: + return fmt::format("UNKNOWN BEHAVIOR {}", static_cast(behavior)); + } +} + +IcebergConfig::InsertExistingPartitionsBehavior +IcebergConfig::insertExistingPartitionsBehavior( + const config::ConfigBase* session) const { + return stringToInsertExistingPartitionsBehavior(session->get( + kInsertExistingPartitionsBehaviorSession, + config_->get(kInsertExistingPartitionsBehavior, "ERROR"))); +} + +uint32_t IcebergConfig::maxPartitionsPerWriters( + const config::ConfigBase* session) const { + return session->get( + kMaxPartitionsPerWritersSession, + config_->get(kMaxPartitionsPerWriters, 128)); +} + +uint32_t IcebergConfig::maxBucketCount(const config::ConfigBase* session) const { + return session->get( + kMaxBucketCountSession, config_->get(kMaxBucketCount, 100'000)); +} + +bool IcebergConfig::immutablePartitions() const { + return config_->get(kImmutablePartitions, false); +} + +std::string IcebergConfig::gcsEndpoint() const { + return config_->get(kGcsEndpoint, std::string("")); +} + +std::string IcebergConfig::gcsCredentialsPath() const { + return config_->get(kGcsCredentialsPath, std::string("")); +} + +std::optional IcebergConfig::gcsMaxRetryCount() const { + return static_cast>(config_->get(kGcsMaxRetryCount)); +} + +std::optional IcebergConfig::gcsMaxRetryTime() const { + return static_cast>( + config_->get(kGcsMaxRetryTime)); +} + +bool IcebergConfig::isOrcUseColumnNames(const config::ConfigBase* session) const { + return session->get( + kOrcUseColumnNamesSession, config_->get(kOrcUseColumnNames, false)); +} + +bool 
IcebergConfig::isParquetUseColumnNames( + const config::ConfigBase* session) const { + return session->get( + kParquetUseColumnNamesSession, + config_->get(kParquetUseColumnNames, false)); +} + +bool IcebergConfig::isFileColumnNamesReadAsLowerCase( + const config::ConfigBase* session) const { + return session->get( + kFileColumnNamesReadAsLowerCaseSession, + config_->get(kFileColumnNamesReadAsLowerCase, false)); +} + +bool IcebergConfig::isPartitionPathAsLowerCase( + const config::ConfigBase* session) const { + return session->get(kPartitionPathAsLowerCaseSession, true); +} + +bool IcebergConfig::allowNullPartitionKeys( + const config::ConfigBase* session) const { + return session->get( + kAllowNullPartitionKeysSession, + config_->get(kAllowNullPartitionKeys, true)); +} + +bool IcebergConfig::ignoreMissingFiles(const config::ConfigBase* session) const { + return session->get(kIgnoreMissingFilesSession, false); +} + +int64_t IcebergConfig::maxCoalescedBytes(const config::ConfigBase* session) const { + return session->get( + kMaxCoalescedBytesSession, + config_->get(kMaxCoalescedBytes, 128 << 20)); // 128MB +} + +int32_t IcebergConfig::maxCoalescedDistanceBytes( + const config::ConfigBase* session) const { + const auto distance = config::toCapacity( + session->get( + kMaxCoalescedDistanceSession, + config_->get(kMaxCoalescedDistance, "512kB")), + config::CapacityUnit::BYTE); + VELOX_USER_CHECK_LE( + distance, + std::numeric_limits::max(), + "The max merge distance to combine read requests must be less than 2GB." + " Got {} bytes.", + distance); + return int32_t(distance); +} + +int32_t IcebergConfig::prefetchRowGroups() const { + return config_->get(kPrefetchRowGroups, 1); +} + +int32_t IcebergConfig::loadQuantum(const config::ConfigBase* session) const { + return session->get( + kLoadQuantumSession, config_->get(kLoadQuantum, 8 << 20)); +} + +int32_t IcebergConfig::numCacheFileHandles() const { + return config_->get(kNumCacheFileHandles, 20'000); +} + +uint64_t IcebergConfig::fileHandleExpirationDurationMs() const { + return config_->get(kFileHandleExpirationDurationMs, 0); +} + +bool IcebergConfig::isFileHandleCacheEnabled() const { + return config_->get(kEnableFileHandleCache, true); +} + +std::string IcebergConfig::writeFileCreateConfig() const { + return config_->get(kWriteFileCreateConfig, ""); +} + +uint32_t IcebergConfig::sortWriterMaxOutputRows( + const config::ConfigBase* session) const { + return session->get( + kSortWriterMaxOutputRowsSession, + config_->get(kSortWriterMaxOutputRows, 1024)); +} + +uint64_t IcebergConfig::sortWriterMaxOutputBytes( + const config::ConfigBase* session) const { + return config::toCapacity( + session->get( + kSortWriterMaxOutputBytesSession, + config_->get(kSortWriterMaxOutputBytes, "10MB")), + config::CapacityUnit::BYTE); +} + +uint64_t IcebergConfig::sortWriterFinishTimeSliceLimitMs( + const config::ConfigBase* session) const { + return session->get( + kSortWriterFinishTimeSliceLimitMsSession, + config_->get(kSortWriterFinishTimeSliceLimitMs, 5'000)); +} + +uint64_t IcebergConfig::footerEstimatedSize() const { + return config_->get(kFooterEstimatedSize, 256UL << 10); +} + +uint64_t IcebergConfig::filePreloadThreshold() const { + return config_->get(kFilePreloadThreshold, 8UL << 20); +} + +uint8_t IcebergConfig::readTimestampUnit(const config::ConfigBase* session) const { + const auto unit = session->get( + kReadTimestampUnitSession, + config_->get(kReadTimestampUnit, 3 /*milli*/)); + VELOX_CHECK( + unit == 3 || unit == 6 /*micro*/ || unit == 9 
/*nano*/, + "Invalid timestamp unit."); + return unit; +} + +bool IcebergConfig::readTimestampPartitionValueAsLocalTime( + const config::ConfigBase* session) const { + return session->get( + kReadTimestampPartitionValueAsLocalTimeSession, + config_->get(kReadTimestampPartitionValueAsLocalTime, true)); +} + +bool IcebergConfig::readStatsBasedFilterReorderDisabled( + const config::ConfigBase* session) const { + return session->get( + kReadStatsBasedFilterReorderDisabledSession, + config_->get(kReadStatsBasedFilterReorderDisabled, false)); +} + +bool IcebergConfig::isRequestedTypeCheckEnabled( + const config::ConfigBase* session) const { + return session->get( + kEnableRequestedTypeCheckSession, + config_->get(kEnableRequestedTypeCheck, true)); +} + +std::string IcebergConfig::icebergLocalDataPath() const { + return config_->get(kLocalDataPath, ""); +} + +std::string IcebergConfig::icebergLocalFileFormat() const { + return config_->get(kLocalFileFormat, ""); +} +} // namespace facebook::velox::connector::lakehouse::iceberg diff --git a/velox/connectors/lakehouse/iceberg/IcebergConfig.h b/velox/connectors/lakehouse/iceberg/IcebergConfig.h new file mode 100644 index 000000000000..76b92a467ecc --- /dev/null +++ b/velox/connectors/lakehouse/iceberg/IcebergConfig.h @@ -0,0 +1,291 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include "ConnectorConfigBase.h" +#include "velox/common/base/Exceptions.h" + +#include +#include + +namespace facebook::velox::config { +class ConfigBase; +} + +namespace facebook::velox::connector::lakehouse::iceberg { + +class IcebergConfig : public lakehouse::iceberg::ConnectorConfigBase { + public: + IcebergConfig(std::shared_ptr config) + : lakehouse::iceberg::ConnectorConfigBase(config) {} + + enum class InsertExistingPartitionsBehavior { + kError, + kOverwrite, + }; + + static std::string insertExistingPartitionsBehaviorString( + InsertExistingPartitionsBehavior behavior); + + /// Behavior on insert into existing partitions. + static constexpr const char* kInsertExistingPartitionsBehaviorSession = + "insert_existing_partitions_behavior"; + static constexpr const char* kInsertExistingPartitionsBehavior = + "insert-existing-partitions-behavior"; + + /// Maximum number of (bucketed) partitions per a single table writer + /// instance. + static constexpr const char* kMaxPartitionsPerWriters = + "max-partitions-per-writers"; + static constexpr const char* kMaxPartitionsPerWritersSession = + "max_partitions_per_writers"; + + /// Maximum number of buckets allowed to output by the table writers. + static constexpr const char* kMaxBucketCount = "hive.max-bucket-count"; + static constexpr const char* kMaxBucketCountSession = "hive.max_bucket_count"; + + /// Whether new data can be inserted into an unpartition table. + /// Velox currently does not support appending data to existing partitions. 
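+  /// Defaults to false; see immutablePartitions() below.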
+ static constexpr const char* kImmutablePartitions = + "hive.immutable-partitions"; + + /// The GCS storage endpoint server. + static constexpr const char* kGcsEndpoint = "hive.gcs.endpoint"; + + /// The GCS service account configuration JSON key file. + static constexpr const char* kGcsCredentialsPath = + "hive.gcs.json-key-file-path"; + + /// The GCS maximum retry counter of transient errors. + static constexpr const char* kGcsMaxRetryCount = "hive.gcs.max-retry-count"; + + /// The GCS maximum time allowed to retry transient errors. + static constexpr const char* kGcsMaxRetryTime = "hive.gcs.max-retry-time"; + + /// Maps table field names to file field names using names, not indices. + // TODO: remove hive_orc_use_column_names since it doesn't exist in presto, + // right now this is only used for testing. + static constexpr const char* kOrcUseColumnNames = "hive.orc.use-column-names"; + static constexpr const char* kOrcUseColumnNamesSession = + "hive_orc_use_column_names"; + + /// Maps table field names to file field names using names, not indices. + static constexpr const char* kParquetUseColumnNames = + "hive.parquet.use-column-names"; + static constexpr const char* kParquetUseColumnNamesSession = + "parquet_use_column_names"; + + /// Reads the source file column name as lower case. + static constexpr const char* kFileColumnNamesReadAsLowerCase = + "file-column-names-read-as-lower-case"; + static constexpr const char* kFileColumnNamesReadAsLowerCaseSession = + "file_column_names_read_as_lower_case"; + + static constexpr const char* kPartitionPathAsLowerCaseSession = + "partition_path_as_lower_case"; + + static constexpr const char* kAllowNullPartitionKeys = + "allow-null-partition-keys"; + static constexpr const char* kAllowNullPartitionKeysSession = + "allow_null_partition_keys"; + + static constexpr const char* kIgnoreMissingFilesSession = + "ignore_missing_files"; + + /// The max coalesce bytes for a request. + static constexpr const char* kMaxCoalescedBytes = "max-coalesced-bytes"; + static constexpr const char* kMaxCoalescedBytesSession = + "max-coalesced-bytes"; + + /// The max merge distance to combine read requests. + /// Note: The session property name differs from the constant name for + /// backward compatibility with Presto. + static constexpr const char* kMaxCoalescedDistance = "max-coalesced-distance"; + static constexpr const char* kMaxCoalescedDistanceSession = + "orc_max_merge_distance"; + + /// The number of prefetch rowgroups + static constexpr const char* kPrefetchRowGroups = "prefetch-rowgroups"; + + /// The total size in bytes for a direct coalesce request. Up to 8MB load + /// quantum size is supported when SSD cache is enabled. + static constexpr const char* kLoadQuantum = "load-quantum"; + static constexpr const char* kLoadQuantumSession = "load-quantum"; + + /// Maximum number of entries in the file handle cache. + static constexpr const char* kNumCacheFileHandles = "num_cached_file_handles"; + + /// Expiration time in ms for a file handle in the cache. A value of 0 + /// means cache will not evict the handle after kFileHandleExprationDurationMs + /// has passed. + static constexpr const char* kFileHandleExpirationDurationMs = + "file-handle-expiration-duration-ms"; + + /// Enable file handle cache. + static constexpr const char* kEnableFileHandleCache = + "file-handle-cache-enabled"; + + /// The size in bytes to be fetched with Meta data together, used when the + /// data after meta data will be used later. 
Optimization to decrease small IO + /// request + static constexpr const char* kFooterEstimatedSize = "footer-estimated-size"; + + /// The threshold of file size in bytes when the whole file is fetched with + /// meta data together. Optimization to decrease the small IO requests + static constexpr const char* kFilePreloadThreshold = "file-preload-threshold"; + + /// Config used to create write files. This config is provided to underlying + /// file system through hive connector and data sink. The config is free form. + /// The form should be defined by the underlying file system. + static constexpr const char* kWriteFileCreateConfig = + "hive.write_file_create_config"; + + /// Maximum number of rows for sort writer in one batch of output. + static constexpr const char* kSortWriterMaxOutputRows = + "sort-writer-max-output-rows"; + static constexpr const char* kSortWriterMaxOutputRowsSession = + "sort_writer_max_output_rows"; + + /// Maximum bytes for sort writer in one batch of output. + static constexpr const char* kSortWriterMaxOutputBytes = + "sort-writer-max-output-bytes"; + static constexpr const char* kSortWriterMaxOutputBytesSession = + "sort_writer_max_output_bytes"; + + /// Sort Writer will exit finish() method after this many milliseconds even if + /// it has not completed its work yet. Zero means no time limit. + static constexpr const char* kSortWriterFinishTimeSliceLimitMs = + "sort-writer_finish_time_slice_limit_ms"; + static constexpr const char* kSortWriterFinishTimeSliceLimitMsSession = + "sort_writer_finish_time_slice_limit_ms"; + + // The unit for reading timestamps from files. + static constexpr const char* kReadTimestampUnit = + "hive.reader.timestamp-unit"; + static constexpr const char* kReadTimestampUnitSession = + "hive.reader.timestamp_unit"; + + static constexpr const char* kReadTimestampPartitionValueAsLocalTime = + "hive.reader.timestamp-partition-value-as-local-time"; + static constexpr const char* kReadTimestampPartitionValueAsLocalTimeSession = + "hive.reader.timestamp_partition_value_as_local_time"; + + static constexpr const char* kReadStatsBasedFilterReorderDisabled = + "stats-based-filter-reorder-disabled"; + static constexpr const char* kReadStatsBasedFilterReorderDisabledSession = + "stats_based_filter_reorder_disabled"; + + static constexpr const char* kLocalDataPath = "hive_local_data_path"; + static constexpr const char* kLocalFileFormat = "hive_local_file_format"; + + static constexpr const char* kEnableRequestedTypeCheck = + "enable-requested-type-check"; + static constexpr const char* kEnableRequestedTypeCheckSession = + "enable_requested_type_check"; + + InsertExistingPartitionsBehavior insertExistingPartitionsBehavior( + const config::ConfigBase* session) const; + + uint32_t maxPartitionsPerWriters(const config::ConfigBase* session) const; + + uint32_t maxBucketCount(const config::ConfigBase* session) const; + + bool immutablePartitions() const; + + std::string gcsEndpoint() const; + + std::string gcsCredentialsPath() const; + + std::optional gcsMaxRetryCount() const; + + std::optional gcsMaxRetryTime() const; + + bool isOrcUseColumnNames(const config::ConfigBase* session) const; + + bool isParquetUseColumnNames(const config::ConfigBase* session) const; + + bool isFileColumnNamesReadAsLowerCase( + const config::ConfigBase* session) const; + + bool isPartitionPathAsLowerCase(const config::ConfigBase* session) const; + + bool allowNullPartitionKeys(const config::ConfigBase* session) const; + + bool ignoreMissingFiles(const config::ConfigBase* 
session) const; + + int64_t maxCoalescedBytes(const config::ConfigBase* session) const; + + int32_t maxCoalescedDistanceBytes(const config::ConfigBase* session) const; + + int32_t prefetchRowGroups() const; + + int32_t loadQuantum(const config::ConfigBase* session) const; + + int32_t numCacheFileHandles() const; + + uint64_t fileHandleExpirationDurationMs() const; + + bool isFileHandleCacheEnabled() const; + + uint64_t fileWriterFlushThresholdBytes() const; + + std::string writeFileCreateConfig() const; + + uint32_t sortWriterMaxOutputRows(const config::ConfigBase* session) const; + + uint64_t sortWriterMaxOutputBytes(const config::ConfigBase* session) const; + + uint64_t sortWriterFinishTimeSliceLimitMs( + const config::ConfigBase* session) const; + + uint64_t footerEstimatedSize() const; + + uint64_t filePreloadThreshold() const; + + // Returns the timestamp unit used when reading timestamps from files. + uint8_t readTimestampUnit(const config::ConfigBase* session) const; + + // Whether to read timestamp partition value as local time. If false, read as + // UTC. + bool readTimestampPartitionValueAsLocalTime( + const config::ConfigBase* session) const; + + /// Returns true if the stats based filter reorder for read is disabled. + bool readStatsBasedFilterReorderDisabled( + const config::ConfigBase* session) const; + + /// Whether to enable requested type check in the ReaderBase::convertType. + /// Returns true by default. + bool isRequestedTypeCheckEnabled(const config::ConfigBase* session) const; + + /// Returns the file system path containing local data. If non-empty, + /// initializes LocalHiveConnectorMetadata to provide metadata for the tables + /// in the directory. + std::string icebergLocalDataPath() const; + + /// Returns the name of the file format to use in interpreting the contents of + /// hiveLocalDataPath(). + std::string icebergLocalFileFormat() const; + + const std::shared_ptr& config() const { + return config_; + } +}; + + + + +} // namespace facebook::velox::connector::lakehouse::iceberg diff --git a/velox/connectors/lakehouse/iceberg/IcebergConnector.cpp b/velox/connectors/lakehouse/iceberg/IcebergConnector.cpp new file mode 100644 index 000000000000..7702365c58e6 --- /dev/null +++ b/velox/connectors/lakehouse/iceberg/IcebergConnector.cpp @@ -0,0 +1,201 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "IcebergConnector.h" + +#include "IcebergConfig.h" +#include "IcebergDataSource.h" +#include "IcebergPartitionFunction.h" +#include "velox/common/base/Fs.h" + +#include +#include + +#include + +namespace facebook::velox::connector::lakehouse::iceberg { + +using namespace facebook::velox::connector; +using namespace facebook::velox::exec; + +namespace { +std::vector>& +IcebergConnectorMetadataFactories() { + static std::vector> + factories; + return factories; +} +} // namespace + +IcebergConnector::IcebergConnector( + const std::string& id, + std::shared_ptr config, + folly::Executor* executor) + : Connector(id), + icebergConfig_(std::make_shared(config)), + fileHandleFactory_( + icebergConfig_->isFileHandleCacheEnabled() + ? std::make_unique>( + icebergConfig_->numCacheFileHandles()) + : nullptr, + std::make_unique(config)), + executor_(executor) { + if (icebergConfig_->isFileHandleCacheEnabled()) { + LOG(INFO) << "Iceberg connector " << connectorId() + << " created with maximum of " + << icebergConfig_->numCacheFileHandles() + << " cached file handles with expiration of " + << icebergConfig_->fileHandleExpirationDurationMs() << "ms."; + } else { + LOG(INFO) << "Iceberg connector " << connectorId() + << " created with file handle cache disabled"; + } +} + +std::unique_ptr IcebergConnector::createDataSource( + const RowTypePtr& outputType, + const ConnectorTableHandlePtr& tableHandle, + const connector::ColumnHandleMap& columnHandles, + ConnectorQueryCtx* connectorQueryCtx) { + return std::make_unique( + outputType, + tableHandle, + columnHandles, + &fileHandleFactory_, + executor_, + connectorQueryCtx, + icebergConfig_); +} + +std::unique_ptr IcebergConnector::createDataSink( + RowTypePtr inputType, + ConnectorInsertTableHandlePtr connectorInsertTableHandle, + ConnectorQueryCtx* connectorQueryCtx, + CommitStrategy commitStrategy) { +// return std::make_unique( +// inputType, +// connectorInsertTableHandle, +// connectorQueryCtx, +// commitStrategy, +// icebergConfig_); + VELOX_NYI("IcbergDataSink not implemented yet"); +} + +// static +// TODO: change to Iceberg semantics +std::unique_ptr IcebergPartitionFunctionSpec::create( + int numPartitions, + bool localExchange) const { + std::vector bucketToPartitions; + if (bucketToPartition_.empty()) { + // NOTE: if hive partition function spec doesn't specify bucket to partition + // mapping, then we do round-robin mapping based on the actual number of + // partitions. + bucketToPartitions.resize(numBuckets_); + for (int bucket = 0; bucket < numBuckets_; ++bucket) { + bucketToPartitions[bucket] = bucket % numPartitions; + } + if (localExchange) { + // Shuffle the map from bucket to partition for local exchange so we don't + // use the same map for remote shuffle. + std::shuffle( + bucketToPartitions.begin(), + bucketToPartitions.end(), + std::mt19937{0}); + } + } + return std::make_unique( + numBuckets_, + bucketToPartition_.empty() ? 
std::move(bucketToPartitions) + : bucketToPartition_, + channels_, + constValues_); +} + +// TODO: change to Iceberg semantics +std::string IcebergPartitionFunctionSpec::toString() const { + std::ostringstream keys; + size_t constIndex = 0; + for (auto i = 0; i < channels_.size(); ++i) { + if (i > 0) { + keys << ", "; + } + auto channel = channels_[i]; + if (channel == kConstantChannel) { + keys << "\"" << constValues_[constIndex++]->toString(0) << "\""; + } else { + keys << channel; + } + } + + return fmt::format("Iceberg (({}) buckets: {})", keys.str(), numBuckets_); +} + +// TODO: change to Iceberg semantics +folly::dynamic IcebergPartitionFunctionSpec::serialize() const { + folly::dynamic obj = folly::dynamic::object; + obj["name"] = "IcebergPartitionFunctionSpec"; + obj["numBuckets"] = ISerializable::serialize(numBuckets_); + obj["bucketToPartition"] = ISerializable::serialize(bucketToPartition_); + obj["keys"] = ISerializable::serialize(channels_); + std::vector constValueExprs; + constValueExprs.reserve(constValues_.size()); + for (const auto& value : constValues_) { + constValueExprs.emplace_back(value); + } + obj["constants"] = ISerializable::serialize(constValueExprs); + return obj; +} + +// static +// TODO: change to Iceberg semantics +core::PartitionFunctionSpecPtr IcebergPartitionFunctionSpec::deserialize( + const folly::dynamic& obj, + void* context) { + std::vector channels = + ISerializable::deserialize>( + obj["keys"], context); + const auto constTypedValues = + ISerializable::deserialize>( + obj["constants"], context); + std::vector constValues; + constValues.reserve(constTypedValues.size()); + auto* pool = static_cast(context); + for (const auto& value : constTypedValues) { + constValues.emplace_back(value->toConstantVector(pool)); + } + return std::make_shared( + ISerializable::deserialize(obj["numBuckets"], context), + ISerializable::deserialize>( + obj["bucketToPartition"], context), + std::move(channels), + std::move(constValues)); +} + +void registerIcebergPartitionFunctionSerDe() { + auto& registry = DeserializationWithContextRegistryForSharedPtr(); + registry.Register( + "IcebergPartitionFunctionSpec", + IcebergPartitionFunctionSpec::deserialize); +} + +bool registerIcebergConnectorMetadataFactory( + std::unique_ptr factory) { + IcebergConnectorMetadataFactories().push_back(std::move(factory)); + return true; +} + +} // namespace facebook::velox::connector::lakehouse::iceberg diff --git a/velox/connectors/lakehouse/iceberg/IcebergConnector.h b/velox/connectors/lakehouse/iceberg/IcebergConnector.h new file mode 100644 index 000000000000..44b19f258031 --- /dev/null +++ b/velox/connectors/lakehouse/iceberg/IcebergConnector.h @@ -0,0 +1,167 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include "FileHandle.h" +#include "IcebergConfig.h" +#include "velox/connectors/Connector.h" +#include "velox/core/PlanNode.h" +#include "velox/type/Type.h" + +#include + +namespace facebook::velox::connector::lakehouse::iceberg { + +using namespace facebook::velox::connector; + +class IcebergConnector : public Connector { + public: + IcebergConnector( + const std::string& id, + std::shared_ptr config, + folly::Executor* executor); + + const std::shared_ptr& + connectorConfig() const { + return icebergConfig_->config(); + } + + bool canAddDynamicFilter() const override { + return true; + } + + std::unique_ptr createDataSource( + const RowTypePtr& outputType, + const ConnectorTableHandlePtr& tableHandle, + const connector::ColumnHandleMap& columnHandles, + ConnectorQueryCtx* connectorQueryCtx) override; + + bool supportsSplitPreload() const override { + return true; + } + + std::unique_ptr createDataSink( + RowTypePtr inputType, + ConnectorInsertTableHandlePtr connectorInsertTableHandle, + ConnectorQueryCtx* connectorQueryCtx, + CommitStrategy commitStrategy) override final; + + folly::Executor* executor() const override { + return executor_; + } + + FileHandleCacheStats fileHandleCacheStats() { + return fileHandleFactory_.cacheStats(); + } + + // NOTE: this is to clear file handle cache which might affect performance, + // and is only used for operational purposes. + FileHandleCacheStats clearFileHandleCache() { + return fileHandleFactory_.clearCache(); + } + + protected: + const std::shared_ptr icebergConfig_; + FileHandleFactory fileHandleFactory_; + folly::Executor* executor_; +}; + +class IcebergConnectorFactory : public ConnectorFactory { + public: + static constexpr const char* kIcebergConnectorName = "iceberg"; + + IcebergConnectorFactory() : ConnectorFactory(kIcebergConnectorName) {} + + explicit IcebergConnectorFactory(const char* connectorName) + : ConnectorFactory(connectorName) {} + + std::shared_ptr newConnector( + const std::string& id, + std::shared_ptr config, + folly::Executor* ioExecutor = nullptr, + folly::Executor* cpuExecutor = nullptr) override { + return std::make_shared(id, config, ioExecutor); + } +}; + +// TODO: Support multiple versioned IcebergPartitionFunctionSpec. Iceberg +// partition spec can be different for different partitions. E.g. some old +// partitions may be partitioned by DAY(ds), and new partitions are changed to +// MONTH(ds). Iceberg table metadata keeps all partition specs. But now only the +// default partition spec is passed to Velox in the plan fragment now. +class IcebergPartitionFunctionSpec : public velox::core::PartitionFunctionSpec { + public: + IcebergPartitionFunctionSpec( + int numBuckets, + std::vector bucketToPartition, + std::vector channels, + std::vector constValues) + : numBuckets_(numBuckets), + bucketToPartition_(std::move(bucketToPartition)), + channels_(std::move(channels)), + constValues_(std::move(constValues)) {} + + /// The constructor without 'bucketToPartition' input is used in case that + /// we don't know the actual number of partitions until we create the + /// partition function instance. The hive partition function spec then builds + /// a bucket to partition map based on the actual number of partitions with + /// round-robin partitioning scheme to create the function instance. 
For + /// instance, when we create the local partition node with hive bucket + /// function to support multiple table writer drivers, we don't know the the + /// actual number of table writer drivers until start the task. + IcebergPartitionFunctionSpec( + int numBuckets, + std::vector channels, + std::vector constValues) + : IcebergPartitionFunctionSpec( + numBuckets, + {}, + std::move(channels), + std::move(constValues)) {} + + std::unique_ptr create( + int numPartitions, + bool localExchange) const override; + + std::string toString() const override; + + folly::dynamic serialize() const override; + + static core::PartitionFunctionSpecPtr deserialize( + const folly::dynamic& obj, + void* context); + + private: + const int numBuckets_; + const std::vector bucketToPartition_; + const std::vector channels_; + const std::vector constValues_; +}; + +void registerIcebergPartitionFunctionSerDe(); + +/// Hook for connecting metadata functions to a IcebergConnector. Each +/// registered factory is called after initializing a IcebergConnector until one +/// of these returns a ConnectorMetadata instance. +class IcebergConnectorMetadataFactory { + public: + virtual ~IcebergConnectorMetadataFactory() = default; +}; + +bool registerIcebergConnectorMetadataFactory( + std::unique_ptr); + +} // namespace facebook::velox::connector::lakehouse::iceberg diff --git a/velox/connectors/lakehouse/iceberg/IcebergConnectorSplit.cpp b/velox/connectors/lakehouse/iceberg/IcebergConnectorSplit.cpp new file mode 100644 index 000000000000..7bc1d5da050a --- /dev/null +++ b/velox/connectors/lakehouse/iceberg/IcebergConnectorSplit.cpp @@ -0,0 +1,52 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "IcebergConnectorSplit.h" + +#include "IcebergDeleteFile.h" + +namespace facebook::velox::connector::lakehouse::iceberg { + +IcebergConnectorSplit::IcebergConnectorSplit( + const std::string& _connectorId, + const std::string& _filePath, + dwio::common::FileFormat _fileFormat, + uint64_t _start, + uint64_t _length, + const std::unordered_map>& + _partitionKeys, + const std::unordered_map& _serdeParameters, + const std::unordered_map& _storageParameters, + int64_t _splitWeight, + bool _cacheable, + const std::vector& _deletes, + const std::unordered_map& _infoColumns, + std::optional _properties) + : ConnectorSplitBase( + _connectorId, + _filePath, + _fileFormat, + _start, + _length, + _partitionKeys, + _serdeParameters, + _storageParameters, + _splitWeight, + _cacheable, + _infoColumns, + _properties), + deleteFiles(_deletes) {} +} // namespace facebook::velox::connector::lakehouse::iceberg diff --git a/velox/connectors/lakehouse/iceberg/IcebergConnectorSplit.h b/velox/connectors/lakehouse/iceberg/IcebergConnectorSplit.h new file mode 100644 index 000000000000..7c650c7ad118 --- /dev/null +++ b/velox/connectors/lakehouse/iceberg/IcebergConnectorSplit.h @@ -0,0 +1,170 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include "ConnectorSplitBase.h" +#include "IcebergDeleteFile.h" + +#include + +namespace facebook::velox::connector::lakehouse::iceberg { + +struct IcebergConnectorSplit : public lakehouse::iceberg::ConnectorSplitBase { + std::vector deleteFiles; + + IcebergConnectorSplit( + const std::string& _connectorId, + const std::string& _filePath, + dwio::common::FileFormat _fileFormat, + uint64_t _start = 0, + uint64_t _length = std::numeric_limits::max(), + const std::unordered_map>& + _partitionKeys = {}, + const std::unordered_map& _serdeParameters = {}, + const std::unordered_map& _storageParameters = + {}, + int64_t _splitWeight = 0, + bool _cacheable = true, + const std::vector& _deletes = {}, + const std::unordered_map& _infoColumns = {}, + std::optional _properties = std::nullopt); +}; + +class IcebergConnectorSplitBuilder { + public: + explicit IcebergConnectorSplitBuilder(std::string filePath) + : filePath_{std::move(filePath)} { + infoColumns_["$path"] = filePath_; + } + + IcebergConnectorSplitBuilder& start(uint64_t start) { + start_ = start; + return *this; + } + + IcebergConnectorSplitBuilder& length(uint64_t length) { + length_ = length; + return *this; + } + + IcebergConnectorSplitBuilder& splitWeight(int64_t splitWeight) { + splitWeight_ = splitWeight; + return *this; + } + + IcebergConnectorSplitBuilder& cacheable(bool cacheable) { + cacheable_ = cacheable; + return *this; + } + + IcebergConnectorSplitBuilder& fileFormat(dwio::common::FileFormat format) { + fileFormat_ = format; + return *this; + } + + IcebergConnectorSplitBuilder& infoColumn( + const std::string& name, + const std::string& value) { + infoColumns_.emplace(std::move(name), std::move(value)); + return *this; + } + + IcebergConnectorSplitBuilder& partitionKeys( + const std::unordered_map>& partitionKeys) { + for (const auto& partitionKey : partitionKeys) { + this->partitionKey(partitionKey.first, partitionKey.second); + } + return *this; + } + + IcebergConnectorSplitBuilder& partitionKey( + std::string name, + std::optional value) { + partitionKeys_.emplace(std::move(name), std::move(value)); + return *this; + } + + IcebergConnectorSplitBuilder& customSplitInfo( + const std::unordered_map& customSplitInfo) { + customSplitInfo_ = customSplitInfo; + return *this; + } + + IcebergConnectorSplitBuilder& extraFileInfo( + const std::shared_ptr& extraFileInfo) { + extraFileInfo_ = extraFileInfo; + return *this; + } + + IcebergConnectorSplitBuilder& serdeParameters( + const std::unordered_map& serdeParameters) { + serdeParameters_ = serdeParameters; + return *this; + } + + IcebergConnectorSplitBuilder& connectorId(const std::string& connectorId) { + connectorId_ = connectorId; + return *this; + } + + IcebergConnectorSplitBuilder& fileProperties(FileProperties fileProperties) { + fileProperties_ = fileProperties; + return *this; + } + + IcebergConnectorSplitBuilder& deleteFiles(std::vector deleteFiles) { + deleteFiles_ = std::move(deleteFiles); + 
return *this; + } + + + std::unique_ptr build() const { + return std::make_unique( + connectorId_, + filePath_, + fileFormat_, // dwio::common::FileFormat + start_, + length_, + partitionKeys_, + serdeParameters_, + storageParameters_, + splitWeight_, + cacheable_, + deleteFiles_, + infoColumns_, + fileProperties_); + } + + private: + const std::string filePath_; + dwio::common::FileFormat fileFormat_{dwio::common::FileFormat::DWRF}; + uint64_t start_{0}; + uint64_t length_{std::numeric_limits::max()}; + std::unordered_map> partitionKeys_; + std::optional tableBucketNumber_; + std::unordered_map customSplitInfo_ = {}; + std::shared_ptr extraFileInfo_ = {}; + std::unordered_map serdeParameters_ = {}; + std::unordered_map storageParameters_ = {}; + std::string connectorId_; + int64_t splitWeight_{0}; + bool cacheable_{true}; + std::vector deleteFiles_; + std::unordered_map infoColumns_ = {}; + std::optional fileProperties_; +}; + +} // namespace facebook::velox::connector::lakehouse::iceberg diff --git a/velox/connectors/lakehouse/iceberg/IcebergConnectorUtil.cpp b/velox/connectors/lakehouse/iceberg/IcebergConnectorUtil.cpp new file mode 100644 index 000000000000..46e2d37f6c54 --- /dev/null +++ b/velox/connectors/lakehouse/iceberg/IcebergConnectorUtil.cpp @@ -0,0 +1,146 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "IcebergConnectorUtil.h" + +#include "IcebergConnectorSplit.h" +#include "IcebergTableHandle.h" +#include "velox/dwio/common/CachedBufferedInput.h" +#include "velox/dwio/common/DirectBufferedInput.h" +#include "velox/expression/Expr.h" +#include "velox/expression/ExprToSubfieldFilter.h" + +namespace facebook::velox::connector::lakehouse::iceberg { + +// TODO: This function needs to be rewritten. The value comparison shall be +// after the partition transform, which is described in the PartitionSpec +bool applyPartitionFilter( + const TypePtr& type, + const std::string& partitionValue, + const velox::common::Filter* filter, + bool asLocalTime) { + if (type->isDate()) { + int32_t value = 0; + // days_since_epoch partition values are integers in string format. Eg. + // Iceberg partition values. 
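+    // For example, a DATE partition value of 2021-01-01 arrives here as the
+    // string "18628" (days since 1970-01-01).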
+ value = folly::to(partitionValue); + return applyFilter(*filter, value); + } + + switch (type->kind()) { + case TypeKind::BIGINT: + case TypeKind::INTEGER: + case TypeKind::SMALLINT: + case TypeKind::TINYINT: { + return applyFilter(*filter, folly::to(partitionValue)); + } + case TypeKind::REAL: + case TypeKind::DOUBLE: { + return applyFilter(*filter, folly::to(partitionValue)); + } + case TypeKind::BOOLEAN: { + return applyFilter(*filter, folly::to(partitionValue)); + } + case TypeKind::TIMESTAMP: { + auto result = util::fromTimestampString( + StringView(partitionValue), util::TimestampParseMode::kPrestoCast); + VELOX_CHECK(!result.hasError()); + if (asLocalTime) { + result.value().toGMT(Timestamp::defaultTimezone()); + } + return applyFilter(*filter, result.value()); + } + case TypeKind::VARCHAR: { + return applyFilter(*filter, partitionValue); + } + default: + VELOX_FAIL( + "Bad type {} for partition value: {}", type->kind(), partitionValue); + } +} + +// TODO: This function needs to be rewritten. The value comparison shall be +// after the partition transform, which is described in the PartitionSpec. +// Instead of passing only a map of partition keys and values, we need to also +// pass in the PartitionSpec and apply the transforms before the comparison. +bool filterSplit( + const velox::common::ScanSpec* scanSpec, + const dwio::common::Reader* reader, + const std::string& filePath, + const std::unordered_map>& + partitionData, + const std::unordered_map< + std::string, + std::shared_ptr>& partitionKeysHandle, + bool asLocalTime) { + const auto totalRows = reader->numberOfRows(); + const auto& fileTypeWithId = reader->typeWithId(); + const auto& rowType = reader->rowType(); + for (const auto& child : scanSpec->children()) { + if (child->filter()) { + const auto& name = child->fieldName(); + auto iter = partitionData.find(name); + + // Test if the partition data of this split passes the filter + if (iter != partitionData.end()) { + if (iter->second.has_value()) { + const auto handlesIter = partitionKeysHandle.find(name); + VELOX_CHECK(handlesIter != partitionKeysHandle.end()); + + auto icebergPartitionColumnHandle = + std::dynamic_pointer_cast( + handlesIter->second); + VELOX_CHECK_NOT_NULL(icebergPartitionColumnHandle); + + // TODO: check if it's a partition key column + + // This is a non-null partition key + return applyPartitionFilter( + icebergPartitionColumnHandle->dataType(), + iter->second.value(), + child->filter(), + asLocalTime); + } + // Column is missing, most likely due to schema evolution. Or it's a + // partition key but the partition value is NULL. 
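+        // For example, a pushed-down filter like "ds IS NOT NULL" on a
+        // partition column whose value is NULL fails testNull(), so the
+        // whole split can be skipped.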
+ if (child->filter()->isDeterministic() && + !child->filter()->testNull()) { + VLOG(1) << "Skipping " << filePath + << " because the filter testNull() failed for column " + << child->fieldName(); + return false; + } + } else { + const auto& typeWithId = fileTypeWithId->childByName(name); + const auto columnStats = reader->columnStatistics(typeWithId->id()); + if (columnStats != nullptr && + !testFilter( + child->filter(), + columnStats.get(), + totalRows.value(), + typeWithId->type())) { + VLOG(1) << "Skipping " << filePath + << " based on stats and filter for column " + << child->fieldName(); + return false; + } + } + } + } + + return true; +} +} // namespace facebook::velox::connector::lakehouse::iceberg diff --git a/velox/connectors/lakehouse/iceberg/IcebergConnectorUtil.h b/velox/connectors/lakehouse/iceberg/IcebergConnectorUtil.h new file mode 100644 index 000000000000..cbe26eb55a21 --- /dev/null +++ b/velox/connectors/lakehouse/iceberg/IcebergConnectorUtil.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "TableHandleBase.h" +#include "velox/dwio/common/Reader.h" +#include "velox/type/Filter.h" +#include "velox/type/Type.h" + +#include + +namespace facebook::velox::connector::lakehouse::iceberg { + +bool applyPartitionFilter( + const TypePtr& type, + const std::string& partitionValue, + const velox::common::Filter* filter, + bool asLocalTime); + +bool filterSplit( + const velox::common::ScanSpec* scanSpec, + const dwio::common::Reader* reader, + const std::string& filePath, + const std::unordered_map>& + partitionData, + const std::unordered_map< + std::string, + std::shared_ptr>& partitionKeysHandle, + bool asLocalTime); + +} // namespace facebook::velox::connector::lakehouse::iceberg diff --git a/velox/connectors/lakehouse/iceberg/IcebergDataSink.cpp b/velox/connectors/lakehouse/iceberg/IcebergDataSink.cpp new file mode 100644 index 000000000000..8dc922d78e97 --- /dev/null +++ b/velox/connectors/lakehouse/iceberg/IcebergDataSink.cpp @@ -0,0 +1,1288 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "velox/connectors/lakehouse/iceberg/IcebergDataSink.h" + +#include "velox/common/base/Fs.h" +#include "velox/connectors/lakehouse/iceberg/IcebergConnectorUtil.h" +#include "velox/connectors/lakehouse/iceberg/IcebergPartitionFunction.h" +#include "velox/dwio/common/SortingWriter.h" + +#include +#include +#include + +using facebook::velox::common::testutil::TestValue; + +namespace facebook::velox::connector::lakehouse::iceberg { +namespace { +#define WRITER_NON_RECLAIMABLE_SECTION_GUARD(index) \ + memory::NonReclaimableSectionGuard nonReclaimableGuard( \ + writerInfo_[(index)]->nonReclaimableSectionHolder.get()) + +// Returns the type of non-partition data columns. +RowTypePtr getNonPartitionTypes( + const std::vector& dataCols, + const RowTypePtr& inputType) { + std::vector childNames; + std::vector childTypes; + const auto& dataSize = dataCols.size(); + childNames.reserve(dataSize); + childTypes.reserve(dataSize); + for (int dataCol : dataCols) { + childNames.push_back(inputType->nameOf(dataCol)); + childTypes.push_back(inputType->childAt(dataCol)); + } + + return ROW(std::move(childNames), std::move(childTypes)); +} + +// Filters out partition columns if there is any. +RowVectorPtr makeDataInput( + const std::vector& dataCols, + const RowVectorPtr& input) { + std::vector childVectors; + childVectors.reserve(dataCols.size()); + for (int dataCol : dataCols) { + childVectors.push_back(input->childAt(dataCol)); + } + + return std::make_shared( + input->pool(), + getNonPartitionTypes(dataCols, asRowType(input->type())), + input->nulls(), + input->size(), + std::move(childVectors), + input->getNullCount()); +} + +// Returns a subset of column indices corresponding to partition keys. +std::vector getPartitionChannels( + const std::shared_ptr& insertTableHandle) { + std::vector channels; + + for (column_index_t i = 0; i < insertTableHandle->inputColumns().size(); + i++) { + auto inputColumnBase = + std::dynamic_pointer_cast(insertTableHandle->inputColumns()[i]); + VELOX_CHECK(inputColumnBase); + if (inputColumnBase->isPartitionKey()) { + channels.push_back(i); + } + } + + return channels; +} + +// Returns the column indices of non-partition data columns. 
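+// For example (hypothetical schema): with input columns {c0, ds, c1} where
+// "ds" is the only partition key, the partition channels are {1} and the
+// non-partition (data) channels are {0, 2}.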
+std::vector getNonPartitionChannels( + const std::shared_ptr& insertTableHandle) { + std::vector dataChannels; + + for (column_index_t i = 0; i < insertTableHandle->inputColumns().size(); + i++) { + auto inputColumnBase = + std::dynamic_pointer_cast(insertTableHandle->inputColumns()[i]); + VELOX_CHECK(inputColumnBase); + if (inputColumnBase->isPartitionKey()) { + dataChannels.push_back(i); + } + } + + return dataChannels; +} + +std::string makePartitionDirectory( + const std::string& tableDirectory, + const std::optional& partitionSubdirectory) { + if (partitionSubdirectory.has_value()) { + return fs::path(tableDirectory) / partitionSubdirectory.value(); + } + return tableDirectory; +} + +std::unordered_map tableTypeNames() { + return { + {LocationHandle::TableType::kNew, "kNew"}, + {LocationHandle::TableType::kExisting, "kExisting"}, + }; +} + +template +std::unordered_map invertMap(const std::unordered_map& mapping) { + std::unordered_map inverted; + for (const auto& [key, value] : mapping) { + inverted.emplace(value, key); + } + return inverted; +} + +//std::unique_ptr createBucketFunction( +// const IcebergBucketProperty& bucketProperty, +// const RowTypePtr& inputType) { +// const auto& bucketedBy = bucketProperty.bucketedBy(); +// const auto& bucketedTypes = bucketProperty.bucketedTypes(); +// std::vector bucketedByChannels; +// bucketedByChannels.reserve(bucketedBy.size()); +// for (int32_t i = 0; i < bucketedBy.size(); ++i) { +// const auto& bucketColumn = bucketedBy[i]; +// const auto& bucketType = bucketedTypes[i]; +// const auto inputChannel = inputType->getChildIdx(bucketColumn); +// if (FOLLY_UNLIKELY( +// !inputType->childAt(inputChannel)->equivalent(*bucketType))) { +// VELOX_USER_FAIL( +// "Input column {} type {} doesn't match bucket type {}", +// inputType->nameOf(inputChannel), +// inputType->childAt(inputChannel)->toString(), +// bucketType->toString()); +// } +// bucketedByChannels.push_back(inputChannel); +// } +// return std::make_unique( +// bucketProperty.bucketCount(), bucketedByChannels); +//} + +std::string computeBucketedFileName( + const std::string& queryId, + uint32_t maxBucketCount, + uint32_t bucket) { + const uint32_t kMaxBucketCountPadding = + std::to_string(maxBucketCount - 1).size(); + const std::string bucketValueStr = std::to_string(bucket); + return fmt::format( + "0{:0>{}}_0_{}", bucketValueStr, kMaxBucketCountPadding, queryId); +} + +std::shared_ptr createSinkPool( + const std::shared_ptr& writerPool) { + return writerPool->addLeafChild(fmt::format("{}.sink", writerPool->name())); +} + +std::shared_ptr createSortPool( + const std::shared_ptr& writerPool) { + return writerPool->addLeafChild(fmt::format("{}.sort", writerPool->name())); +} + +uint64_t getFinishTimeSliceLimitMsFromicebergConfig( + const std::shared_ptr& config, + const config::ConfigBase* sessions) { + const uint64_t flushTimeSliceLimitMsFromConfig = 0; + //config->sortWriterFinishTimeSliceLimitMs(sessions); + // NOTE: if the flush time slice limit is set to 0, then we treat it as no + // limit. + return flushTimeSliceLimitMsFromConfig == 0 + ? std::numeric_limits::max() + : flushTimeSliceLimitMsFromConfig; +} + +//FOLLY_ALWAYS_INLINE int32_t +//getBucketCount(const IcebergBucketProperty* bucketProperty) { +// return bucketProperty == nullptr ? 
0 : bucketProperty->bucketCount(); +//} +} // namespace + +const IcebergWriterId& IcebergWriterId::unpartitionedId() { + static const IcebergWriterId writerId{0}; + return writerId; +} + +std::string IcebergWriterId::toString() const { + if (partitionId.has_value() && bucketId.has_value()) { + return fmt::format("part[{}.{}]", partitionId.value(), bucketId.value()); + } + + if (partitionId.has_value() && !bucketId.has_value()) { + return fmt::format("part[{}]", partitionId.value()); + } + + // This WriterId is used to add an identifier in the MemoryPools. This could + // indicate unpart, but the bucket number needs to be disambiguated. So + // creating a new label using bucket. + if (!partitionId.has_value() && bucketId.has_value()) { + return fmt::format("bucket[{}]", bucketId.value()); + } + + return "unpart"; +} + +const std::string LocationHandle::tableTypeName( + LocationHandle::TableType type) { + static const auto tableTypes = tableTypeNames(); + return tableTypes.at(type); +} + +LocationHandle::TableType LocationHandle::tableTypeFromName( + const std::string& name) { + static const auto nameTableTypes = invertMap(tableTypeNames()); + return nameTableTypes.at(name); +} + +IcebergSortingColumn::IcebergSortingColumn( + const std::string& sortColumn, + const core::SortOrder& sortOrder) + : sortColumn_(sortColumn), sortOrder_(sortOrder) { + VELOX_USER_CHECK(!sortColumn_.empty(), "hive sort column must be set"); + + if (FOLLY_UNLIKELY( + (sortOrder_.isAscending() && !sortOrder_.isNullsFirst()) || + (!sortOrder_.isAscending() && sortOrder_.isNullsFirst()))) { + VELOX_USER_FAIL("Bad hive sort order: {}", toString()); + } +} + +folly::dynamic IcebergSortingColumn::serialize() const { + folly::dynamic obj = folly::dynamic::object; + obj["name"] = "IcebergSortingColumn"; + obj["columnName"] = sortColumn_; + obj["sortOrder"] = sortOrder_.serialize(); + return obj; +} + +std::shared_ptr IcebergSortingColumn::deserialize( + const folly::dynamic& obj, + void* context) { + const std::string columnName = obj["columnName"].asString(); + const auto sortOrder = core::SortOrder::deserialize(obj["sortOrder"]); + return std::make_shared(columnName, sortOrder); +} + +std::string IcebergSortingColumn::toString() const { + return fmt::format( + "[COLUMN[{}] ORDER[{}]]", sortColumn_, sortOrder_.toString()); +} + +void IcebergSortingColumn::registerSerDe() { + auto& registry = DeserializationWithContextRegistryForSharedPtr(); + registry.Register("IcebergSortingColumn", IcebergSortingColumn::deserialize); +} + +//IcebergBucketProperty::IcebergBucketProperty( +// Kind kind, +// int32_t bucketCount, +// const std::vector& bucketedBy, +// const std::vector& bucketTypes, +// const std::vector>& sortedBy) +// : kind_(kind), +// bucketCount_(bucketCount), +// bucketedBy_(bucketedBy), +// bucketTypes_(bucketTypes), +// sortedBy_(sortedBy) { +// validate(); +//} +// +//void IcebergBucketProperty::validate() const { +// VELOX_USER_CHECK_GT(bucketCount_, 0, "Hive bucket count can't be zero"); +// VELOX_USER_CHECK(!bucketedBy_.empty(), "Hive bucket columns must be set"); +// VELOX_USER_CHECK_EQ( +// bucketedBy_.size(), +// bucketTypes_.size(), +// "The number of hive bucket columns and types do not match {}", +// toString()); +//} +// +//std::string IcebergBucketProperty::kindString(Kind kind) { +// switch (kind) { +// case Kind::kHiveCompatible: +// return "HIVE_COMPATIBLE"; +// case Kind::kPrestoNative: +// return "PRESTO_NATIVE"; +// default: +// return fmt::format("UNKNOWN {}", static_cast(kind)); +// } +//} +// 
+//folly::dynamic IcebergBucketProperty::serialize() const { +// folly::dynamic obj = folly::dynamic::object; +// obj["name"] = "IcebergBucketProperty"; +// obj["kind"] = static_cast(kind_); +// obj["bucketCount"] = bucketCount_; +// obj["bucketedBy"] = ISerializable::serialize(bucketedBy_); +// obj["bucketedTypes"] = ISerializable::serialize(bucketTypes_); +// obj["sortedBy"] = ISerializable::serialize(sortedBy_); +// return obj; +//} +// +//std::shared_ptr IcebergBucketProperty::deserialize( +// const folly::dynamic& obj, +// void* context) { +// const Kind kind = static_cast(obj["kind"].asInt()); +// const int32_t bucketCount = obj["bucketCount"].asInt(); +// const auto buckectedBy = +// ISerializable::deserialize>(obj["bucketedBy"]); +// const auto bucketedTypes = ISerializable::deserialize>( +// obj["bucketedTypes"], context); +// const auto sortedBy = +// ISerializable::deserialize>( +// obj["sortedBy"], context); +// return std::make_shared( +// kind, bucketCount, buckectedBy, bucketedTypes, sortedBy); +//} +// +//void IcebergBucketProperty::registerSerDe() { +// auto& registry = DeserializationWithContextRegistryForSharedPtr(); +// registry.Register("IcebergBucketProperty", IcebergBucketProperty::deserialize); +//} +// +//std::string IcebergBucketProperty::toString() const { +// std::stringstream out; +// out << "\nIcebergBucketProperty[<" << kind_ << " " << bucketCount_ << ">\n"; +// out << "\tBucket Columns:\n"; +// for (const auto& column : bucketedBy_) { +// out << "\t\t" << column << "\n"; +// } +// out << "\tBucket Types:\n"; +// for (const auto& type : bucketTypes_) { +// out << "\t\t" << type->toString() << "\n"; +// } +// if (!sortedBy_.empty()) { +// out << "\tSortedBy Columns:\n"; +// for (const auto& sortColum : sortedBy_) { +// out << "\t\t" << sortColum->toString() << "\n"; +// } +// } +// out << "]\n"; +// return out.str(); +//} + +IcebergDataSink::IcebergDataSink( + RowTypePtr inputType, + std::shared_ptr insertTableHandle, + const ConnectorQueryCtx* connectorQueryCtx, + CommitStrategy commitStrategy, + const std::shared_ptr& icebergConfig) + : IcebergDataSink( + inputType, + insertTableHandle, + connectorQueryCtx, + commitStrategy, + icebergConfig, +// getBucketCount(insertTableHandle->bucketProperty()), +// getBucketCount(insertTableHandle->bucketProperty()) > 0 +// ? createBucketFunction( +// *insertTableHandle->bucketProperty(), +// inputType) +// : nullptr, + getNonPartitionChannels(insertTableHandle)) {} + +IcebergDataSink::IcebergDataSink( + RowTypePtr inputType, + std::shared_ptr insertTableHandle, + const ConnectorQueryCtx* connectorQueryCtx, + CommitStrategy commitStrategy, + const std::shared_ptr& icebergConfig, +// uint32_t bucketCount, +// std::unique_ptr bucketFunction, + const std::vector& dataChannels) + : inputType_(std::move(inputType)), + insertTableHandle_(std::move(insertTableHandle)), + connectorQueryCtx_(connectorQueryCtx), + commitStrategy_(commitStrategy), + icebergConfig_(icebergConfig), + updateMode_(getUpdateMode()), +// maxOpenWriters_(icebergConfig_->maxPartitionsPerWriters( +// connectorQueryCtx->sessionProperties())), + maxOpenWriters_(100), + partitionChannels_(getPartitionChannels(insertTableHandle_)), + partitionIdGenerator_( + !partitionChannels_.empty() + ? 
std::make_unique( + inputType_, + partitionChannels_, + maxOpenWriters_, + connectorQueryCtx_->memoryPool(), + icebergConfig_->isPartitionPathAsLowerCase( + connectorQueryCtx->sessionProperties())) + : nullptr), + dataChannels_(dataChannels), +// bucketCount_(static_cast(bucketCount)), +// bucketFunction_(std::move(bucketFunction)), + writerFactory_( + dwio::common::getWriterFactory(insertTableHandle_->storageFormat())), + spillConfig_(connectorQueryCtx->spillConfig()), + sortWriterFinishTimeSliceLimitMs_(getFinishTimeSliceLimitMsFromicebergConfig( + icebergConfig_, + connectorQueryCtx->sessionProperties())), + fileNameGenerator_(insertTableHandle_->fileNameGenerator()) { +// if (isBucketed()) { +// VELOX_USER_CHECK_LT( +// bucketCount_, +// icebergConfig_->maxBucketCount(connectorQueryCtx->sessionProperties()), +// "bucketCount exceeds the limit"); +// } + + if (insertTableHandle_->ensureFiles()) { + VELOX_CHECK( + !isPartitioned(), + "ensureFiles is not supported partition keys in the data"); + ensureWriter(IcebergWriterId::unpartitionedId()); + } +} + +bool IcebergDataSink::canReclaim() const { + // Currently, we only support memory reclaim on dwrf file writer. + return (spillConfig_ != nullptr) && + (insertTableHandle_->storageFormat() == dwio::common::FileFormat::DWRF); +} + +void IcebergDataSink::appendData(RowVectorPtr input) { + checkRunning(); + + // Lazy load all the input columns. + input->loadedVector(); + + // Write to unpartitioned (and unbucketed) table. + if (!isPartitioned()) { + const auto index = ensureWriter(IcebergWriterId::unpartitionedId()); + write(index, input); + return; + } + + // Compute partition and bucket numbers. + computePartitionAndBucketIds(input); + + splitInputRowsAndEnsureWriters(input); + + // All inputs belong to a single non-bucketed partition. The partition id + // must be zero. + if (partitionIdGenerator_->numPartitions() == 1) { + const auto index = ensureWriter(IcebergWriterId{0}); + write(index, input); + return; + } + + for (auto index = 0; index < writers_.size(); ++index) { + const vector_size_t partitionSize = partitionSizes_[index]; + if (partitionSize == 0) { + continue; + } + + RowVectorPtr writerInput = partitionSize == input->size() + ? input + : exec::wrap(partitionSize, partitionRows_[index], input); + write(index, writerInput); + } +} + +void IcebergDataSink::write(size_t index, RowVectorPtr input) { + WRITER_NON_RECLAIMABLE_SECTION_GUARD(index); + auto dataInput = makeDataInput(dataChannels_, input); + + writers_[index]->write(dataInput); + writerInfo_[index]->inputSizeInBytes += dataInput->estimateFlatSize(); + writerInfo_[index]->numWrittenRows += dataInput->size(); +} + +std::string IcebergDataSink::stateString(State state) { + switch (state) { + case State::kRunning: + return "RUNNING"; + case State::kFinishing: + return "FLUSHING"; + case State::kClosed: + return "CLOSED"; + case State::kAborted: + return "ABORTED"; + default: + VELOX_UNREACHABLE("BAD STATE: {}", static_cast(state)); + } +} + +void IcebergDataSink::computePartitionAndBucketIds(const RowVectorPtr& input) { + VELOX_CHECK(isPartitioned()); + if (isPartitioned()) { + if (!icebergConfig_->allowNullPartitionKeys( + connectorQueryCtx_->sessionProperties())) { + // Check that there are no nulls in the partition keys. 
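+      // NOTE: this scan visits every row of each partition key column, so it
+      // only runs when null partition keys are disallowed by the connector
+      // config or session properties.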
+ for (auto& partitionIdx : partitionChannels_) { + auto col = input->childAt(partitionIdx); + if (col->mayHaveNulls()) { + for (auto i = 0; i < col->size(); ++i) { + VELOX_USER_CHECK( + !col->isNullAt(i), + "Partition key must not be null: {}", + input->type()->asRow().nameOf(partitionIdx)); + } + } + } + } + partitionIdGenerator_->run(input, partitionIds_); + } + +// if (isBucketed()) { +// bucketFunction_->partition(*input, bucketIds_); +// } +} + +DataSink::Stats IcebergDataSink::stats() const { + Stats stats; + if (state_ == State::kAborted) { + return stats; + } + + int64_t numWrittenBytes{0}; + int64_t writeIOTimeUs{0}; + for (const auto& ioStats : ioStats_) { + numWrittenBytes += ioStats->rawBytesWritten(); + writeIOTimeUs += ioStats->writeIOTimeUs(); + } + stats.numWrittenBytes = numWrittenBytes; + stats.writeIOTimeUs = writeIOTimeUs; + + if (state_ != State::kClosed) { + return stats; + } + + stats.numWrittenFiles = writers_.size(); + for (int i = 0; i < writerInfo_.size(); ++i) { + const auto& info = writerInfo_.at(i); + VELOX_CHECK_NOT_NULL(info); + const auto spillStats = info->spillStats->rlock(); + if (!spillStats->empty()) { + stats.spillStats += *spillStats; + } + } + return stats; +} + +std::shared_ptr IcebergDataSink::createWriterPool( + const IcebergWriterId& writerId) { + auto* connectorPool = connectorQueryCtx_->connectorMemoryPool(); + return connectorPool->addAggregateChild( + fmt::format("{}.{}", connectorPool->name(), writerId.toString())); +} + +void IcebergDataSink::setMemoryReclaimers( + IcebergWriterInfo* writerInfo, + io::IoStatistics* ioStats) { + auto* connectorPool = connectorQueryCtx_->connectorMemoryPool(); + if (connectorPool->reclaimer() == nullptr) { + return; + } + writerInfo->writerPool->setReclaimer( + WriterReclaimer::create(this, writerInfo, ioStats)); + writerInfo->sinkPool->setReclaimer(exec::MemoryReclaimer::create()); + // NOTE: we set the memory reclaimer for sort pool when we construct the sort + // writer. +} + +void IcebergDataSink::setState(State newState) { + checkStateTransition(state_, newState); + state_ = newState; +} + +/// Validates the state transition from 'oldState' to 'newState'. +void IcebergDataSink::checkStateTransition(State oldState, State newState) { + switch (oldState) { + case State::kRunning: + if (newState == State::kAborted || newState == State::kFinishing) { + return; + } + break; + case State::kFinishing: + if (newState == State::kAborted || newState == State::kClosed || + // The finishing state is reentry state if we yield in the middle of + // finish processing if a single run takes too long. + newState == State::kFinishing) { + return; + } + [[fallthrough]]; + case State::kAborted: + case State::kClosed: + default: + break; + } + VELOX_FAIL("Unexpected state transition from {} to {}", oldState, newState); +} + +bool IcebergDataSink::finish() { + // Flush is reentry state. + setState(State::kFinishing); + + // As for now, only sorted writer needs flush buffered data. For non-sorted + // writer, data is directly written to the underlying file writer. + if (!sortWrite()) { + return true; + } + + // TODO: we might refactor to move the data sorting logic into hive data sink. 
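+  // Finish the sort writers one at a time, yielding (by returning false) once
+  // the accumulated time exceeds 'sortWriterFinishTimeSliceLimitMs_'; the
+  // caller is expected to invoke finish() again to resume where we left off.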
+ const uint64_t startTimeMs = getCurrentTimeMs(); + for (auto i = 0; i < writers_.size(); ++i) { + WRITER_NON_RECLAIMABLE_SECTION_GUARD(i); + if (!writers_[i]->finish()) { + return false; + } + if (getCurrentTimeMs() - startTimeMs > sortWriterFinishTimeSliceLimitMs_) { + return false; + } + } + return true; +} + +std::vector IcebergDataSink::close() { + setState(State::kClosed); + closeInternal(); + return commitMessage(); +} + +std::vector IcebergDataSink::commitMessage() const { + std::vector partitionUpdates; + partitionUpdates.reserve(writerInfo_.size()); + for (int i = 0; i < writerInfo_.size(); ++i) { + const auto& info = writerInfo_.at(i); + VELOX_CHECK_NOT_NULL(info); + // clang-format off + auto partitionUpdateJson = folly::toJson( + folly::dynamic::object + ("name", info->writerParameters.partitionName().value_or("")) + ("updateMode", + IcebergWriterParameters::updateModeToString( + info->writerParameters.updateMode())) + ("writePath", info->writerParameters.writeDirectory()) + ("targetPath", info->writerParameters.targetDirectory()) + ("fileWriteInfos", folly::dynamic::array( + folly::dynamic::object + ("writeFileName", info->writerParameters.writeFileName()) + ("targetFileName", info->writerParameters.targetFileName()) + ("fileSize", ioStats_.at(i)->rawBytesWritten()))) + ("rowCount", info->numWrittenRows) + ("inMemoryDataSizeInBytes", info->inputSizeInBytes) + ("onDiskDataSizeInBytes", ioStats_.at(i)->rawBytesWritten()) + ("containsNumberedFileNames", true)); + // clang-format on + partitionUpdates.push_back(partitionUpdateJson); + } + return partitionUpdates; +} + +void IcebergDataSink::abort() { + setState(State::kAborted); + closeInternal(); +} + +void IcebergDataSink::closeInternal() { + VELOX_CHECK_NE(state_, State::kRunning); + VELOX_CHECK_NE(state_, State::kFinishing); + + TestValue::adjust( + "facebook::velox::connector::lakehouse::common::IcebergDataSink::closeInternal", this); + + if (state_ == State::kClosed) { + for (int i = 0; i < writers_.size(); ++i) { + WRITER_NON_RECLAIMABLE_SECTION_GUARD(i); + writers_[i]->close(); + } + } else { + for (int i = 0; i < writers_.size(); ++i) { + WRITER_NON_RECLAIMABLE_SECTION_GUARD(i); + writers_[i]->abort(); + } + } +} + +uint32_t IcebergDataSink::ensureWriter(const IcebergWriterId& id) { + auto it = writerIndexMap_.find(id); + if (it != writerIndexMap_.end()) { + return it->second; + } + return appendWriter(id); +} + +uint32_t IcebergDataSink::appendWriter(const IcebergWriterId& id) { + // Check max open writers. + VELOX_USER_CHECK_LE( + writers_.size(), maxOpenWriters_, "Exceeded open writer limit"); + VELOX_CHECK_EQ(writers_.size(), writerInfo_.size()); + VELOX_CHECK_EQ(writerIndexMap_.size(), writerInfo_.size()); + + std::optional partitionName = getPartitionName(id); + + // Without explicitly setting flush policy, the default memory based flush + // policy is used. 
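+  // Creating a writer involves: resolving the write/target file names,
+  // setting up dedicated writer/sink/sort memory pools, deriving the dwio
+  // writer options from the insert handle, connector config and session
+  // properties, and finally wrapping the file writer in a sorting writer when
+  // sort columns are configured.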
+ auto writerParameters = getWriterParameters(partitionName, id.bucketId); + const auto writePath = fs::path(writerParameters.writeDirectory()) / + writerParameters.writeFileName(); + auto writerPool = createWriterPool(id); + auto sinkPool = createSinkPool(writerPool); + std::shared_ptr sortPool{nullptr}; + if (sortWrite()) { + sortPool = createSortPool(writerPool); + } + writerInfo_.emplace_back(std::make_shared( + std::move(writerParameters), + std::move(writerPool), + std::move(sinkPool), + std::move(sortPool))); + ioStats_.emplace_back(std::make_shared()); + setMemoryReclaimers(writerInfo_.back().get(), ioStats_.back().get()); + + // Take the writer options provided by the user as a starting point, or + // allocate a new one. + auto options = insertTableHandle_->writerOptions(); + if (!options) { + options = writerFactory_->createWriterOptions(); + } + + const auto* connectorSessionProperties = + connectorQueryCtx_->sessionProperties(); + + // Only overwrite options in case they were not already provided. + if (options->schema == nullptr) { + options->schema = getNonPartitionTypes(dataChannels_, inputType_); + } + + if (options->memoryPool == nullptr) { + options->memoryPool = writerInfo_.back()->writerPool.get(); + } + + if (!options->compressionKind) { + options->compressionKind = insertTableHandle_->compressionKind(); + } + + if (options->spillConfig == nullptr && canReclaim()) { + options->spillConfig = spillConfig_; + } + + if (options->nonReclaimableSection == nullptr) { + options->nonReclaimableSection = + writerInfo_.back()->nonReclaimableSectionHolder.get(); + } + + if (options->memoryReclaimerFactory == nullptr || + options->memoryReclaimerFactory() == nullptr) { + options->memoryReclaimerFactory = []() { + return exec::MemoryReclaimer::create(); + }; + } + + if (options->serdeParameters.empty()) { + options->serdeParameters = std::map( + insertTableHandle_->serdeParameters().begin(), + insertTableHandle_->serdeParameters().end()); + } + + options->sessionTimezoneName = connectorQueryCtx_->sessionTimezone(); + options->adjustTimestampToTimezone = + connectorQueryCtx_->adjustTimestampToTimezone(); + options->processConfigs(*icebergConfig_->config(), *connectorSessionProperties); + + // Prevents the memory allocation during the writer creation. 
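+  // (The guard marks the new writer's non-reclaimable section so the memory
+  // arbitrator does not attempt to reclaim from its pools while the
+  // underlying file writer is being constructed.)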
+ WRITER_NON_RECLAIMABLE_SECTION_GUARD(writerInfo_.size() - 1); + auto writer = writerFactory_->createWriter( + dwio::common::FileSink::create( + writePath, + { + .bufferWrite = false, + .connectorProperties = icebergConfig_->config(), + .fileCreateConfig = icebergConfig_->writeFileCreateConfig(), + .pool = writerInfo_.back()->sinkPool.get(), + .metricLogger = dwio::common::MetricsLog::voidLog(), + .stats = ioStats_.back().get(), + }), + options); + writer = maybeCreateBucketSortWriter(std::move(writer)); + writers_.emplace_back(std::move(writer)); + + extendBuffersForPartitionedTables(); + + writerIndexMap_.emplace(id, writers_.size() - 1); + return writerIndexMap_[id]; +} + +std::optional IcebergDataSink::getPartitionName( + const IcebergWriterId& id) const { + std::optional partitionName; + if (isPartitioned()) { + partitionName = + partitionIdGenerator_->partitionName(id.partitionId.value()); + } + return partitionName; +} + +std::unique_ptr +IcebergDataSink::maybeCreateBucketSortWriter( + std::unique_ptr writer) { + if (!sortWrite()) { + return writer; + } + auto* sortPool = writerInfo_.back()->sortPool.get(); + VELOX_CHECK_NOT_NULL(sortPool); + auto sortBuffer = std::make_unique( + getNonPartitionTypes(dataChannels_, inputType_), + sortColumnIndices_, + sortCompareFlags_, + sortPool, + writerInfo_.back()->nonReclaimableSectionHolder.get(), + connectorQueryCtx_->prefixSortConfig(), + spillConfig_, + writerInfo_.back()->spillStats.get()); + return std::make_unique( + std::move(writer), + std::move(sortBuffer), + icebergConfig_->sortWriterMaxOutputRows( + connectorQueryCtx_->sessionProperties()), + icebergConfig_->sortWriterMaxOutputBytes( + connectorQueryCtx_->sessionProperties()), + sortWriterFinishTimeSliceLimitMs_); +} + +void IcebergDataSink::extendBuffersForPartitionedTables() { + // Extends the buffer used for partition rows calculations. + partitionSizes_.emplace_back(0); + partitionRows_.emplace_back(nullptr); + rawPartitionRows_.emplace_back(nullptr); +} + +IcebergWriterId IcebergDataSink::getWriterId(size_t row) const { + std::optional partitionId; + if (isPartitioned()) { + VELOX_CHECK_LT(partitionIds_[row], std::numeric_limits::max()); + partitionId = static_cast(partitionIds_[row]); + } + + std::optional bucketId; +// if (isBucketed()) { +// bucketId = bucketIds_[row]; +// } + return IcebergWriterId{partitionId, bucketId}; +} + +void IcebergDataSink::updatePartitionRows( + uint32_t index, + vector_size_t numRows, + vector_size_t row) { + VELOX_DCHECK_LT(index, partitionSizes_.size()); + VELOX_DCHECK_EQ(partitionSizes_.size(), partitionRows_.size()); + VELOX_DCHECK_EQ(partitionRows_.size(), rawPartitionRows_.size()); + if (FOLLY_UNLIKELY(partitionRows_[index] == nullptr) || + (partitionRows_[index]->capacity() < numRows * sizeof(vector_size_t))) { + partitionRows_[index] = + allocateIndices(numRows, connectorQueryCtx_->memoryPool()); + rawPartitionRows_[index] = + partitionRows_[index]->asMutable(); + } + rawPartitionRows_[index][partitionSizes_[index]] = row; + ++partitionSizes_[index]; +} + +void IcebergDataSink::splitInputRowsAndEnsureWriters(RowVectorPtr /* input */) { + VELOX_CHECK(isPartitioned()); + if (isPartitioned()) { + VELOX_CHECK_EQ(bucketIds_.size(), partitionIds_.size()); + } + + std::fill(partitionSizes_.begin(), partitionSizes_.end(), 0); + + const auto numRows = + isPartitioned() ? 
partitionIds_.size() : bucketIds_.size(); + for (auto row = 0; row < numRows; ++row) { + const auto id = getWriterId(row); + const uint32_t index = ensureWriter(id); + + updatePartitionRows(index, numRows, row); + } + + for (uint32_t i = 0; i < partitionSizes_.size(); ++i) { + if (partitionSizes_[i] != 0) { + VELOX_CHECK_NOT_NULL(partitionRows_[i]); + partitionRows_[i]->setSize(partitionSizes_[i] * sizeof(vector_size_t)); + } + } +} + +std::string IcebergDataSink::makePartitionDirectory( + const std::string& tableDirectory, + const std::optional& partitionSubdirectory) const { + if (partitionSubdirectory.has_value()) { + return fs::path(tableDirectory) / partitionSubdirectory.value(); + } + return tableDirectory; +} + +IcebergWriterParameters IcebergDataSink::getWriterParameters( + const std::optional& partition, + std::optional bucketId) const { + auto [targetFileName, writeFileName] = getWriterFileNames(bucketId); + + return IcebergWriterParameters{ + updateMode_, + partition, + targetFileName, + makePartitionDirectory( + insertTableHandle_->locationHandle()->targetPath(), partition), + writeFileName, + makePartitionDirectory( + insertTableHandle_->locationHandle()->writePath(), partition)}; +} + +std::pair IcebergDataSink::getWriterFileNames( + std::optional bucketId) const { + if (auto icebergInsertFileNameGenerator = + std::dynamic_pointer_cast( + fileNameGenerator_)) { + return icebergInsertFileNameGenerator->gen( + bucketId, + insertTableHandle_, + *connectorQueryCtx_, + icebergConfig_, + isCommitRequired()); + } + + return fileNameGenerator_->gen( + bucketId, insertTableHandle_, *connectorQueryCtx_, isCommitRequired()); +} + +std::pair IcebergInsertFileNameGenerator::gen( + std::optional bucketId, + const std::shared_ptr insertTableHandle, + const ConnectorQueryCtx& connectorQueryCtx, + bool commitRequired) const { + auto defaulticebergConfig = + std::make_shared(std::make_shared( + std::unordered_map())); + + return this->gen( + bucketId, + insertTableHandle, + connectorQueryCtx, + defaulticebergConfig, + commitRequired); +} + +std::string makeUuid() { + return boost::lexical_cast(boost::uuids::random_generator()()); +} + +std::pair IcebergInsertFileNameGenerator::gen( + std::optional bucketId, + const std::shared_ptr insertTableHandle, + const ConnectorQueryCtx& connectorQueryCtx, + const std::shared_ptr& icebergConfig, + bool commitRequired) const { + auto targetFileName = insertTableHandle->locationHandle()->targetFileName(); + const bool generateFileName = targetFileName.empty(); + if (bucketId.has_value()) { + VELOX_CHECK(generateFileName); + // TODO: add hive.file_renaming_enabled support. + targetFileName = computeBucketedFileName( + connectorQueryCtx.queryId(), +// icebergConfig->maxBucketCount(connectorQueryCtx.sessionProperties()), + 100, + bucketId.value()); + } else if (generateFileName) { + // targetFileName includes planNodeId and Uuid. As a result, different + // table writers run by the same task driver or the same table writer + // run in different task tries would have different targetFileNames. + targetFileName = fmt::format( + "{}_{}_{}_{}", + connectorQueryCtx.taskId(), + connectorQueryCtx.driverId(), + connectorQueryCtx.planNodeId(), + makeUuid()); + } + VELOX_CHECK(!targetFileName.empty()); + const std::string writeFileName = commitRequired + ? 
fmt::format(".tmp.velox.{}_{}", targetFileName, makeUuid()) + : targetFileName; + if (generateFileName && + insertTableHandle->storageFormat() == dwio::common::FileFormat::PARQUET) { + return { + fmt::format("{}{}", targetFileName, ".parquet"), + fmt::format("{}{}", writeFileName, ".parquet")}; + } + return {targetFileName, writeFileName}; +} + +folly::dynamic IcebergInsertFileNameGenerator::serialize() const { + folly::dynamic obj = folly::dynamic::object; + obj["name"] = "IcebergInsertFileNameGenerator"; + return obj; +} + +std::shared_ptr +IcebergInsertFileNameGenerator::deserialize( + const folly::dynamic& /* obj */, + void* /* context */) { + return std::make_shared(); +} + +void IcebergInsertFileNameGenerator::registerSerDe() { + auto& registry = DeserializationWithContextRegistryForSharedPtr(); + registry.Register( + "IcebergInsertFileNameGenerator", IcebergInsertFileNameGenerator::deserialize); +} + +std::string IcebergInsertFileNameGenerator::toString() const { + return "IcebergInsertFileNameGenerator"; +} + +IcebergWriterParameters::UpdateMode IcebergDataSink::getUpdateMode() const { + if (insertTableHandle_->isExistingTable()) { + if (insertTableHandle_->isPartitioned()) { + const auto insertBehavior = icebergConfig_->insertExistingPartitionsBehavior( + connectorQueryCtx_->sessionProperties()); + switch (insertBehavior) { + case iceberg::IcebergConfig::InsertExistingPartitionsBehavior::kOverwrite: + return IcebergWriterParameters::UpdateMode::kOverwrite; + case iceberg::IcebergConfig::InsertExistingPartitionsBehavior::kError: + return IcebergWriterParameters::UpdateMode::kNew; + default: + VELOX_UNSUPPORTED( + "Unsupported insert existing partitions behavior: {}", + iceberg::IcebergConfig::insertExistingPartitionsBehaviorString( + insertBehavior)); + } + } else { + if (icebergConfig_->immutablePartitions()) { + VELOX_USER_FAIL("Unpartitioned Hive tables are immutable."); + } + return IcebergWriterParameters::UpdateMode::kAppend; + } + } else { + return IcebergWriterParameters::UpdateMode::kNew; + } +} + +bool IcebergInsertTableHandle::isPartitioned() const { + return std::any_of( + inputColumns_.begin(), inputColumns_.end(), [](auto column) { + auto inputColumnBase = + std::dynamic_pointer_cast(column); + VELOX_CHECK(inputColumnBase); + return inputColumnBase->isPartitionKey(); + }); +} + +//const IcebergBucketProperty* IcebergInsertTableHandle::bucketProperty() const { +// return bucketProperty_.get(); +//} + +//bool IcebergInsertTableHandle::isBucketed() const { +// return bucketProperty() != nullptr; +//} + +bool IcebergInsertTableHandle::isExistingTable() const { + return locationHandle_->tableType() == LocationHandle::TableType::kExisting; +} + +folly::dynamic IcebergInsertTableHandle::serialize() const { + folly::dynamic obj = folly::dynamic::object; + obj["name"] = "IcebergInsertTableHandle"; + folly::dynamic arr = folly::dynamic::array; + for (const auto& ic : inputColumns_) { + arr.push_back(ic->serialize()); + } + + obj["inputColumns"] = arr; + obj["locationHandle"] = locationHandle_->serialize(); + obj["tableStorageFormat"] = dwio::common::toString(storageFormat_); + +// if (bucketProperty_) { +// obj["bucketProperty"] = bucketProperty_->serialize(); +// } + + if (compressionKind_.has_value()) { + obj["compressionKind"] = velox::common::compressionKindToString(*compressionKind_); + } + + folly::dynamic params = folly::dynamic::object; + for (const auto& [key, value] : serdeParameters_) { + params[key] = value; + } + obj["serdeParameters"] = params; + 
obj["ensureFiles"] = ensureFiles_; + obj["fileNameGenerator"] = fileNameGenerator_->serialize(); + return obj; +} + +IcebergInsertTableHandlePtr IcebergInsertTableHandle::create( + const folly::dynamic& obj) { + auto inputColumns = ISerializable::deserialize>( + obj["inputColumns"]); + auto locationHandle = + ISerializable::deserialize(obj["locationHandle"]); + auto storageFormat = + dwio::common::toFileFormat(obj["tableStorageFormat"].asString()); + + std::optional compressionKind = std::nullopt; + if (obj.count("compressionKind") > 0) { + compressionKind = + velox::common::stringToCompressionKind(obj["compressionKind"].asString()); + } + +// std::shared_ptr bucketProperty; +// if (obj.count("bucketProperty") > 0) { +// bucketProperty = +// ISerializable::deserialize(obj["bucketProperty"]); +// } + + std::unordered_map serdeParameters; + for (const auto& pair : obj["serdeParameters"].items()) { + serdeParameters.emplace(pair.first.asString(), pair.second.asString()); + } + + bool ensureFiles = obj["ensureFiles"].asBool(); + + auto fileNameGenerator = + ISerializable::deserialize(obj["fileNameGenerator"]); + return std::make_shared( + inputColumns, + locationHandle, + storageFormat, +// bucketProperty, + compressionKind, + serdeParameters, + nullptr, // writerOptions is not serializable + ensureFiles, + fileNameGenerator); +} + +void IcebergInsertTableHandle::registerSerDe() { + auto& registry = DeserializationRegistryForSharedPtr(); + registry.Register("IcebergInsertTableHandle", IcebergInsertTableHandle::create); +} + +std::string IcebergInsertTableHandle::toString() const { + std::ostringstream out; + out << "IcebergInsertTableHandle [" << dwio::common::toString(storageFormat_); + if (compressionKind_.has_value()) { + out << " " << velox::common::compressionKindToString(compressionKind_.value()); + } else { + out << " none"; + } + out << "], [inputColumns: ["; + for (const auto& column : inputColumns_) { + auto columnBase = + std::dynamic_pointer_cast(column); + VELOX_CHECK(columnBase); + out << " " << columnBase->toString(); + } + out << " ], locationHandle: " << locationHandle_->toString(); +// if (bucketProperty_) { +// out << ", bucketProperty: " << bucketProperty_->toString(); +// } + + if (serdeParameters_.size() > 0) { + std::map sortedSerdeParams( + serdeParameters_.begin(), serdeParameters_.end()); + out << ", serdeParameters: "; + for (const auto& [key, value] : sortedSerdeParams) { + out << "[" << key << ", " << value << "] "; + } + } + out << ", fileNameGenerator: " << fileNameGenerator_->toString(); + out << "]"; + return out.str(); +} + +std::string LocationHandle::toString() const { + return fmt::format( + "LocationHandle [targetPath: {}, writePath: {}, tableType: {}, tableFileName: {}]", + targetPath_, + writePath_, + tableTypeName(tableType_), + targetFileName_); +} + +void LocationHandle::registerSerDe() { + auto& registry = DeserializationRegistryForSharedPtr(); + registry.Register("LocationHandle", LocationHandle::create); +} + +folly::dynamic LocationHandle::serialize() const { + folly::dynamic obj = folly::dynamic::object; + obj["name"] = "LocationHandle"; + obj["targetPath"] = targetPath_; + obj["writePath"] = writePath_; + obj["tableType"] = tableTypeName(tableType_); + obj["targetFileName"] = targetFileName_; + return obj; +} + +LocationHandlePtr LocationHandle::create(const folly::dynamic& obj) { + auto targetPath = obj["targetPath"].asString(); + auto writePath = obj["writePath"].asString(); + auto tableType = tableTypeFromName(obj["tableType"].asString()); + 
auto targetFileName = obj["targetFileName"].asString(); + return std::make_shared( + targetPath, writePath, tableType, targetFileName); +} + +std::unique_ptr IcebergDataSink::WriterReclaimer::create( + IcebergDataSink* dataSink, + IcebergWriterInfo* writerInfo, + io::IoStatistics* ioStats) { + return std::unique_ptr( + new IcebergDataSink::WriterReclaimer(dataSink, writerInfo, ioStats)); +} + +bool IcebergDataSink::WriterReclaimer::reclaimableBytes( + const memory::MemoryPool& pool, + uint64_t& reclaimableBytes) const { + VELOX_CHECK_EQ(pool.name(), writerInfo_->writerPool->name()); + reclaimableBytes = 0; + if (!dataSink_->canReclaim()) { + return false; + } + return exec::MemoryReclaimer::reclaimableBytes(pool, reclaimableBytes); +} + +uint64_t IcebergDataSink::WriterReclaimer::reclaim( + memory::MemoryPool* pool, + uint64_t targetBytes, + uint64_t maxWaitMs, + memory::MemoryReclaimer::Stats& stats) { + VELOX_CHECK_EQ(pool->name(), writerInfo_->writerPool->name()); + if (!dataSink_->canReclaim()) { + return 0; + } + + if (*writerInfo_->nonReclaimableSectionHolder.get()) { + RECORD_METRIC_VALUE(kMetricMemoryNonReclaimableCount); + LOG(WARNING) << "Can't reclaim from hive writer pool " << pool->name() + << " which is under non-reclaimable section, " + << " reserved memory: " + << succinctBytes(pool->reservedBytes()); + ++stats.numNonReclaimableAttempts; + return 0; + } + + const uint64_t memoryUsageBeforeReclaim = pool->reservedBytes(); + const std::string memoryUsageTreeBeforeReclaim = pool->treeMemoryUsage(); + const auto writtenBytesBeforeReclaim = ioStats_->rawBytesWritten(); + const auto reclaimedBytes = + exec::MemoryReclaimer::reclaim(pool, targetBytes, maxWaitMs, stats); + const auto earlyFlushedRawBytes = + ioStats_->rawBytesWritten() - writtenBytesBeforeReclaim; + addThreadLocalRuntimeStat( + kEarlyFlushedRawBytes, + RuntimeCounter(earlyFlushedRawBytes, RuntimeCounter::Unit::kBytes)); + if (earlyFlushedRawBytes > 0) { + RECORD_METRIC_VALUE( + kMetricFileWriterEarlyFlushedRawBytes, earlyFlushedRawBytes); + } + const uint64_t memoryUsageAfterReclaim = pool->reservedBytes(); + if (memoryUsageAfterReclaim > memoryUsageBeforeReclaim) { + VELOX_FAIL( + "Unexpected memory growth after memory reclaim from {}, the memory usage before reclaim: {}, after reclaim: {}\nThe memory tree usage before reclaim:\n{}\nThe memory tree usage after reclaim:\n{}", + pool->name(), + succinctBytes(memoryUsageBeforeReclaim), + succinctBytes(memoryUsageAfterReclaim), + memoryUsageTreeBeforeReclaim, + pool->treeMemoryUsage()); + } + return reclaimedBytes; +} +} // namespace facebook::velox::connector::lakehouse::common diff --git a/velox/connectors/lakehouse/iceberg/IcebergDataSink.h b/velox/connectors/lakehouse/iceberg/IcebergDataSink.h new file mode 100644 index 000000000000..310a4941d79a --- /dev/null +++ b/velox/connectors/lakehouse/iceberg/IcebergDataSink.h @@ -0,0 +1,686 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include "TableHandleBase.h" +#include "velox/common/compression/Compression.h" +#include "velox/connectors/Connector.h" +#include "velox/connectors/lakehouse/iceberg/IcebergConfig.h" +#include "velox/connectors/lakehouse/iceberg/PartitionIdGenerator.h" +#include "velox/dwio/common/Options.h" +#include "velox/dwio/common/Writer.h" +#include "velox/dwio/common/WriterFactory.h" +#include "velox/exec/MemoryReclaimer.h" + +namespace facebook::velox::connector::lakehouse::iceberg { + +class LocationHandle; +using LocationHandlePtr = std::shared_ptr; + +/// Location related properties of the Hive table to be written. +class LocationHandle : public ISerializable { + public: + enum class TableType { + /// Write to a new table to be created. + kNew, + /// Write to an existing table. + kExisting, + }; + + LocationHandle( + std::string targetPath, + std::string writePath, + TableType tableType, + std::string targetFileName = "") + : targetPath_(std::move(targetPath)), + targetFileName_(std::move(targetFileName)), + writePath_(std::move(writePath)), + tableType_(tableType) {} + + const std::string& targetPath() const { + return targetPath_; + } + + const std::string& targetFileName() const { + return targetFileName_; + } + + const std::string& writePath() const { + return writePath_; + } + + TableType tableType() const { + return tableType_; + } + + std::string toString() const; + + static void registerSerDe(); + + folly::dynamic serialize() const override; + + static LocationHandlePtr create(const folly::dynamic& obj); + + static const std::string tableTypeName(LocationHandle::TableType type); + + static LocationHandle::TableType tableTypeFromName(const std::string& name); + + private: + // Target directory path. + const std::string targetPath_; + // If non-empty, use this name instead of generating our own. + const std::string targetFileName_; + // Staging directory path. + const std::string writePath_; + // Whether the table to be written is new, already existing or temporary. 
+ const TableType tableType_; +}; + +class IcebergSortingColumn : public ISerializable { + public: + IcebergSortingColumn( + const std::string& sortColumn, + const core::SortOrder& sortOrder); + + const std::string& sortColumn() const { + return sortColumn_; + } + + core::SortOrder sortOrder() const { + return sortOrder_; + } + + folly::dynamic serialize() const override; + + static std::shared_ptr deserialize( + const folly::dynamic& obj, + void* context); + + std::string toString() const; + + static void registerSerDe(); + + private: + const std::string sortColumn_; + const core::SortOrder sortOrder_; +}; + +class IcebergInsertTableHandle; +using IcebergInsertTableHandlePtr = std::shared_ptr; + +class FileNameGenerator : public ISerializable { + public: + virtual ~FileNameGenerator() = default; + + virtual std::pair gen( + std::optional bucketId, + const std::shared_ptr insertTableHandle, + const ConnectorQueryCtx& connectorQueryCtx, + bool commitRequired) const = 0; + + virtual std::string toString() const = 0; +}; + +class IcebergInsertFileNameGenerator : public FileNameGenerator { + public: + IcebergInsertFileNameGenerator() {} + + std::pair gen( + std::optional bucketId, + const std::shared_ptr insertTableHandle, + const ConnectorQueryCtx& connectorQueryCtx, + bool commitRequired) const override; + + /// Version of file generation that takes icebergConfig into account when + /// generating file names + std::pair gen( + std::optional bucketId, + const std::shared_ptr insertTableHandle, + const ConnectorQueryCtx& connectorQueryCtx, + const std::shared_ptr& icebergConfig, + bool commitRequired) const; + + static void registerSerDe(); + + folly::dynamic serialize() const override; + + static std::shared_ptr deserialize( + const folly::dynamic& obj, + void* context); + + std::string toString() const override; +}; + +/// Represents a request for Hive write. +class IcebergInsertTableHandle : public ConnectorInsertTableHandle { + public: + IcebergInsertTableHandle( + std::vector inputColumns, + std::shared_ptr locationHandle, + dwio::common::FileFormat storageFormat = dwio::common::FileFormat::DWRF, + std::optional compressionKind = {}, + const std::unordered_map& serdeParameters = {}, + const std::shared_ptr& writerOptions = + nullptr, + // When this option is set the IcebergDataSink will always write a file even + // if there's no data. This is useful when the table is bucketed, but the + // engine handles ensuring a 1 to 1 mapping from task to bucket. 
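+      // Note: when set, the constructor also verifies that none of the input
+      // columns are partition keys, and the data sink creates the
+      // unpartitioned writer up front so a file is produced even for empty
+      // input.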
+ const bool ensureFiles = false, + std::shared_ptr fileNameGenerator = + std::make_shared()) + : inputColumns_(std::move(inputColumns)), + locationHandle_(std::move(locationHandle)), + storageFormat_(storageFormat), +// bucketProperty_(std::move(bucketProperty)), + compressionKind_(compressionKind), + serdeParameters_(serdeParameters), + writerOptions_(writerOptions), + ensureFiles_(ensureFiles), + fileNameGenerator_(std::move(fileNameGenerator)) { + if (compressionKind.has_value()) { + VELOX_CHECK( + compressionKind.value() != velox::common::CompressionKind_MAX, + "Unsupported compression type: CompressionKind_MAX"); + } + + if (ensureFiles_) { + for (const auto& inputColumn : inputColumns_) { + auto inputColumnBase = + std::dynamic_pointer_cast(inputColumn); + VELOX_CHECK( + inputColumnBase, + "{}} is not ColumnHandleBase", + inputColumn->name()); + VELOX_CHECK( + !inputColumnBase->isPartitionKey(), + "ensureFiles is not supported with partition keys in the data"); + } + } + } + + virtual ~IcebergInsertTableHandle() = default; + + const std::vector& inputColumns() const { + return inputColumns_; + } + + const std::shared_ptr& locationHandle() const { + return locationHandle_; + } + + std::optional compressionKind() const { + return compressionKind_; + } + + dwio::common::FileFormat storageFormat() const { + return storageFormat_; + } + + const std::unordered_map& serdeParameters() const { + return serdeParameters_; + } + + const std::shared_ptr& writerOptions() const { + return writerOptions_; + } + + bool ensureFiles() const { + return ensureFiles_; + } + + const std::shared_ptr& fileNameGenerator() const { + return fileNameGenerator_; + } + + bool supportsMultiThreading() const override { + return true; + } + + bool isPartitioned() const; + + bool isBucketed() const; + +// const IcebergBucketProperty* bucketProperty() const; + + bool isExistingTable() const; + + folly::dynamic serialize() const override; + + static IcebergInsertTableHandlePtr create(const folly::dynamic& obj); + + static void registerSerDe(); + + std::string toString() const override; + + private: + const std::vector inputColumns_; + const std::shared_ptr locationHandle_; + const dwio::common::FileFormat storageFormat_; +// const std::shared_ptr bucketProperty_; + const std::optional compressionKind_; + const std::unordered_map serdeParameters_; + const std::shared_ptr writerOptions_; + const bool ensureFiles_; + const std::shared_ptr fileNameGenerator_; +}; + +/// Parameters for Hive writers. +class IcebergWriterParameters { + public: + enum class UpdateMode { + kNew, // Write files to a new directory. + kOverwrite, // Overwrite an existing directory. + // Append mode is currently only supported for unpartitioned tables. + kAppend, // Append to an unpartitioned table. + }; + + /// @param updateMode Write the files to a new directory, or append to an + /// existing directory or overwrite an existing directory. + /// @param partitionName Partition name in the typical Hive style, which is + /// also the partition subdirectory part of the partition path. + /// @param targetFileName The final name of a file after committing. + /// @param targetDirectory The final directory that a file should be in after + /// committing. + /// @param writeFileName The temporary name of the file that a running writer + /// writes to. If a running writer writes directory to the target file, set + /// writeFileName to targetFileName by default. + /// @param writeDirectory The temporary directory that a running writer writes + /// to. 
If a running writer writes directory to the target directory, set + /// writeDirectory to targetDirectory by default. + IcebergWriterParameters( + UpdateMode updateMode, + std::optional partitionName, + std::string targetFileName, + std::string targetDirectory, + std::optional writeFileName = std::nullopt, + std::optional writeDirectory = std::nullopt) + : updateMode_(updateMode), + partitionName_(std::move(partitionName)), + targetFileName_(std::move(targetFileName)), + targetDirectory_(std::move(targetDirectory)), + writeFileName_(writeFileName.value_or(targetFileName_)), + writeDirectory_(writeDirectory.value_or(targetDirectory_)) {} + + UpdateMode updateMode() const { + return updateMode_; + } + + static std::string updateModeToString(UpdateMode updateMode) { + switch (updateMode) { + case UpdateMode::kNew: + return "NEW"; + case UpdateMode::kOverwrite: + return "OVERWRITE"; + case UpdateMode::kAppend: + return "APPEND"; + default: + VELOX_UNSUPPORTED("Unsupported update mode."); + } + } + + const std::optional& partitionName() const { + return partitionName_; + } + + const std::string& targetFileName() const { + return targetFileName_; + } + + const std::string& writeFileName() const { + return writeFileName_; + } + + const std::string& targetDirectory() const { + return targetDirectory_; + } + + const std::string& writeDirectory() const { + return writeDirectory_; + } + + private: + const UpdateMode updateMode_; + const std::optional partitionName_; + const std::string targetFileName_; + const std::string targetDirectory_; + const std::string writeFileName_; + const std::string writeDirectory_; +}; + +struct IcebergWriterInfo { + IcebergWriterInfo( + IcebergWriterParameters parameters, + std::shared_ptr _writerPool, + std::shared_ptr _sinkPool, + std::shared_ptr _sortPool) + : writerParameters(std::move(parameters)), + nonReclaimableSectionHolder(new tsan_atomic(false)), + spillStats( + std::make_unique>()), + writerPool(std::move(_writerPool)), + sinkPool(std::move(_sinkPool)), + sortPool(std::move(_sortPool)) {} + + const IcebergWriterParameters writerParameters; + const std::unique_ptr> nonReclaimableSectionHolder; + /// Collects the spill stats from sort writer if the spilling has been + /// triggered. + const std::unique_ptr> + spillStats; + const std::shared_ptr writerPool; + const std::shared_ptr sinkPool; + const std::shared_ptr sortPool; + int64_t numWrittenRows = 0; + int64_t inputSizeInBytes = 0; +}; + +/// Identifies a hive writer. +struct IcebergWriterId { + std::optional partitionId{std::nullopt}; + std::optional bucketId{std::nullopt}; + + IcebergWriterId() = default; + + IcebergWriterId( + std::optional _partitionId, + std::optional _bucketId = std::nullopt) + : partitionId(_partitionId), bucketId(_bucketId) {} + + /// Returns the special writer id for the un-partitioned (and non-bucketed) + /// table. 
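+  /// All rows written to such a table are routed to the single writer
+  /// identified by this id.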
+ static const IcebergWriterId& unpartitionedId(); + + std::string toString() const; + + bool operator==(const IcebergWriterId& other) const { + return std::tie(partitionId, bucketId) == + std::tie(other.partitionId, other.bucketId); + } +}; + +struct IcebergWriterIdHasher { + std::size_t operator()(const IcebergWriterId& id) const { + return bits::hashMix( + id.partitionId.value_or(std::numeric_limits::max()), + id.bucketId.value_or(std::numeric_limits::max())); + } +}; + +struct IcebergWriterIdEq { + bool operator()(const IcebergWriterId& lhs, const IcebergWriterId& rhs) const { + return lhs == rhs; + } +}; + +class IcebergDataSink : public DataSink { + public: + /// The list of runtime stats reported by hive data sink + static constexpr const char* kEarlyFlushedRawBytes = "earlyFlushedRawBytes"; + + /// Defines the execution states of a hive data sink running internally. + enum class State { + /// The data sink accepts new append data in this state. + kRunning = 0, + /// The data sink flushes any buffered data to the underlying file writer + /// but no more data can be appended. + kFinishing = 1, + /// The data sink is aborted on error and no more data can be appended. + kAborted = 2, + /// The data sink is closed on error and no more data can be appended. + kClosed = 3 + }; + static std::string stateString(State state); + + IcebergDataSink( + RowTypePtr inputType, + std::shared_ptr insertTableHandle, + const ConnectorQueryCtx* connectorQueryCtx, + CommitStrategy commitStrategy, + const std::shared_ptr& icebergConfig); + + IcebergDataSink( + RowTypePtr inputType, + std::shared_ptr insertTableHandle, + const ConnectorQueryCtx* connectorQueryCtx, + CommitStrategy commitStrategy, + const std::shared_ptr& icebergConfig, + const std::vector& dataChannels); + + void appendData(RowVectorPtr input) override; + + bool finish() override; + + Stats stats() const override; + + std::vector close() override; + + void abort() override; + + bool canReclaim() const; + + protected: + // Validates the state transition from 'oldState' to 'newState'. + void checkStateTransition(State oldState, State newState); + + void setState(State newState); + + virtual std::vector commitMessage() const; + + class WriterReclaimer : public exec::MemoryReclaimer { + public: + static std::unique_ptr create( + IcebergDataSink* dataSink, + IcebergWriterInfo* writerInfo, + io::IoStatistics* ioStats); + + bool reclaimableBytes( + const memory::MemoryPool& pool, + uint64_t& reclaimableBytes) const override; + + uint64_t reclaim( + memory::MemoryPool* pool, + uint64_t targetBytes, + uint64_t maxWaitMs, + memory::MemoryReclaimer::Stats& stats) override; + + private: + WriterReclaimer( + IcebergDataSink* dataSink, + IcebergWriterInfo* writerInfo, + io::IoStatistics* ioStats) + : exec::MemoryReclaimer(0), + dataSink_(dataSink), + writerInfo_(writerInfo), + ioStats_(ioStats) { + VELOX_CHECK_NOT_NULL(dataSink_); + VELOX_CHECK_NOT_NULL(writerInfo_); + VELOX_CHECK_NOT_NULL(ioStats_); + } + + IcebergDataSink* const dataSink_; + IcebergWriterInfo* const writerInfo_; + io::IoStatistics* const ioStats_; + }; + + FOLLY_ALWAYS_INLINE bool sortWrite() const { + return !sortColumnIndices_.empty(); + } + + // Returns true if the table is partitioned. + FOLLY_ALWAYS_INLINE bool isPartitioned() const { + return partitionIdGenerator_ != nullptr; + } + + // Returns true if the table is bucketed. 
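+  // (Bucketed writes are not wired up in this connector yet; the bucket
+  // related members are currently commented out.)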
+// FOLLY_ALWAYS_INLINE bool isBucketed() const { +// return bucketCount_ != 0; +// } + + FOLLY_ALWAYS_INLINE bool isCommitRequired() const { + return commitStrategy_ != CommitStrategy::kNoCommit; + } + + std::shared_ptr createWriterPool( + const IcebergWriterId& writerId); + + void setMemoryReclaimers( + IcebergWriterInfo* writerInfo, + io::IoStatistics* ioStats); + + // Compute the partition id and bucket id for each row in 'input'. + void computePartitionAndBucketIds(const RowVectorPtr& input); + + // Get the hive writer id corresponding to the row + // from partitionIds and bucketIds. + FOLLY_ALWAYS_INLINE IcebergWriterId getWriterId(size_t row) const; + + // Computes the number of input rows as well as the actual input row indices + // to each corresponding (bucketed) partition based on the partition and + // bucket ids calculated by 'computePartitionAndBucketIds'. The function also + // ensures that there is a writer created for each (bucketed) partition. + virtual void splitInputRowsAndEnsureWriters(RowVectorPtr input); + + // Makes sure the writer is created for the given writer id. The function + // returns the corresponding index in 'writers_'. + uint32_t ensureWriter(const IcebergWriterId& id); + + // Appends a new writer for the given 'id'. The function returns the index of + // the newly created writer in 'writers_'. + uint32_t appendWriter(const IcebergWriterId& id); + + virtual std::optional getPartitionName( + const IcebergWriterId& id) const; + + std::unique_ptr + maybeCreateBucketSortWriter( + std::unique_ptr writer); + + std::string makePartitionDirectory( + const std::string& tableDirectory, + const std::optional& partitionSubdirectory) const; + + void + updatePartitionRows(uint32_t index, vector_size_t numRows, vector_size_t row); + + void extendBuffersForPartitionedTables(); + + IcebergWriterParameters getWriterParameters( + const std::optional& partition, + std::optional bucketId) const; + + // Gets write and target file names for a writer based on the table commit + // strategy as well as table partitioned type. If commit is not required, the + // write file and target file has the same name. If not, add a temp file + // prefix to the target file for write file name. The coordinator (or driver + // for Presto on spark) will rename the write file to target file to commit + // the table write when update the metadata store. If it is a bucketed table, + // the file name encodes the corresponding bucket id. + std::pair getWriterFileNames( + std::optional bucketId) const; + + IcebergWriterParameters::UpdateMode getUpdateMode() const; + + FOLLY_ALWAYS_INLINE void checkRunning() const { + VELOX_CHECK_EQ(state_, State::kRunning, "Hive data sink is not running"); + } + + // Invoked to write 'input' to the specified file writer. 
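// --- Illustrative sketch (not part of this patch) ---------------------------
// A standalone sketch, under assumptions, of the write/target file naming rule
// described in getWriterFileNames() above: when a commit is required, the
// writer writes to a temp-prefixed file and the coordinator later renames it
// to the target name. The ".tmp.velox." prefix and helper name here are
// hypothetical, chosen only to illustrate the two-name scheme.
#include <string>
#include <utility>

inline std::pair<std::string, std::string> writerFileNamesSketch(
    const std::string& targetFileName,
    bool commitRequired) {
  // first = write file name, second = target file name.
  if (!commitRequired) {
    return {targetFileName, targetFileName};
  }
  return {".tmp.velox." + targetFileName, targetFileName};
}
// ----------------------------------------------------------------------------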
+ void write(size_t index, RowVectorPtr input); + + void closeInternal(); + + const RowTypePtr inputType_; + const std::shared_ptr insertTableHandle_; + const ConnectorQueryCtx* const connectorQueryCtx_; + const CommitStrategy commitStrategy_; + const std::shared_ptr icebergConfig_; + const IcebergWriterParameters::UpdateMode updateMode_; + const uint32_t maxOpenWriters_; + const std::vector partitionChannels_; + const std::unique_ptr partitionIdGenerator_; + // Indices of dataChannel are stored in ascending order + const std::vector dataChannels_; +// const int32_t bucketCount_{0}; +// const std::unique_ptr bucketFunction_; + const std::shared_ptr writerFactory_; + const velox::common::SpillConfig* const spillConfig_; + const uint64_t sortWriterFinishTimeSliceLimitMs_{0}; + + std::vector sortColumnIndices_; + std::vector sortCompareFlags_; + + State state_{State::kRunning}; + + tsan_atomic nonReclaimableSection_{false}; + + // The map from writer id to the writer index in 'writers_' and 'writerInfo_'. + folly::F14FastMap + writerIndexMap_; + + // Below are structures for partitions from all inputs. writerInfo_ and + // writers_ are both indexed by partitionId. + std::vector> writerInfo_; + std::vector> writers_; + // IO statistics collected for each writer. + std::vector> ioStats_; + + // Below are structures updated when processing current input. partitionIds_ + // are indexed by the row of input_. partitionRows_, rawPartitionRows_ and + // partitionSizes_ are indexed by partitionId. + raw_vector partitionIds_; + std::vector partitionRows_; + std::vector rawPartitionRows_; + std::vector partitionSizes_; + + // Reusable buffers for bucket id calculations. + std::vector bucketIds_; + + // Strategy for naming writer files + std::shared_ptr fileNameGenerator_; +}; + +FOLLY_ALWAYS_INLINE std::ostream& operator<<( + std::ostream& os, + IcebergDataSink::State state) { + os << IcebergDataSink::stateString(state); + return os; +} +} // namespace facebook::velox::connector::lakehouse::iceberg + +template <> +struct fmt::formatter< + facebook::velox::connector::lakehouse::iceberg::IcebergDataSink::State> + : formatter { + auto format( + facebook::velox::connector::lakehouse::iceberg::IcebergDataSink::State s, + format_context& ctx) const { + return formatter::format( + facebook::velox::connector::lakehouse::iceberg::IcebergDataSink::stateString(s), + ctx); + } +}; + +template <> +struct fmt::formatter< + facebook::velox::connector::lakehouse::iceberg::LocationHandle::TableType> + : formatter { + auto format( + facebook::velox::connector::lakehouse::iceberg::LocationHandle::TableType + s, + format_context& ctx) const { + return formatter::format(static_cast(s), ctx); + } +}; diff --git a/velox/connectors/lakehouse/iceberg/IcebergDataSource.cpp b/velox/connectors/lakehouse/iceberg/IcebergDataSource.cpp new file mode 100644 index 000000000000..c43acd1495ea --- /dev/null +++ b/velox/connectors/lakehouse/iceberg/IcebergDataSource.cpp @@ -0,0 +1,418 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "velox/connectors/lakehouse/iceberg/IcebergDataSource.h" + +#include "velox/dwio/common/ReaderFactory.h" + +#include +#include + +using facebook::velox::common::testutil::TestValue; + +namespace facebook::velox::connector::lakehouse::iceberg { + +namespace { + +bool isMember( + const std::vector& fields, + const exec::FieldReference& field) { + return std::find(fields.begin(), fields.end(), &field) != fields.end(); +} + +bool shouldEagerlyMaterialize( + const exec::Expr& remainingFilter, + const exec::FieldReference& field) { + if (!remainingFilter.evaluatesArgumentsOnNonIncreasingSelection()) { + return true; + } + for (auto& input : remainingFilter.inputs()) { + if (isMember(input->distinctFields(), field) && input->hasConditionals()) { + return true; + } + } + return false; +} + +} // namespace + +class IcebergTableHandle; +class IcebergColumnHandle; + +IcebergDataSource::IcebergDataSource( + const RowTypePtr& outputType, + const ConnectorTableHandlePtr& tableHandle, + const connector::ColumnHandleMap& columnHandles, + FileHandleFactory* fileHandleFactory, + folly::Executor* executor, + const ConnectorQueryCtx* connectorQueryCtx, + const std::shared_ptr& connectorConfig) + : DataSourceBase( + outputType, + tableHandle, + columnHandles, + fileHandleFactory, + executor, + connectorQueryCtx, + connectorConfig) { + tableHandle_ = std::dynamic_pointer_cast(tableHandle); + VELOX_CHECK_NOT_NULL( + tableHandle_, + "ConnectorTableHandle must be an instance of TableHandleBase for {}", + tableHandle->name()); + + // Column handled keyed on the column alias, the name used in the query. + for (const auto& [canonicalizedName, columnHandle] : columnHandles) { + auto handle = std::dynamic_pointer_cast(columnHandle); + VELOX_CHECK_NOT_NULL( + handle, + "ColumnHandle must be an instance of HiveColumnHandle for {}", + canonicalizedName); + switch (handle->columnType()) { + case ColumnHandleBase::ColumnType::kRegular: + break; + case ColumnHandleBase::ColumnType::kPartitionKey: + partitionColumnHandles_.emplace(handle->name(), handle); + break; + case ColumnHandleBase::ColumnType::kSynthesized: + infoColumns_.emplace(handle->name(), handle); + break; + default: + break; + } + } + + std::vector readColumnNames; + auto readColumnTypes = outputType_->children(); + for (const auto& outputName : outputType_->names()) { + auto it = columnHandles.find(outputName); + VELOX_CHECK( + it != columnHandles.end(), + "ColumnHandle is missing for output column: {}", + outputName); + + auto* handle = static_cast(it->second.get()); + readColumnNames.push_back(handle->name()); + for (auto& subfield : handle->requiredSubfields()) { + VELOX_USER_CHECK_EQ( + getColumnName(subfield), + handle->name(), + "Required subfield does not match column name"); + subfields_[handle->name()].push_back(&subfield); + } + } + + if (connectorConfig_->isFileColumnNamesReadAsLowerCase( + connectorQueryCtx->sessionProperties())) { + checkColumnNameLowerCase(outputType_); + checkColumnNameLowerCase(tableHandle_->subfieldFilters(), infoColumns_); + checkColumnNameLowerCase(tableHandle_->remainingFilter()); + } + + for (const auto& [k, v] : tableHandle_->subfieldFilters()) { + filters_.emplace(k.clone(), v->clone()); + } + double sampleRate = 1; + auto remainingFilter = extractFiltersFromRemainingFilter( + tableHandle_->remainingFilter(), + expressionEvaluator_, + false, + filters_, + sampleRate); + if (sampleRate != 1) { + 
randomSkip_ = std::make_shared(sampleRate); + } + + std::vector remainingFilterSubfields; + if (remainingFilter) { + remainingFilterExprSet_ = expressionEvaluator_->compile(remainingFilter); + auto& remainingFilterExpr = remainingFilterExprSet_->expr(0); + folly::F14FastMap columnNames; + for (int i = 0; i < readColumnNames.size(); ++i) { + columnNames[readColumnNames[i]] = i; + } + for (auto& input : remainingFilterExpr->distinctFields()) { + auto it = columnNames.find(input->field()); + if (it != columnNames.end()) { + if (shouldEagerlyMaterialize(*remainingFilterExpr, *input)) { + multiReferencedFields_.push_back(it->second); + } + continue; + } + // Remaining filter may reference columns that are not used otherwise, + // e.g. are not being projected out and are not used in range filters. + // Make sure to add these columns to readerOutputType_. + readColumnNames.push_back(input->field()); + readColumnTypes.push_back(input->type()); + } + remainingFilterSubfields = remainingFilterExpr->extractSubfields(); + if (VLOG_IS_ON(1)) { + VLOG(1) << fmt::format( + "Extracted subfields from remaining filter: [{}]", + fmt::join(remainingFilterSubfields, ", ")); + } + for (auto& subfield : remainingFilterSubfields) { + const auto& name = getColumnName(subfield); + auto it = subfields_.find(name); + if (it != subfields_.end()) { + // Some subfields of the column are already projected out, we append the + // remainingFilter subfield + it->second.push_back(&subfield); + } else if (columnNames.count(name) == 0) { + // remainingFilter subfield's column is not projected out, we add the + // column and append the subfield + subfields_[name].push_back(&subfield); + } + } + } + + readerOutputType_ = + ROW(std::move(readColumnNames), std::move(readColumnTypes)); + + scanSpec_ = makeScanSpec(); + + if (remainingFilter) { + metadataFilter_ = std::make_shared( + *scanSpec_, *remainingFilter, expressionEvaluator_); + } + + ioStats_ = std::make_shared(); + fsStats_ = std::make_shared(); +} + +std::optional IcebergDataSource::next( + uint64_t size, + velox::ContinueFuture& /*future*/) { + VELOX_CHECK(split_ != nullptr, "No split to process. Call addSplit first."); + VELOX_CHECK_NOT_NULL(splitReader_, "No split reader present"); + + TestValue::adjust( + "facebook::velox::connector::lakehouse::common::DataSourceBase::next", + this); + + if (splitReader_->emptySplit()) { + resetSplit(); + return nullptr; + } + + if (!output_) { + output_ = BaseVector::create(readerOutputType_, 0, pool_); + } + + const auto rowsScanned = splitReader_->next(size, output_); + completedRows_ += rowsScanned; + if (rowsScanned == 0) { + splitReader_->updateRuntimeStats(runtimeStats_); + resetSplit(); + return nullptr; + } + + VELOX_CHECK( + !output_->mayHaveNulls(), "Top-level row vector cannot have nulls"); + auto rowsRemaining = output_->size(); + if (rowsRemaining == 0) { + // no rows passed the pushed down filters. + return getEmptyOutput(); + } + + auto rowVector = std::dynamic_pointer_cast(output_); + + // In case there is a remaining filter that excludes some but not all + // rows, collect the indices of the passing rows. If there is no filter, + // or it passes on all rows, leave this as null and let exec::wrap skip + // wrapping the results. + BufferPtr remainingIndices; + if (hasRemainingPartitionFilter()) { + rowsRemaining = + evaluateRemainingPartitionFilter(rowVector, remainingIndices); + + if (rowsRemaining == 0) { + return getEmptyOutput(); + } + } + + // TODO: remove if? 
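// --- Illustrative sketch (not part of this patch) ---------------------------
// Minimal standalone sketch of the "remaining filter" wrap step described in
// next() above: rows that pass the filter are collected into an indices
// vector and every output column is then viewed through those indices (the
// dictionary-wrap analog of exec::wrap). Plain std::vector stands in for
// Velox vectors; this illustrates the idea, not the Velox implementation.
#include <cstdint>
#include <vector>

// Collect the row numbers whose filter result is true.
inline std::vector<int32_t> passingIndicesSketch(const std::vector<bool>& passed) {
  std::vector<int32_t> indices;
  for (int32_t row = 0; row < static_cast<int32_t>(passed.size()); ++row) {
    if (passed[row]) {
      indices.push_back(row);
    }
  }
  return indices;
}

// "Wrap" a column by gathering only the passing rows.
template <typename T>
std::vector<T> wrapColumnSketch(
    const std::vector<T>& column,
    const std::vector<int32_t>& indices) {
  std::vector<T> out;
  out.reserve(indices.size());
  for (auto i : indices) {
    out.push_back(column[i]);
  }
  return out;
}
// ----------------------------------------------------------------------------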
+ if (remainingFilterExprSet_) { + rowsRemaining = evaluateRemainingFilter(rowVector); + VELOX_CHECK_LE(rowsRemaining, rowsScanned); + if (rowsRemaining == 0) { + // No rows passed the remaining filter. + return getEmptyOutput(); + } + + if (rowsRemaining < rowVector->size()) { + // Some, but not all rows passed the remaining filter. + remainingIndices = filterEvalCtx_.selectedIndices; + } + } + + if (outputType_->size() == 0) { + return exec::wrap(rowsRemaining, remainingIndices, rowVector); + } + + std::vector outputColumns; + outputColumns.reserve(outputType_->size()); + for (int i = 0; i < outputType_->size(); ++i) { + auto& child = rowVector->childAt(i); + if (remainingIndices) { + // Disable dictionary values caching in expression eval so that we + // don't need to reallocate the result for every batch. + child->disableMemo(); + } + outputColumns.emplace_back( + exec::wrapChild(rowsRemaining, remainingIndices, child)); + } + + return std::make_shared( + pool_, outputType_, BufferPtr(nullptr), rowsRemaining, outputColumns); +} + +void IcebergDataSource::addSplit(std::shared_ptr split) { + VELOX_CHECK_NULL( + split_, + "Previous split has not been processed yet. Call next to process the split."); + split_ = std::dynamic_pointer_cast(split); + VELOX_CHECK_NOT_NULL(split_, "Wrong type of split"); + VLOG(1) << "Adding split " << split_->toString(); + + if (splitReader_) { + splitReader_.reset(); + } + + splitReader_ = std::make_unique( + split_, + tableHandle_, + &partitionColumnHandles_, + connectorQueryCtx_, + connectorConfig_, + readerOutputType_, + ioStats_, + fsStats_, + fileHandleFactory_, + executor_, + scanSpec_); + + // Split reader subclasses may need to use the reader options in prepareSplit + // so we initialize it beforehand. + + splitReader_->configureReaderOptions(randomSkip_); + splitReader_->prepareSplit(metadataFilter_, runtimeStats_); + readerOutputType_ = splitReader_->readerOutputType(); +} + +std::shared_ptr IcebergDataSource::makeScanSpec() { + auto spec = std::make_shared("root"); + folly::F14FastMap> + filterSubfields; + std::vector subfieldSpecs; + for (auto& [subfield, _] : filters_) { + if (auto name = subfield.toString(); + !isSynthesizedColumn(name, infoColumns_) && + partitionColumnHandles_.count(name) == 0) { + VELOX_CHECK(!isSpecialColumn(name)); + filterSubfields[getColumnName(subfield)].push_back(&subfield); + } + } + + // Process columns that will be projected out. + for (int i = 0; i < readerOutputType_->size(); ++i) { + auto& name = readerOutputType_->nameOf(i); + auto& type = readerOutputType_->childAt(i); + + // Different table formats may have different special columns. They would be + // handled differently by corresponding connectors. 
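// --- Illustrative sketch (not part of this patch) ---------------------------
// Standalone sketch of the scan-spec grouping step in makeScanSpec() above:
// subfield filters are bucketed by their top-level column name, skipping
// synthesized ($path-style) and partition columns, so each projected column
// can attach its own filters. The string-based subfield parsing and container
// types below are simplifications of common::Subfield and the Velox maps.
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>

inline std::string topLevelColumnSketch(const std::string& subfield) {
  // "a.b.c" -> "a"; the real code walks common::Subfield path elements.
  return subfield.substr(0, subfield.find('.'));
}

inline std::unordered_map<std::string, std::vector<std::string>>
groupFilterSubfieldsSketch(
    const std::vector<std::string>& filterSubfields,
    const std::unordered_set<std::string>& synthesizedColumns,
    const std::unordered_set<std::string>& partitionColumns) {
  std::unordered_map<std::string, std::vector<std::string>> grouped;
  for (const auto& subfield : filterSubfields) {
    const auto column = topLevelColumnSketch(subfield);
    if (synthesizedColumns.count(column) != 0 ||
        partitionColumns.count(column) != 0) {
      continue; // handled outside the file reader's scan spec
    }
    grouped[column].push_back(subfield);
  }
  return grouped;
}
// ----------------------------------------------------------------------------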
+ if (isSpecialColumn(name)) { + continue; + } + + auto dataColumns = tableHandle_->dataColumns(); + auto it = subfields_.find(name); + if (it == subfields_.end()) { + auto* fieldSpec = spec->addFieldRecursively(name, *type, i); + processFieldSpec(dataColumns, type, *fieldSpec); + filterSubfields.erase(name); + continue; + } + for (auto* subfield : it->second) { + subfieldSpecs.push_back({subfield, false}); + } + it = filterSubfields.find(name); + if (it != filterSubfields.end()) { + for (auto* subfield : it->second) { + subfieldSpecs.push_back({subfield, true}); + } + filterSubfields.erase(it); + } + auto* fieldSpec = spec->addField(name, i); + addSubfields(*type, subfieldSpecs, 1, pool_, *fieldSpec); + processFieldSpec(dataColumns, type, *fieldSpec); + subfieldSpecs.clear(); + } + + // Now process the columns that will not be projected out. + if (!filterSubfields.empty()) { + VELOX_CHECK_NOT_NULL(tableHandle_->dataColumns()); + for (auto& [fieldName, subfields] : filterSubfields) { + for (auto* subfield : subfields) { + subfieldSpecs.push_back({subfield, true}); + } + auto& type = tableHandle_->dataColumns()->findChild(fieldName); + auto* fieldSpec = spec->getOrCreateChild(fieldName); + addSubfields(*type, subfieldSpecs, 1, pool_, *fieldSpec); + processFieldSpec(tableHandle_->dataColumns(), type, *fieldSpec); + subfieldSpecs.clear(); + } + } + + for (auto& pair : filters_) { + const auto name = pair.first.toString(); + // SelectiveColumnReader doesn't support constant columns with filters, + // hence, we can't have a filter for a $path or $bucket column. + // + // Unfortunately, Presto happens to specify a filter for $path, $file_size, + // $file_modified_time or $bucket column. This filter is redundant and needs + // to be removed. + // TODO Remove this check when Presto is fixed to not specify a filter + // on $path and $bucket column. + if (isSynthesizedColumn(name, infoColumns_)) { + continue; + } + auto fieldSpec = spec->getOrCreateChild(pair.first); + fieldSpec->setFilter(pair.second); + } + + if (connectorConfig_->readStatsBasedFilterReorderDisabled( + connectorQueryCtx_->sessionProperties())) { + spec->disableStatsBasedFilterReorder(); + } + + return spec; +} + +bool IcebergDataSource::isSpecialColumn(const std::string& name) const { + // TODO: is_deleted, etc. + return false; +} + +vector_size_t IcebergDataSource::evaluateRemainingPartitionFilter( + RowVectorPtr& rowVector, + BufferPtr& remainingIndices) { + // If there are filter functions on the partition columns, evaluate them here + + return rowVector->size(); +} + +} // namespace facebook::velox::connector::lakehouse::iceberg diff --git a/velox/connectors/lakehouse/iceberg/IcebergDataSource.h b/velox/connectors/lakehouse/iceberg/IcebergDataSource.h new file mode 100644 index 000000000000..9abd660f4510 --- /dev/null +++ b/velox/connectors/lakehouse/iceberg/IcebergDataSource.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include "ConnectorConfigBase.h" +#include "DataSourceBase.h" +#include "FileHandle.h" +#include "velox/common/base/RandomUtil.h" +#include "velox/common/file/FileSystems.h" +#include "velox/common/io/IoStatistics.h" +#include "velox/connectors/lakehouse/iceberg/IcebergConnectorSplit.h" +#include "velox/connectors/lakehouse/iceberg/IcebergPartitionFunction.h" +#include "velox/connectors/lakehouse/iceberg/IcebergSplitReader.h" +#include "velox/connectors/lakehouse/iceberg/IcebergTableHandle.h" +#include "velox/dwio/common/Statistics.h" + +namespace facebook::velox::connector::lakehouse::iceberg { + +namespace { + +bool isMember( + const std::vector& fields, + const exec::FieldReference& field); + +bool shouldEagerlyMaterialize( + const exec::Expr& remainingFilter, + const exec::FieldReference& field); + +} + +class IcebergDataSource : public DataSourceBase { + public: + IcebergDataSource( + const RowTypePtr& outputType, + const ConnectorTableHandlePtr& tableHandle, + const connector::ColumnHandleMap& columnHandles, + FileHandleFactory* fileHandleFactory, + folly::Executor* executor, + const ConnectorQueryCtx* connectorQueryCtx, + const std::shared_ptr& connectorConfig); + + void addSplit(std::shared_ptr split) override; + + std::optional next(uint64_t size, velox::ContinueFuture& future) + override; + + const ConnectorQueryCtx* testingConnectorQueryCtx() const { + return connectorQueryCtx_; + } + + private: + std::shared_ptr makeScanSpec(); + + bool isSpecialColumn(const std::string& name) const override; + void setupRowIdColumn(); + + vector_size_t evaluateRemainingPartitionFilter( + RowVectorPtr& rowVector, + BufferPtr& remainingIndices) override; + + std::unique_ptr partitionFunction_; + std::vector partitions_; +}; +} // namespace facebook::velox::connector::lakehouse::iceberg diff --git a/velox/connectors/lakehouse/iceberg/IcebergDeleteFile.cpp b/velox/connectors/lakehouse/iceberg/IcebergDeleteFile.cpp new file mode 100644 index 000000000000..8fb37eb27958 --- /dev/null +++ b/velox/connectors/lakehouse/iceberg/IcebergDeleteFile.cpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "IcebergDeleteFile.h" + +namespace facebook::velox::connector::lakehouse::iceberg { + +fmt::underlying_t format_as(FileContent f) { + return fmt::underlying(f); +} + +} // namespace facebook::velox::connector::lakehouse::iceberg diff --git a/velox/connectors/lakehouse/iceberg/IcebergDeleteFile.h b/velox/connectors/lakehouse/iceberg/IcebergDeleteFile.h new file mode 100644 index 000000000000..8ad20afe6585 --- /dev/null +++ b/velox/connectors/lakehouse/iceberg/IcebergDeleteFile.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include + +#include "velox/dwio/common/Options.h" + +namespace facebook::velox::connector::lakehouse::iceberg { + +enum class FileContent { + kData, + kPositionalDeletes, + kEqualityDeletes, +}; + +/// Formatter for fmt. +fmt::underlying_t format_as(FileContent f); + +struct IcebergDeleteFile { + FileContent content; + const std::string filePath; + dwio::common::FileFormat fileFormat; + uint64_t recordCount; + uint64_t fileSizeInBytes; + // The field ids for the delete columns for equality delete files + std::vector equalityFieldIds; + // The lower bounds of the in-file positions for the deleted rows, identified + // by each column's field id. E.g. The deleted rows for a column with field id + // 1 is in range [10, 50], where 10 and 50 are the deleted row positions in + // the data file, then lowerBounds would contain entry <1, "10"> + std::unordered_map lowerBounds; + // The upper bounds of the in-file positions for the deleted rows, identified + // by each column's field id. E.g. The deleted rows for a column with field id + // 1 is in range [10, 50], then upperBounds will contain entry <1, "50"> + std::unordered_map upperBounds; + + // TODO: DeleteFile can also be partitioned. PartitionData and PartitionSpec + // would be needed. + + IcebergDeleteFile( + FileContent _content, + const std::string& _filePath, + dwio::common::FileFormat _fileFormat, + uint64_t _recordCount, + uint64_t _fileSizeInBytes, + std::vector _equalityFieldIds = {}, + std::unordered_map _lowerBounds = {}, + std::unordered_map _upperBounds = {}) + : content(_content), + filePath(_filePath), + fileFormat(_fileFormat), + recordCount(_recordCount), + fileSizeInBytes(_fileSizeInBytes), + equalityFieldIds(_equalityFieldIds), + lowerBounds(_lowerBounds), + upperBounds(_upperBounds) {} +}; + +} // namespace facebook::velox::connector::lakehouse::iceberg diff --git a/velox/connectors/lakehouse/iceberg/IcebergMetadataColumns.h b/velox/connectors/lakehouse/iceberg/IcebergMetadataColumns.h new file mode 100644 index 000000000000..1a7e88667d27 --- /dev/null +++ b/velox/connectors/lakehouse/iceberg/IcebergMetadataColumns.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include "velox/type/Type.h" + +namespace facebook::velox::connector::lakehouse::iceberg { + +struct IcebergMetadataColumn { + int id; + std::string name; + std::shared_ptr type; + std::string doc; + + IcebergMetadataColumn( + int _id, + const std::string& _name, + std::shared_ptr _type, + const std::string& _doc) + : id(_id), name(_name), type(_type), doc(_doc) {} + + static std::shared_ptr icebergDeleteFilePathColumn() { + return std::make_shared( + 2147483546, + "file_path", + VARCHAR(), + "Path of a file in which a deleted row is stored"); + } + + static std::shared_ptr icebergDeletePosColumn() { + return std::make_shared( + 2147483545, + "pos", + BIGINT(), + "Ordinal position of a deleted row in the data file"); + } +}; + +} // namespace facebook::velox::connector::lakehouse::iceberg diff --git a/velox/connectors/lakehouse/iceberg/IcebergPartitionFunction.cpp b/velox/connectors/lakehouse/iceberg/IcebergPartitionFunction.cpp new file mode 100644 index 000000000000..9e0f5c88682d --- /dev/null +++ b/velox/connectors/lakehouse/iceberg/IcebergPartitionFunction.cpp @@ -0,0 +1,557 @@ +/* +* Copyright (c) Facebook, Inc. and its affiliates. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ +#include "IcebergPartitionFunction.h" + +namespace facebook::velox::connector::lakehouse::iceberg { + +namespace { +void mergeHash(bool mix, uint32_t oneHash, uint32_t& aggregateHash) { + aggregateHash = mix ? aggregateHash * 31 + oneHash : oneHash; +} + +int32_t hashInt64(int64_t value) { + return ((*reinterpret_cast(&value)) >> 32) ^ value; +} + +#if defined(__has_feature) +#if __has_feature(__address_sanitizer__) +__attribute__((no_sanitize("integer"))) +#endif +#endif +uint32_t +hashBytes(StringView bytes, int32_t initialValue) { + uint32_t hash = initialValue; + auto* data = bytes.data(); + for (auto i = 0; i < bytes.size(); ++i) { + hash = hash * 31 + *reinterpret_cast(data + i); + } + return hash; +} + +int32_t hashTimestamp(const Timestamp& ts) { + return hashInt64((ts.getSeconds() << 30) | ts.getNanos()); +} + +template +inline uint32_t hashOne(typename TypeTraits::NativeType /* value */) { + VELOX_UNSUPPORTED( + "Iceberg partitioning function doesn't support {} type", + TypeTraits::name); + return 0; // Make compiler happy. +} + +template <> +inline uint32_t hashOne(bool value) { + return value ? 
1 : 0; +} + +template <> +inline uint32_t hashOne(int8_t value) { + return static_cast(value); +} + +template <> +inline uint32_t hashOne(int16_t value) { + return static_cast(value); +} + +template <> +inline uint32_t hashOne(int32_t value) { + return static_cast(value); +} + +template <> +inline uint32_t hashOne(float value) { + return static_cast(*reinterpret_cast(&value)); +} + +template <> +inline uint32_t hashOne(int64_t value) { + return hashInt64(value); +} + +template <> +inline uint32_t hashOne(double value) { + return hashInt64(*reinterpret_cast(&value)); +} + +template <> +inline uint32_t hashOne(StringView value) { + return hashBytes(value, 0); +} + +template <> +inline uint32_t hashOne(StringView value) { + return hashBytes(value, 0); +} + +template <> +inline uint32_t hashOne(Timestamp value) { + return hashTimestamp(value); +} + +template <> +inline uint32_t hashOne(UnknownValue /*value*/) { + VELOX_FAIL("Unknown values cannot be non-NULL"); +} + +template +void hashPrimitive( + const DecodedVector& values, + const SelectivityVector& rows, + bool mix, + std::vector& hashes) { + if (rows.isAllSelected()) { + // The compiler seems to be a little fickle with optimizations. + // Although rows.applyToSelected should do roughly the same thing, doing + // this here along with assigning rows.size() to a variable seems to help + // the compiler to inline hashOne showing a 50% performance improvement in + // benchmarks. + vector_size_t numRows = rows.size(); + for (auto i = 0; i < numRows; ++i) { + const uint32_t hash = values.isNullAt(i) + ? 0 + : hashOne( + values.valueAt::NativeType>(i)); + mergeHash(mix, hash, hashes[i]); + } + } else { + rows.applyToSelected([&](auto row) INLINE_LAMBDA { + const uint32_t hash = values.isNullAt(row) + ? 0 + : hashOne( + values.valueAt::NativeType>(row)); + mergeHash(mix, hash, hashes[row]); + }); + } +} + +void hashPrecomputed( + uint32_t precomputedHash, + vector_size_t numRows, + bool mix, + std::vector& hashes) { + for (auto i = 0; i < numRows; ++i) { + hashes[i] = mix ? 
hashes[i] * 31 + precomputedHash : precomputedHash; + } +} +} // namespace + +template <> +void IcebergPartitionFunction::hashTyped( + const DecodedVector& values, + const SelectivityVector& rows, + bool mix, + std::vector& hashes, + size_t /* poolIndex */) { + hashPrimitive(values, rows, mix, hashes); +} + +template <> +void IcebergPartitionFunction::hashTyped( + const DecodedVector& values, + const SelectivityVector& rows, + bool mix, + std::vector& hashes, + size_t /* poolIndex */) { + hashPrimitive(values, rows, mix, hashes); +} + +template <> +void IcebergPartitionFunction::hashTyped( + const DecodedVector& values, + const SelectivityVector& rows, + bool mix, + std::vector& hashes, + size_t /* poolIndex */) { + hashPrimitive(values, rows, mix, hashes); +} + +template <> +void IcebergPartitionFunction::hashTyped( + const DecodedVector& values, + const SelectivityVector& rows, + bool mix, + std::vector& hashes, + size_t /* poolIndex */) { + hashPrimitive(values, rows, mix, hashes); +} + +template <> +void IcebergPartitionFunction::hashTyped( + const DecodedVector& values, + const SelectivityVector& rows, + bool mix, + std::vector& hashes, + size_t /* poolIndex */) { + hashPrimitive(values, rows, mix, hashes); +} + +template <> +void IcebergPartitionFunction::hashTyped( + const DecodedVector& values, + const SelectivityVector& rows, + bool mix, + std::vector& hashes, + size_t /* poolIndex */) { + hashPrimitive(values, rows, mix, hashes); +} + +template <> +void IcebergPartitionFunction::hashTyped( + const DecodedVector& values, + const SelectivityVector& rows, + bool mix, + std::vector& hashes, + size_t /* poolIndex */) { + hashPrimitive(values, rows, mix, hashes); +} + +template <> +void IcebergPartitionFunction::hashTyped( + const DecodedVector& values, + const SelectivityVector& rows, + bool mix, + std::vector& hashes, + size_t /* poolIndex */) { + hashPrimitive(values, rows, mix, hashes); +} + +template <> +void IcebergPartitionFunction::hashTyped( + const DecodedVector& values, + const SelectivityVector& rows, + bool mix, + std::vector& hashes, + size_t /* poolIndex */) { + hashPrimitive(values, rows, mix, hashes); +} + +template <> +void IcebergPartitionFunction::hashTyped( + const DecodedVector& values, + const SelectivityVector& rows, + bool mix, + std::vector& hashes, + size_t /* poolIndex */) { + hashPrimitive(values, rows, mix, hashes); +} + +template <> +void IcebergPartitionFunction::hashTyped( + const DecodedVector& values, + const SelectivityVector& rows, + bool mix, + std::vector& hashes, + size_t /* poolIndex */) { + hashPrimitive(values, rows, mix, hashes); +} + +template <> +void IcebergPartitionFunction::hashTyped( + const DecodedVector& /*values*/, + const SelectivityVector& /*rows*/, + bool /*mix*/, + std::vector& /*hashes*/, + size_t /* poolIndex */) { + VELOX_UNSUPPORTED("Iceberg partitioning function doesn't support OPAQUE type"); +} + +template <> +void IcebergPartitionFunction::hashTyped( + const DecodedVector& values, + const SelectivityVector& rows, + bool mix, + std::vector& hashes, + size_t poolIndex) { + auto& elementsDecoded = getDecodedVector(poolIndex); + auto& elementsRows = getRows(poolIndex); + auto& elementsHashes = getHashes(poolIndex); + + const auto* arrayVector = values.base()->as(); + const vector_size_t elementsSize = arrayVector->elements()->size(); + elementsRows.resizeFill(elementsSize, false); + elementsHashes.resize(elementsSize); + + rows.applyToSelected([&](auto row) { + if (!values.isNullAt(row)) { + const auto index = 
values.index(row); + const auto offset = arrayVector->offsetAt(index); + const auto length = arrayVector->sizeAt(index); + + elementsRows.setValidRange(offset, offset + length, true); + } + }); + + elementsRows.updateBounds(); + + elementsDecoded.decode(*arrayVector->elements(), elementsRows); + + hash( + elementsDecoded, + elementsDecoded.base()->typeKind(), + elementsRows, + false, + elementsHashes, + poolIndex + 1); + + rows.applyToSelected([&](auto row) { + uint32_t hash = 0; + + if (!values.isNullAt(row)) { + const auto index = values.index(row); + const auto offset = arrayVector->offsetAt(index); + const auto length = arrayVector->sizeAt(index); + + for (size_t i = offset; i < offset + length; ++i) { + mergeHash(true, elementsHashes[i], hash); + } + } + + mergeHash(mix, hash, hashes[row]); + }); +} + +template <> +void IcebergPartitionFunction::hashTyped( + const DecodedVector& values, + const SelectivityVector& rows, + bool mix, + std::vector& hashes, + size_t poolIndex) { + auto& valuesDecoded = getDecodedVector(poolIndex); + auto& keysDecoded = getDecodedVector(poolIndex + 1); + auto& elementsRows = getRows(poolIndex); + auto& valuesHashes = getHashes(poolIndex); + auto& keysHashes = getHashes(poolIndex + 1); + + const auto* mapVector = values.base()->as(); + const vector_size_t elementsSize = mapVector->mapKeys()->size(); + elementsRows.resizeFill(elementsSize, false); + keysHashes.resize(elementsSize); + valuesHashes.resize(elementsSize); + + rows.applyToSelected([&](auto row) { + if (!values.isNullAt(row)) { + const auto index = values.index(row); + const auto offset = mapVector->offsetAt(index); + const auto length = mapVector->sizeAt(index); + + elementsRows.setValidRange(offset, offset + length, true); + } + }); + + elementsRows.updateBounds(); + + keysDecoded.decode(*mapVector->mapKeys(), elementsRows); + valuesDecoded.decode(*mapVector->mapValues(), elementsRows); + + hash( + keysDecoded, + keysDecoded.base()->typeKind(), + elementsRows, + false, + keysHashes, + poolIndex + 2); + + hash( + valuesDecoded, + valuesDecoded.base()->typeKind(), + elementsRows, + false, + valuesHashes, + poolIndex + 2); + + rows.applyToSelected([&](auto row) { + uint32_t hash = 0; + + if (!values.isNullAt(row)) { + const auto index = values.index(row); + const auto offset = mapVector->offsetAt(index); + const auto length = mapVector->sizeAt(index); + + for (size_t i = offset; i < offset + length; ++i) { + hash += keysHashes[i] ^ valuesHashes[i]; + } + } + + mergeHash(mix, hash, hashes[row]); + }); +} + +template <> +void IcebergPartitionFunction::hashTyped( + const DecodedVector& values, + const SelectivityVector& rows, + bool mix, + std::vector& hashes, + size_t poolIndex) { + auto& childDecodedVector = getDecodedVector(poolIndex); + auto& childRows = getRows(poolIndex); + auto& childHashes = getHashes(poolIndex); + + const auto* rowVector = values.base()->as(); + childRows.resizeFill(rowVector->size(), false); + childHashes.resize(rowVector->size()); + + rows.applyToSelected([&](auto row) { + if (!values.isNullAt(row)) { + childRows.setValid(values.index(row), true); + } + }); + + childRows.updateBounds(); + + for (vector_size_t i = 0; i < rowVector->childrenSize(); ++i) { + auto& child = rowVector->childAt(i); + childDecodedVector.decode(*child, childRows); + hash( + childDecodedVector, + child->typeKind(), + childRows, + i > 0, + childHashes, + poolIndex + 1); + } + + rows.applyToSelected([&](auto row) { + mergeHash( + mix, + values.isNullAt(row) ? 
0 : childHashes[values.index(row)], + hashes[row]); + }); +} + +void IcebergPartitionFunction::hash( + const DecodedVector& values, + TypeKind typeKind, + const SelectivityVector& rows, + bool mix, + std::vector& hashes, + size_t poolIndex) { + // This function mirrors the behavior of function hashCode in + // HIVE-12025 ba83fd7bff + // serde/src/java/org/apache/hadoop/iceberg/serde2/objectinspector/ObjectInspectorUtils.java + // https://github.com/apache/iceberg/blob/ba83fd7bff/serde/src/java/org/apache/hadoop/iceberg/serde2/objectinspector/ObjectInspectorUtils.java + + // HIVE-7148 proposed change to bucketing hash algorithms. If that + // gets implemented, this function will need to change + // significantly. + + VELOX_DYNAMIC_TYPE_DISPATCH_ALL( + hashTyped, typeKind, values, rows, mix, hashes, poolIndex); +} + +IcebergPartitionFunction::IcebergPartitionFunction( + int numBuckets, + std::vector bucketToPartition, + std::vector keyChannels, + const std::vector& constValues) + : numBuckets_{numBuckets}, + bucketToPartition_{bucketToPartition}, + keyChannels_{std::move(keyChannels)} { + precomputedHashes_.resize(keyChannels_.size()); + size_t constChannel{0}; + for (auto i = 0; i < keyChannels_.size(); ++i) { + if (keyChannels_[i] == kConstantChannel) { + precompute(*(constValues[constChannel++]), i); + } + } +} + +std::optional IcebergPartitionFunction::partition( + const RowVector& input, + std::vector& partitions) { + const auto numRows = input.size(); + + auto& decodedVector = getDecodedVector(); + auto& rows = getRows(); + auto& hashes = getHashes(); + rows.resizeFill(numRows, true); + if (numRows > hashes.size()) { + hashes.resize(numRows); + } + partitions.resize(numRows); + for (auto i = 0; i < keyChannels_.size(); ++i) { + if (keyChannels_[i] != kConstantChannel) { + const auto& keyVector = input.childAt(keyChannels_[i]); + decodedVector.decode(*keyVector, rows); + hash(decodedVector, keyVector->typeKind(), rows, i > 0, hashes, 1); + } else { + hashPrecomputed(precomputedHashes_[i], numRows, i > 0, hashes); + } + } + + static const int32_t kInt32Max = std::numeric_limits::max(); + + if (bucketToPartition_.empty()) { + // NOTE: if bucket to partition mapping is empty, then we do + // identical mapping. 
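// --- Illustrative sketch (not part of this patch) ---------------------------
// Standalone sketch of the bucket assignment implemented above: a Hive-style
// hash is mixed across the key columns, then reduced to a bucket with
// (hash & INT32_MAX) % numBuckets and optionally remapped through a
// bucket-to-partition table. Plain string columns stand in for
// DecodedVector/RowVector; only the arithmetic is the point here.
#include <cstdint>
#include <limits>
#include <string>
#include <vector>

inline uint32_t hashBytesSketch(const std::string& bytes, uint32_t seed = 0) {
  uint32_t hash = seed;
  for (unsigned char c : bytes) {
    hash = hash * 31 + c; // same polynomial as hashBytes above
  }
  return hash;
}

inline void mergeHashSketch(bool mix, uint32_t oneHash, uint32_t& aggregate) {
  aggregate = mix ? aggregate * 31 + oneHash : oneHash;
}

inline std::vector<int> assignBucketsSketch(
    const std::vector<std::vector<std::string>>& keyColumns, // [column][row]
    int numBuckets,
    const std::vector<int>& bucketToPartition /* may be empty */) {
  const std::size_t numRows = keyColumns.empty() ? 0 : keyColumns.front().size();
  std::vector<uint32_t> hashes(numRows, 0);
  for (std::size_t col = 0; col < keyColumns.size(); ++col) {
    for (std::size_t row = 0; row < numRows; ++row) {
      mergeHashSketch(col > 0, hashBytesSketch(keyColumns[col][row]), hashes[row]);
    }
  }
  constexpr uint32_t kInt32Max = std::numeric_limits<int32_t>::max();
  std::vector<int> partitions(numRows);
  for (std::size_t row = 0; row < numRows; ++row) {
    const int bucket = static_cast<int>((hashes[row] & kInt32Max) % numBuckets);
    partitions[row] =
        bucketToPartition.empty() ? bucket : bucketToPartition[bucket];
  }
  return partitions;
}
// ----------------------------------------------------------------------------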
+ for (auto i = 0; i < numRows; ++i) { + partitions[i] = (hashes[i] & kInt32Max) % numBuckets_; + } + } else { + for (auto i = 0; i < numRows; ++i) { + partitions[i] = + bucketToPartition_[((hashes[i] & kInt32Max) % numBuckets_)]; + } + } + + return std::nullopt; +} + +void IcebergPartitionFunction::precompute( + const BaseVector& value, + size_t channelIndex) { + if (value.isNullAt(0)) { + precomputedHashes_[channelIndex] = 0; + return; + } + + const SelectivityVector rows(1, true); + DecodedVector& decodedVector = getDecodedVector(); + decodedVector.decode(value, rows); + + std::vector hashes{1}; + hash(decodedVector, value.typeKind(), rows, false, hashes, 1); + precomputedHashes_[channelIndex] = hashes[0]; +} + +DecodedVector& IcebergPartitionFunction::getDecodedVector(size_t poolIndex) { + while (poolIndex >= decodedVectorsPool_.size()) { + decodedVectorsPool_.push_back(std::make_unique()); + } + + return *decodedVectorsPool_[poolIndex]; +} + +SelectivityVector& IcebergPartitionFunction::getRows(size_t poolIndex) { + while (poolIndex >= rowsPool_.size()) { + rowsPool_.push_back(std::make_unique()); + } + + return *rowsPool_[poolIndex]; +} + +std::vector& IcebergPartitionFunction::getHashes(size_t poolIndex) { + while (poolIndex >= hashesPool_.size()) { + hashesPool_.push_back(std::make_unique>()); + } + + return *hashesPool_[poolIndex]; +} + +} // namespace facebook::velox::connector::lakehouse::iceberg diff --git a/velox/connectors/lakehouse/iceberg/IcebergPartitionFunction.h b/velox/connectors/lakehouse/iceberg/IcebergPartitionFunction.h new file mode 100644 index 000000000000..61a3db116cad --- /dev/null +++ b/velox/connectors/lakehouse/iceberg/IcebergPartitionFunction.h @@ -0,0 +1,93 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include "velox/core/PlanNode.h" +#include "velox/vector/DecodedVector.h" + +namespace facebook::velox::connector::lakehouse::iceberg { + +class IcebergPartitionFunction : public core::PartitionFunction { + public: + IcebergPartitionFunction( + int numBuckets, + std::vector bucketToPartition, + std::vector keyChannels, + const std::vector& constValues = {}); + + IcebergPartitionFunction( + int numBuckets, + std::vector keyChannels, + const std::vector& constValues = {}) + : IcebergPartitionFunction( + numBuckets, + {}, + std::move(keyChannels), + constValues) {} + + ~IcebergPartitionFunction() override = default; + + std::optional partition( + const RowVector& input, + std::vector& partitions) override; + + const std::vector& testingBucketToPartition() const { + return bucketToPartition_; + } + + private: + // Precompute single value hive hash for a constant partition key. 
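// --- Illustrative sketch (not part of this patch) ---------------------------
// Standalone sketch of the pooling pattern used by getDecodedVector/getRows/
// getHashes above: recursive hashing of ARRAY/MAP/ROW values asks for scratch
// objects by nesting depth (poolIndex), and the pool grows on demand so each
// level reuses its own buffers across batches. The generic helper below is an
// assumption for illustration; the real code keeps one pool per scratch type.
#include <memory>
#include <vector>

template <typename T>
T& getFromPoolSketch(std::vector<std::unique_ptr<T>>& pool, std::size_t poolIndex) {
  while (poolIndex >= pool.size()) {
    pool.push_back(std::make_unique<T>());
  }
  return *pool[poolIndex];
}

// Usage sketch: a per-depth scratch vector of hashes.
//   std::vector<std::unique_ptr<std::vector<uint32_t>>> hashesPool;
//   auto& hashes = getFromPoolSketch(hashesPool, /*poolIndex=*/2);
// ----------------------------------------------------------------------------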
+ void precompute(const BaseVector& value, size_t column_index_t); + + void hash( + const DecodedVector& values, + TypeKind typeKind, + const SelectivityVector& rows, + bool mix, + std::vector& hashes, + size_t poolIndex); + + template + void hashTyped( + const DecodedVector& /* values */, + const SelectivityVector& /* rows */, + bool /* mix */, + std::vector& /* hashes */, + size_t /* poolIndex */) { + VELOX_UNSUPPORTED( + "Iceberg partitioning function doesn't support {} type", + TypeTraits::name); + } + + // Helper functions to retrieve reusable memory from pools. + DecodedVector& getDecodedVector(size_t poolIndex = 0); + SelectivityVector& getRows(size_t poolIndex = 0); + std::vector& getHashes(size_t poolIndex = 0); + + const int numBuckets_; + const std::vector bucketToPartition_; + const std::vector keyChannels_; + + // Pools of reusable memory. + std::vector>> hashesPool_; + std::vector> rowsPool_; + std::vector> decodedVectorsPool_; + // Precomputed hashes for constant partition keys (one per key). + std::vector precomputedHashes_; +}; + +class IcebergPartitionSpec {}; +} // namespace facebook::velox::connector::lakehouse::iceberg diff --git a/velox/connectors/lakehouse/iceberg/IcebergPartitionUtil.cpp b/velox/connectors/lakehouse/iceberg/IcebergPartitionUtil.cpp new file mode 100644 index 000000000000..0599146a2c5d --- /dev/null +++ b/velox/connectors/lakehouse/iceberg/IcebergPartitionUtil.cpp @@ -0,0 +1,120 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "velox/connectors/lakehouse/iceberg/IcebergPartitionUtil.h" + +#include "velox/vector/SimpleVector.h" + +namespace facebook::velox::connector::lakehouse::iceberg { + +#define PARTITION_TYPE_DISPATCH(TEMPLATE_FUNC, typeKind, ...) \ + [&]() { \ + switch (typeKind) { \ + case TypeKind::BOOLEAN: \ + case TypeKind::TINYINT: \ + case TypeKind::SMALLINT: \ + case TypeKind::INTEGER: \ + case TypeKind::BIGINT: \ + case TypeKind::VARCHAR: \ + case TypeKind::VARBINARY: \ + case TypeKind::TIMESTAMP: \ + return VELOX_DYNAMIC_SCALAR_TYPE_DISPATCH( \ + TEMPLATE_FUNC, typeKind, __VA_ARGS__); \ + default: \ + VELOX_UNSUPPORTED( \ + "Unsupported partition type: {}", TypeKindName::toName(typeKind)); \ + } \ + }() + +namespace { +template +inline std::string makePartitionValueString(T value) { + return folly::to(value); +} + +template <> +inline std::string makePartitionValueString(bool value) { + return value ? "true" : "false"; +} + +template <> +inline std::string makePartitionValueString(Timestamp value) { + value.toTimezone(Timestamp::defaultTimezone()); + TimestampToStringOptions options; + options.dateTimeSeparator = ' '; + // Set the precision to milliseconds, and enable the skipTrailingZeros match + // the timestamp precision and truncation behavior of Presto. 
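// --- Illustrative sketch (not part of this patch) ---------------------------
// Standalone sketch of the Presto-compatible timestamp partition value rule
// described above: format with millisecond precision, drop trailing zeros,
// but keep at least one fractional digit (".0"), matching
// java.sql.Timestamp.toString(). Taking epoch milliseconds and formatting in
// UTC via gmtime_r are simplifying assumptions; the real code goes through
// Velox's Timestamp and TimestampToStringOptions.
#include <cstdint>
#include <cstdio>
#include <ctime>
#include <string>

inline std::string timestampPartitionValueSketch(int64_t epochMillis) {
  std::time_t seconds = epochMillis / 1000;
  int millis = static_cast<int>(epochMillis % 1000);
  if (millis < 0) {
    millis += 1000;
    seconds -= 1;
  }
  std::tm tmValue{};
  gmtime_r(&seconds, &tmValue);
  char buf[32];
  std::strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S", &tmValue);
  std::string result(buf);
  if (millis == 0) {
    return result + ".0"; // keep one digit, like Presto
  }
  char frac[8];
  std::snprintf(frac, sizeof(frac), ".%03d", millis);
  std::string fracStr(frac);
  while (fracStr.back() == '0') {
    fracStr.pop_back(); // skip trailing zeros
  }
  return result + fracStr;
}
// ----------------------------------------------------------------------------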
+ options.precision = TimestampPrecision::kMilliseconds; + options.skipTrailingZeros = true; + + auto result = value.toString(options); + + // Presto's java.sql.Timestamp.toString() always keeps at least one decimal + // place even when all fractional seconds are zero. + // If skipTrailingZeros removed all fractional digits, add back ".0" to match + // Presto's behavior. + if (auto dotPos = result.find_last_of('.'); dotPos == std::string::npos) { + // No decimal point found, add ".0" + result += ".0"; + } + + return result; +} + +template +std::pair makePartitionKeyValueString( + const BaseVector* partitionVector, + vector_size_t row, + const std::string& name, + bool isDate, + const std::string& nullValueName) { + using T = typename TypeTraits::NativeType; + if (partitionVector->as>()->isNullAt(row)) { + return std::make_pair(name, nullValueName); + } + if (isDate) { + return std::make_pair( + name, + DATE()->toString( + partitionVector->as>()->valueAt(row))); + } + return std::make_pair( + name, + makePartitionValueString( + partitionVector->as>()->valueAt(row))); +} + +} // namespace + +std::vector> extractPartitionKeyValues( + const RowVectorPtr& partitionsVector, + vector_size_t row, + const std::string& nullValueName) { + std::vector> partitionKeyValues; + for (auto i = 0; i < partitionsVector->childrenSize(); i++) { + partitionKeyValues.push_back(PARTITION_TYPE_DISPATCH( + makePartitionKeyValueString, + partitionsVector->childAt(i)->typeKind(), + partitionsVector->childAt(i)->loadedVector(), + row, + asRowType(partitionsVector->type())->nameOf(i), + partitionsVector->childAt(i)->type()->isDate(), + nullValueName)); + } + return partitionKeyValues; +} + +} // namespace facebook::velox::connector::lakehouse::iceberg diff --git a/velox/connectors/lakehouse/iceberg/IcebergPartitionUtil.h b/velox/connectors/lakehouse/iceberg/IcebergPartitionUtil.h new file mode 100644 index 000000000000..bb1f41b3c7ee --- /dev/null +++ b/velox/connectors/lakehouse/iceberg/IcebergPartitionUtil.h @@ -0,0 +1,27 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include "velox/vector/ComplexVector.h" + +namespace facebook::velox::connector::lakehouse::iceberg { + +std::vector> extractPartitionKeyValues( + const RowVectorPtr& partitionsVector, + vector_size_t row, + const std::string& nullValueName = ""); + +} // namespace facebook::velox::connector::lakehouse::iceberg diff --git a/velox/connectors/lakehouse/iceberg/IcebergSplitReader.cpp b/velox/connectors/lakehouse/iceberg/IcebergSplitReader.cpp new file mode 100644 index 000000000000..176244809050 --- /dev/null +++ b/velox/connectors/lakehouse/iceberg/IcebergSplitReader.cpp @@ -0,0 +1,183 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "IcebergSplitReader.h" + +#include "IcebergConnectorSplit.h" +#include "IcebergConnectorUtil.h" +#include "IcebergDeleteFile.h" +#include "IcebergTableHandle.h" +#include "velox/dwio/common/BufferUtil.h" + +using namespace facebook::velox::dwio::common; + +namespace facebook::velox::connector::lakehouse::iceberg { + +IcebergSplitReader::IcebergSplitReader( + const std::shared_ptr& split, + const std::shared_ptr& tableHandle, + const std::unordered_map< + std::string, + std::shared_ptr>* partitionKeys, + const ConnectorQueryCtx* connectorQueryCtx, + const std::shared_ptr& ConnectorConfigBase, + const RowTypePtr& readerOutputType, + const std::shared_ptr& ioStats, + const std::shared_ptr& fsStats, + FileHandleFactory* fileHandleFactory, + folly::Executor* executor, + const std::shared_ptr& scanSpec) + : SplitReaderBase( + split, + tableHandle, + partitionKeys, + connectorQueryCtx, + ConnectorConfigBase, + readerOutputType, + ioStats, + fsStats, + fileHandleFactory, + executor, + scanSpec), + baseReadOffset_(0), + splitOffset_(0), + deleteBitmap_(nullptr) {} + +IcebergSplitReader::~IcebergSplitReader() {} + +void IcebergSplitReader::prepareSplit( + std::shared_ptr metadataFilter, + dwio::common::RuntimeStatistics& runtimeStats) { + createReader(); + if (emptySplit_) { + return; + } + auto rowType = getAdaptedRowType(); + + std::shared_ptr icebergSplit = + std::dynamic_pointer_cast(split_); + VELOX_CHECK_NOT_NULL(icebergSplit); + + const auto& deleteFiles = icebergSplit->deleteFiles; + std::unordered_set equalityFieldIds; + for (const auto& deleteFile : deleteFiles) { + if (deleteFile.content == FileContent::kEqualityDeletes && + deleteFile.recordCount > 0) { + equalityFieldIds.insert( + deleteFile.equalityFieldIds.begin(), + deleteFile.equalityFieldIds.end()); + } + } + + if (checkIfSplitIsEmpty(runtimeStats)) { + VELOX_CHECK(emptySplit_); + return; + } + + createRowReader(std::move(metadataFilter), std::move(rowType)); + + baseReadOffset_ = 0; + splitOffset_ = baseRowReader_->nextRowNumber(); + + // Create the positional deletes file readers. They need to be created after + // the RowReader is created. 
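// --- Illustrative sketch (not part of this patch) ---------------------------
// Standalone sketch of the positional-delete flow that prepareSplit() sets up
// and next() applies below: delete files yield absolute row positions in the
// data file; positions falling inside the current batch
// [readOffset, readOffset + batchSize) are marked in a bitmap, and marked rows
// are dropped from the output. std::vector<bool> stands in for the BufferPtr
// bitmap and Mutation::deletedRows.
#include <cstdint>
#include <vector>

inline void markDeletedRowsSketch(
    const std::vector<int64_t>& deletePositions, // positions in the data file
    int64_t readOffset,
    int64_t batchSize,
    std::vector<bool>& deletedInBatch) {
  deletedInBatch.assign(static_cast<std::size_t>(batchSize), false);
  for (auto pos : deletePositions) {
    if (pos >= readOffset && pos < readOffset + batchSize) {
      deletedInBatch[static_cast<std::size_t>(pos - readOffset)] = true;
    }
  }
}

template <typename T>
std::vector<T> dropDeletedSketch(
    const std::vector<T>& batch,
    const std::vector<bool>& deletedInBatch) {
  std::vector<T> out;
  out.reserve(batch.size());
  for (std::size_t i = 0; i < batch.size(); ++i) {
    if (!deletedInBatch[i]) {
      out.push_back(batch[i]);
    }
  }
  return out;
}
// ----------------------------------------------------------------------------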
+ positionalDeleteFileReaders_.clear(); + for (const auto& deleteFile : deleteFiles) { + if (deleteFile.content == FileContent::kPositionalDeletes) { + if (deleteFile.recordCount > 0) { + positionalDeleteFileReaders_.push_back( + std::make_unique( + deleteFile, + split_->filePath, + fileHandleFactory_, + connectorQueryCtx_, + executor_, + connectorConfig_, + ioStats_, + fsStats_, + runtimeStats, + splitOffset_, + split_->connectorId)); + } + } + } +} + +std::shared_ptr +IcebergSplitReader::baseFileSchema() { + VELOX_CHECK_NOT_NULL(baseReader_.get()); + return baseReader_->typeWithId(); +} + +uint64_t IcebergSplitReader::next(uint64_t size, VectorPtr& output) { + Mutation mutation; + mutation.randomSkip = baseReaderOpts_.randomSkip().get(); + mutation.deletedRows = nullptr; + + if (deleteBitmap_) { + std::memset( + (void*)(deleteBitmap_->asMutable()), 0L, deleteBitmap_->size()); + } + + const auto actualSize = baseRowReader_->nextReadSize(size); + + if (actualSize == dwio::common::RowReader::kAtEnd) { + return 0; + } + + if (!positionalDeleteFileReaders_.empty()) { + auto numBytes = bits::nbytes(actualSize); + dwio::common::ensureCapacity( + deleteBitmap_, numBytes, connectorQueryCtx_->memoryPool(), false, true); + + for (auto iter = positionalDeleteFileReaders_.begin(); + iter != positionalDeleteFileReaders_.end();) { + (*iter)->readDeletePositions(baseReadOffset_, actualSize, deleteBitmap_); + + if ((*iter)->noMoreData()) { + iter = positionalDeleteFileReaders_.erase(iter); + } else { + ++iter; + } + } + } + + mutation.deletedRows = deleteBitmap_ && deleteBitmap_->size() > 0 + ? deleteBitmap_->as() + : nullptr; + + auto rowsScanned = baseRowReader_->next(actualSize, output, &mutation); + + baseReadOffset_ += rowsScanned; + return rowsScanned; +} + +bool IcebergSplitReader::filterSplit( + dwio::common::RuntimeStatistics& runtimeStats) const { + // TODO: Some engines like Flink may write multiple partitions in one data + // file. Also, the Iceberg partition spec for one split may be different than + // the other split. + return iceberg::filterSplit( + scanSpec_.get(), + baseReader_.get(), + split_->filePath, + split_->partitionKeys, + *partitionColumnHandles_, + connectorConfig_->readTimestampPartitionValueAsLocalTime( + connectorQueryCtx_->sessionProperties())); +} + +} // namespace facebook::velox::connector::lakehouse::iceberg diff --git a/velox/connectors/lakehouse/iceberg/IcebergSplitReader.h b/velox/connectors/lakehouse/iceberg/IcebergSplitReader.h new file mode 100644 index 000000000000..5b2729bb4585 --- /dev/null +++ b/velox/connectors/lakehouse/iceberg/IcebergSplitReader.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "IcebergDeleteFile.h" +#include "IcebergTableHandle.h" +#include "PositionalDeleteFileReader.h" +#include "SplitReaderBase.h" +#include "velox/exec/OperatorUtils.h" + +namespace facebook::velox::connector::lakehouse::iceberg { + +class IcebergSplitReader : public SplitReaderBase { + public: + IcebergSplitReader( + const std::shared_ptr& split, + const std::shared_ptr& tableHandle, + const std::unordered_map< + std::string, + std::shared_ptr>* partitionKeys, + const ConnectorQueryCtx* connectorQueryCtx, + const std::shared_ptr& ConnectorConfigBase, + const RowTypePtr& readerOutputType, + const std::shared_ptr& ioStats, + const std::shared_ptr& fsStats, + FileHandleFactory* fileHandleFactory, + folly::Executor* executor, + const std::shared_ptr& scanSpec); + + ~IcebergSplitReader() override; + + void prepareSplit( + std::shared_ptr metadataFilter, + dwio::common::RuntimeStatistics& runtimeStats) override; + + uint64_t next(uint64_t size, VectorPtr& output) override; + + std::shared_ptr baseFileSchema(); + + private: + bool filterSplit( + dwio::common::RuntimeStatistics& runtimeStats) const override; + + // The read offset to the beginning of the split in number of rows for the + // current batch for the base data file + uint64_t baseReadOffset_; + // The file position for the first row in the split + uint64_t splitOffset_; + std::list> + positionalDeleteFileReaders_; + BufferPtr deleteBitmap_; +}; +} // namespace facebook::velox::connector::lakehouse::iceberg diff --git a/velox/connectors/lakehouse/iceberg/IcebergTableHandle.cpp b/velox/connectors/lakehouse/iceberg/IcebergTableHandle.cpp new file mode 100644 index 000000000000..b828a4966557 --- /dev/null +++ b/velox/connectors/lakehouse/iceberg/IcebergTableHandle.cpp @@ -0,0 +1,75 @@ +/* +* Copyright (c) Facebook, Inc. and its affiliates. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*/ + +#include "IcebergTableHandle.h" + +namespace facebook::velox::connector::lakehouse::iceberg { + +folly::dynamic IcebergColumnHandle::serialize() const { + folly::dynamic obj = ColumnHandleBase::serializeBase("IcebergColumnHandle"); + obj["icebergColumnHandleName"] = columnName_; + return obj; +} + +std::string IcebergColumnHandle::toString() const { + return ColumnHandleBase::toStringBase("IcebergColumnHandle"); +} + +ColumnHandlePtr IcebergColumnHandle::create(const folly::dynamic& obj) { + auto columnName = obj["icebergColumnHandleName"].asString(); + auto columnType = columnTypeFromName(obj["columnType"].asString()); + auto dataType = ISerializable::deserialize(obj["dataType"]); + + const auto& arr = obj["requiredSubfields"]; + std::vector requiredSubfields; + requiredSubfields.reserve(arr.size()); + for (auto& s : arr) { + requiredSubfields.emplace_back(s.asString()); + } + + return std::make_shared( + columnName, + columnType, + std::move(dataType), + std::move(requiredSubfields)); + ; +} + +void IcebergColumnHandle::registerSerDe() { + auto& registry = DeserializationRegistryForSharedPtr(); + registry.Register("IcebergColumnHandle", IcebergColumnHandle::create); +} + +std::string IcebergTableHandle::toString() const { + return TableHandleBase::toStringBase("IcebergTableHandle"); +} + +folly::dynamic IcebergTableHandle::serialize() const { + return TableHandleBase::serializeBase("IcebergTableHandle"); +} + +ConnectorTableHandlePtr IcebergTableHandle::create( + const folly::dynamic& obj, + void* context) { + return TableHandleBase::create(obj, context); +} + +void IcebergTableHandle::registerSerDe() { + auto& registry = DeserializationWithContextRegistryForSharedPtr(); + registry.Register("IcebergTableHandle", create); +} + +} // namespace facebook::velox::connector::lakehouse::iceberg diff --git a/velox/connectors/lakehouse/iceberg/IcebergTableHandle.h b/velox/connectors/lakehouse/iceberg/IcebergTableHandle.h new file mode 100644 index 000000000000..c204bbb9b04e --- /dev/null +++ b/velox/connectors/lakehouse/iceberg/IcebergTableHandle.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include "TableHandleBase.h" + +#include + +namespace facebook::velox::connector::lakehouse::iceberg { + +class IcebergColumnHandle : public ColumnHandleBase { + public: + /// NOTE: 'dataType' is the column type in target write table. 'hiveType' is + /// converted type of the corresponding column in source table which might not + /// be the same type, and the table scan needs to do data coercion if needs. + /// The table writer also needs to respect the type difference when processing + /// input data such as bucket id calculation. 
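+  /// Illustrative construction (the column name and type below are made up
+  /// for the example, not taken from any real table):
+  ///   IcebergColumnHandle("ds", ColumnType::kPartitionKey, VARCHAR())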
+ IcebergColumnHandle( + const std::string& name, + ColumnType columnType, + TypePtr dataType, + std::vector requiredSubfields = {}) + : ColumnHandleBase( + name, + columnType, + std::move(dataType), + std::move(requiredSubfields)) {} + + virtual folly::dynamic serialize() const override; + + virtual std::string toString() const override; + + static ColumnHandlePtr create(const folly::dynamic& obj); + + static void registerSerDe(); + + bool isPartitionKey() { + VELOX_NYI(); + } +}; + +class IcebergTableHandle : public TableHandleBase { + public: + IcebergTableHandle( + std::string connectorId, + const std::string& tableName, + bool filterPushdownEnabled, + velox::common::SubfieldFilters subfieldFilters, + const core::TypedExprPtr& remainingFilter, + const RowTypePtr& dataColumns = nullptr, + const std::unordered_map& tableParameters = {}) + : TableHandleBase( + std::move(connectorId), + tableName, + filterPushdownEnabled, + std::move(subfieldFilters), + remainingFilter, + dataColumns, + tableParameters) {} + + std::string toString() const override; + + folly::dynamic serialize() const override; + + static ConnectorTableHandlePtr create( + const folly::dynamic& obj, + void* context); + + static void registerSerDe(); +}; + +} // namespace facebook::velox::connector::lakehouse::iceberg diff --git a/velox/connectors/lakehouse/iceberg/PartitionIdGenerator.cpp b/velox/connectors/lakehouse/iceberg/PartitionIdGenerator.cpp new file mode 100644 index 000000000000..329af8edf1a9 --- /dev/null +++ b/velox/connectors/lakehouse/iceberg/PartitionIdGenerator.cpp @@ -0,0 +1,185 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "velox/connectors/lakehouse/iceberg/PartitionIdGenerator.h" + +#include "velox/connectors/lakehouse/iceberg/IcebergPartitionUtil.h" +#include "velox/dwio/catalog/fbhive/FileUtils.h" + +using namespace facebook::velox::dwio::catalog::fbhive; + +namespace facebook::velox::connector::lakehouse::iceberg { + +PartitionIdGenerator::PartitionIdGenerator( + const RowTypePtr& inputType, + std::vector partitionChannels, + uint32_t maxPartitions, + memory::MemoryPool* pool, + bool partitionPathAsLowerCase) + : pool_(pool), + partitionChannels_(std::move(partitionChannels)), + maxPartitions_(maxPartitions), + partitionPathAsLowerCase_(partitionPathAsLowerCase) { + VELOX_USER_CHECK( + !partitionChannels_.empty(), "There must be at least one partition key."); + for (auto channel : partitionChannels_) { + hashers_.emplace_back( + exec::VectorHasher::create(inputType->childAt(channel), channel)); + } + + std::vector partitionKeyTypes; + std::vector partitionKeyNames; + for (auto channel : partitionChannels_) { + VELOX_USER_CHECK( + exec::VectorHasher::typeKindSupportsValueIds( + inputType->childAt(channel)->kind()), + "Unsupported partition type: {}.", + inputType->childAt(channel)->toString()); + partitionKeyTypes.push_back(inputType->childAt(channel)); + partitionKeyNames.push_back(inputType->nameOf(channel)); + } + + partitionValues_ = BaseVector::create( + ROW(std::move(partitionKeyNames), std::move(partitionKeyTypes)), + maxPartitions_, + pool); + for (auto& key : partitionValues_->children()) { + key->resize(maxPartitions_); + } +} + +void PartitionIdGenerator::run( + const RowVectorPtr& input, + raw_vector& result) { + const auto numRows = input->size(); + result.resize(numRows); + + // Compute value IDs using VectorHashers and store these in 'result'. + computeValueIds(input, result); + + // Convert value IDs in 'result' into partition IDs using partitionIds + // mapping. Update 'result' in place. + + // TODO Optimize common use case where all records belong to the same + // partition. VectorHashers keep track of the number of unique values, hence, + // we can find out if there is only one unique value for each partition key. + for (auto i = 0; i < numRows; ++i) { + auto valueId = result[i]; + auto it = partitionIds_.find(valueId); + if (it != partitionIds_.end()) { + result[i] = it->second; + } else { + uint64_t nextPartitionId = partitionIds_.size(); + VELOX_USER_CHECK_LT( + nextPartitionId, + maxPartitions_, + "Exceeded limit of {} distinct partitions.", + maxPartitions_); + + partitionIds_.emplace(valueId, nextPartitionId); + savePartitionValues(nextPartitionId, input, i); + + result[i] = nextPartitionId; + } + } +} + +std::string PartitionIdGenerator::partitionName( + uint64_t partitionId, + const std::string& nullValueName) const { + return FileUtils::makePartName( + extractPartitionKeyValues(partitionValues_, partitionId, nullValueName), + partitionPathAsLowerCase_); +} + +void PartitionIdGenerator::computeValueIds( + const RowVectorPtr& input, + raw_vector& valueIds) { + allRows_.resize(input->size()); + allRows_.setAll(); + + bool rehash = false; + for (auto& hasher : hashers_) { + // NOTE: for boolean column type, computeValueIds() always returns true and + // this might cause problem in case of multiple boolean partition columns as + // we might not set the multiplier properly. 
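+    // Each hasher maps its column's values to dense value ids; the per-column
+    // ids are then combined, via the multipliers set up below, into a single
+    // id that is unique for each distinct combination of partition values.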
+ auto partitionVector = input->childAt(hasher->channel())->loadedVector(); + hasher->decode(*partitionVector, allRows_); + if (!hasher->computeValueIds(allRows_, valueIds)) { + rehash = true; + } + } + + if (!rehash && hasMultiplierSet_) { + return; + } + + uint64_t multiplier = 1; + for (auto& hasher : hashers_) { + hasMultiplierSet_ = true; + multiplier = hasher->typeKind() == TypeKind::BOOLEAN + ? hasher->enableValueRange(multiplier, 50) + : hasher->enableValueIds(multiplier, 50); + + VELOX_CHECK_NE( + multiplier, + exec::VectorHasher::kRangeTooLarge, + "Number of requested IDs is out of range."); + } + + for (auto& hasher : hashers_) { + const bool ok = hasher->computeValueIds(allRows_, valueIds); + VELOX_CHECK(ok); + } + + updateValueToPartitionIdMapping(); +} + +void PartitionIdGenerator::updateValueToPartitionIdMapping() { + if (partitionIds_.empty()) { + return; + } + + const auto numPartitions = partitionIds_.size(); + + partitionIds_.clear(); + + raw_vector newValueIds(numPartitions, pool_); + SelectivityVector rows(numPartitions); + for (auto i = 0; i < hashers_.size(); ++i) { + auto& hasher = hashers_[i]; + hasher->decode(*partitionValues_->childAt(i), rows); + const bool ok = hasher->computeValueIds(rows, newValueIds); + VELOX_CHECK(ok); + } + + for (auto i = 0; i < numPartitions; ++i) { + partitionIds_.emplace(newValueIds[i], i); + } +} + +void PartitionIdGenerator::savePartitionValues( + uint64_t partitionId, + const RowVectorPtr& input, + vector_size_t row) { + for (auto i = 0; i < partitionChannels_.size(); ++i) { + auto channel = partitionChannels_[i]; + partitionValues_->childAt(i)->copy( + input->childAt(channel).get(), partitionId, row, 1); + } +} + +} // namespace facebook::velox::connector::lakehouse::iceberg diff --git a/velox/connectors/lakehouse/iceberg/PartitionIdGenerator.h b/velox/connectors/lakehouse/iceberg/PartitionIdGenerator.h new file mode 100644 index 000000000000..28ffbebb8b58 --- /dev/null +++ b/velox/connectors/lakehouse/iceberg/PartitionIdGenerator.h @@ -0,0 +1,102 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "velox/exec/VectorHasher.h" + +namespace facebook::velox::connector::lakehouse::iceberg { +/// Generate sequential integer IDs for distinct partition values, which could +/// be used as vector index. +class PartitionIdGenerator { + public: + /// @param inputType RowType of the input. + /// @param partitionChannels Channels of partition keys in the input + /// RowVector. + /// @param maxPartitions The max number of distinct partitions. + /// @param pool Memory pool. Used to allocate memory for storing unique + /// partition key values. + /// @param partitionPathAsLowerCase Used to control whether the partition path + /// need to convert to lower case. 
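+  ///
+  /// Illustrative usage (the column name and values are made up):
+  ///   PartitionIdGenerator gen(ROW({"ds"}, {VARCHAR()}), {0}, 128, pool, true);
+  ///   raw_vector<uint64_t> ids;
+  ///   gen.run(input, ids);                    // ids[i] is the partition of row i
+  ///   auto name = gen.partitionName(ids[0]);  // e.g. "ds=2024-01-01"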
+ PartitionIdGenerator( + const RowTypePtr& inputType, + std::vector partitionChannels, + uint32_t maxPartitions, + memory::MemoryPool* pool, + bool partitionPathAsLowerCase); + + /// Generate sequential partition IDs for input vector. + /// @param input Input RowVector. + /// @param result Generated integer IDs indexed by input row number. + void run(const RowVectorPtr& input, raw_vector& result); + + /// Return the total number of distinct partitions processed so far. + uint64_t numPartitions() const { + return partitionIds_.size(); + } + + /// Return partition name for the given partition id in the typical Hive + /// style. It is derived from the partitionValues_ at index partitionId. + /// Partition keys appear in the order of partition columns in the table + /// schema. + std::string partitionName( + uint64_t partitionId, + const std::string& nullValueName = "") const; + + private: + static constexpr const int32_t kHasherReservePct = 20; + + // Computes value IDs using VectorHashers for all rows in 'input'. + void computeValueIds( + const RowVectorPtr& input, + raw_vector& valueIds); + + // In case of rehash (when value IDs produced by VectorHashers change), we + // update value id for pre-existing partitions while keeping partition ids. + // This method rebuilds 'partitionIds_' by re-calculating the value ids using + // updated 'hashers_'. + void updateValueToPartitionIdMapping(); + + // Copies partition values of 'row' from 'input' into 'partitionId' row in + // 'partitionValues_'. + void savePartitionValues( + uint64_t partitionId, + const RowVectorPtr& input, + vector_size_t row); + + memory::MemoryPool* const pool_; + + const std::vector partitionChannels_; + + const uint32_t maxPartitions_; + + const bool partitionPathAsLowerCase_; + + std::vector> hashers_; + bool hasMultiplierSet_ = false; + + // A mapping from value ID produced by VectorHashers to a partition ID. + std::unordered_map partitionIds_; + + // A vector holding unique partition key values. One row per partition. Row + // numbers match partition IDs. + RowVectorPtr partitionValues_; + + // All rows are set valid to compute partition IDs for all input rows. + SelectivityVector allRows_; +}; + +} // namespace facebook::velox::connector::lakehouse::iceberg diff --git a/velox/connectors/lakehouse/iceberg/PositionalDeleteFileReader.cpp b/velox/connectors/lakehouse/iceberg/PositionalDeleteFileReader.cpp new file mode 100644 index 000000000000..981f97a5da12 --- /dev/null +++ b/velox/connectors/lakehouse/iceberg/PositionalDeleteFileReader.cpp @@ -0,0 +1,290 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "velox/connectors/lakehouse/iceberg/PositionalDeleteFileReader.h" + +#include "ConnectorUtil.h" +#include "IcebergConnectorUtil.h" +#include "IcebergDeleteFile.h" +#include "IcebergMetadataColumns.h" +#include "velox/dwio/common/ReaderFactory.h" + +namespace facebook::velox::connector::lakehouse::iceberg { + +PositionalDeleteFileReader::PositionalDeleteFileReader( + const IcebergDeleteFile& deleteFile, + const std::string& baseFilePath, + FileHandleFactory* fileHandleFactory, + const ConnectorQueryCtx* connectorQueryCtx, + folly::Executor* executor, + const std::shared_ptr + ConnectorConfigBase, + const std::shared_ptr& ioStats, + const std::shared_ptr& fsStats, + dwio::common::RuntimeStatistics& runtimeStats, + uint64_t splitOffset, + const std::string& connectorId) + : deleteFile_(deleteFile), + baseFilePath_(baseFilePath), + fileHandleFactory_(fileHandleFactory), + executor_(executor), + connectorQueryCtx_(connectorQueryCtx), + connectorConfig_(ConnectorConfigBase), + ioStats_(ioStats), + fsStats_(fsStats), + pool_(connectorQueryCtx->memoryPool()), + filePathColumn_(IcebergMetadataColumn::icebergDeleteFilePathColumn()), + posColumn_(IcebergMetadataColumn::icebergDeletePosColumn()), + splitOffset_(splitOffset), + deleteSplit_(nullptr), + deleteRowReader_(nullptr), + deletePositionsOutput_(nullptr), + deletePositionsOffset_(0), + totalNumRowsScanned_(0) { + VELOX_CHECK(deleteFile_.content == FileContent::kPositionalDeletes); + VELOX_CHECK(deleteFile_.recordCount); + + // TODO: check if the lowerbounds and upperbounds in deleteFile overlap with + // this batch. If not, no need to proceed. + + // Create the ScanSpec for this delete file + auto scanSpec = std::make_shared(""); + scanSpec->addField(posColumn_->name, 0); + auto* pathSpec = scanSpec->getOrCreateChild(filePathColumn_->name); + pathSpec->setFilter( + std::make_unique( + std::vector({baseFilePath_}), false)); + + // Create the file schema (in RowType) and split that will be used by readers + std::vector deleteColumnNames( + {filePathColumn_->name, posColumn_->name}); + std::vector> deleteColumnTypes( + {filePathColumn_->type, posColumn_->type}); + RowTypePtr deleteFileSchema = + ROW(std::move(deleteColumnNames), std::move(deleteColumnTypes)); + + deleteSplit_ = std::make_shared( + connectorId, + deleteFile_.filePath, + deleteFile_.fileFormat, + 0, + deleteFile_.fileSizeInBytes); + + // Create the Reader and RowReader + + dwio::common::ReaderOptions deleteReaderOpts(pool_); + configureReaderOptions( + connectorConfig_, + connectorQueryCtx, + deleteFileSchema, + deleteSplit_, + /*tableParameters=*/{}, + deleteReaderOpts); + + const FileHandleKey fileHandleKey{ + .filename = deleteFile_.filePath, + .tokenProvider = connectorQueryCtx_->fsTokenProvider()}; + auto deleteFileHandleCachePtr = fileHandleFactory_->generate(fileHandleKey); + auto deleteFileInput = createBufferedInput( + *deleteFileHandleCachePtr, + deleteReaderOpts, + connectorQueryCtx, + ioStats_, + fsStats_, + executor_); + + auto deleteReader = + dwio::common::getReaderFactory(deleteReaderOpts.fileFormat()) + ->createReader(std::move(deleteFileInput), deleteReaderOpts); + + // Check if the whole delete file split can be skipped. This could happen when + // 1) the delete file doesn't contain the base file that is being read; 2) The + // delete file does not contain the positions in the current batch for the + // base file. 
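+  // The ScanSpec above pins the file_path column to baseFilePath_, so the
+  // filterSplit() call below can use the delete file's metadata to detect
+  // both cases and avoid creating a RowReader for it.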
+ if (!iceberg::filterSplit( + scanSpec.get(), + deleteReader.get(), + deleteSplit_->filePath, + deleteSplit_->partitionKeys, + {}, + true)) { + // We only count the number of base splits skipped as skippedSplits runtime + // statistics in Velox. Skipped delta split is only counted as skipped + // bytes. + runtimeStats.skippedSplitBytes += deleteSplit_->length; + deleteSplit_.reset(); + return; + } + + dwio::common::RowReaderOptions deleteRowReaderOpts; + configureRowReaderOptions( + {}, + scanSpec, + nullptr, + deleteFileSchema, + deleteSplit_, + nullptr, + nullptr, + deleteRowReaderOpts); + + deleteRowReader_.reset(); + deleteRowReader_ = deleteReader->createRowReader(deleteRowReaderOpts); +} + +void PositionalDeleteFileReader::readDeletePositions( + uint64_t baseReadOffset, + uint64_t size, + BufferPtr deleteBitmapBuffer) { + // We are going to read to the row number up to the end of the batch. For the + // same base file, the deleted rows are in ascending order in the same delete + // file. rowNumberUpperBound is the upperbound for the row number in this + // batch, excluding boundaries + int64_t rowNumberUpperBound = splitOffset_ + baseReadOffset + size; + + // Finish unused delete positions from last batch. + if (deletePositionsOutput_ && + deletePositionsOffset_ < deletePositionsOutput_->size()) { + updateDeleteBitmap( + std::dynamic_pointer_cast(deletePositionsOutput_) + ->childAt(0), + baseReadOffset, + rowNumberUpperBound, + deleteBitmapBuffer); + + if (readFinishedForBatch(rowNumberUpperBound)) { + return; + } + } + + if (!deleteRowReader_ || !deleteSplit_) { + return; + } + + // Read the new delete positions for this batch into deletePositionsOutput_ + // and update the delete bitmap + auto outputType = posColumn_->type; + RowTypePtr outputRowType = ROW({posColumn_->name}, {posColumn_->type}); + if (!deletePositionsOutput_) { + deletePositionsOutput_ = BaseVector::create(outputRowType, 0, pool_); + } + + do { + auto rowsScanned = deleteRowReader_->next(size, deletePositionsOutput_); + totalNumRowsScanned_ += rowsScanned; + + if (rowsScanned > 0) { + VELOX_CHECK( + !deletePositionsOutput_->mayHaveNulls(), + "Iceberg delete file pos column cannot have nulls"); + + auto numDeletedRows = deletePositionsOutput_->size(); + if (numDeletedRows > 0) { + deletePositionsOutput_->loadedVector(); + deletePositionsOffset_ = 0; + + // Convert the row numbers to set bits, up to rowNumberUpperBound. + // Beyond that the buffer of deleteBitMap is not available. + updateDeleteBitmap( + std::dynamic_pointer_cast(deletePositionsOutput_) + ->childAt(0), + baseReadOffset, + rowNumberUpperBound, + deleteBitmapBuffer); + } + } else { + // Reaching the end of the file + deleteSplit_.reset(); + break; + } + } while (!readFinishedForBatch(rowNumberUpperBound)); +} + +bool PositionalDeleteFileReader::noMoreData() { + return totalNumRowsScanned_ >= deleteFile_.recordCount && + deletePositionsOutput_ && + deletePositionsOffset_ >= deletePositionsOutput_->size(); +} + +void PositionalDeleteFileReader::updateDeleteBitmap( + VectorPtr deletePositionsVector, + uint64_t baseReadOffset, + int64_t rowNumberUpperBound, + BufferPtr deleteBitmapBuffer) { + auto deleteBitmap = deleteBitmapBuffer->asMutable(); + + // Convert the positions in file into positions relative to the start of the + // split. 
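+  // For example, with splitOffset_ = 1000, baseReadOffset = 200 and a batch of
+  // 100 rows, a file-level delete position of 1205 falls in this batch and
+  // sets bit 1205 - 1200 = 5 in deleteBitmapBuffer.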
+ const int64_t* deletePositions = + deletePositionsVector->as>()->rawValues(); + int64_t rowNumberLowerBound = baseReadOffset + splitOffset_; + + // If the rowNumberLowerBound is greater than the last position in this delete + // rows batch, nothing to delete. + if (rowNumberLowerBound > + deletePositions[deletePositionsVector->size() - 1]) { + return; + } + + // Skip the delete positions in deletePositionsVector until they are in the + // [rowNumberLowerBound, rowNumberUpperBound) range. + while (deletePositionsOffset_ < deletePositionsVector->size() && + deletePositions[deletePositionsOffset_] < rowNumberLowerBound) { + deletePositionsOffset_++; + } + while (deletePositionsOffset_ < deletePositionsVector->size() && + deletePositions[deletePositionsOffset_] < rowNumberUpperBound) { + bits::setBit( + deleteBitmap, + deletePositions[deletePositionsOffset_] - rowNumberLowerBound); + deletePositionsOffset_++; + } + + deleteBitmapBuffer->setSize( + std::max( + static_cast(deleteBitmapBuffer->size()), + deletePositionsOffset_ == 0 || + (deletePositionsOffset_ < deletePositionsVector->size() && + deletePositions[deletePositionsOffset_] >= + rowNumberUpperBound) + ? 0 + : bits::nbytes( + deletePositions[deletePositionsOffset_ - 1] + 1 - + rowNumberLowerBound))); +} + +bool PositionalDeleteFileReader::readFinishedForBatch( + int64_t rowNumberUpperBound) { + VELOX_CHECK_NOT_NULL(deletePositionsOutput_); + + auto deletePositionsVector = + std::dynamic_pointer_cast(deletePositionsOutput_)->childAt(0); + const int64_t* deletePositions = + deletePositionsVector->as>()->rawValues(); + + // We've read enough of the delete positions from this delete file when 1) it + // reaches the end of the file, or 2) the last read delete position is greater + // than the largest base file row number that is going to be read in this + // batch + if (totalNumRowsScanned_ >= deleteFile_.recordCount || + (deletePositionsVector->size() != 0 && + (deletePositionsOffset_ < deletePositionsVector->size() && + deletePositions[deletePositionsOffset_] >= rowNumberUpperBound))) { + return true; + } + return false; +} + +} // namespace facebook::velox::connector::lakehouse::iceberg diff --git a/velox/connectors/lakehouse/iceberg/PositionalDeleteFileReader.h b/velox/connectors/lakehouse/iceberg/PositionalDeleteFileReader.h new file mode 100644 index 000000000000..fe5c82f37c32 --- /dev/null +++ b/velox/connectors/lakehouse/iceberg/PositionalDeleteFileReader.h @@ -0,0 +1,94 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "ConnectorConfigBase.h" +#include "FileHandle.h" +#include "IcebergConnectorSplit.h" +#include "velox/dwio/common/Reader.h" + +#include + +#include + +namespace facebook::velox::connector::lakehouse::iceberg { + +struct IcebergDeleteFile; +struct IcebergMetadataColumn; + +class PositionalDeleteFileReader { + public: + PositionalDeleteFileReader( + const IcebergDeleteFile& deleteFile, + const std::string& baseFilePath, + FileHandleFactory* fileHandleFactory, + const ConnectorQueryCtx* connectorQueryCtx, + folly::Executor* executor, + const std::shared_ptr ConnectorConfigBase, + const std::shared_ptr& ioStats, + const std::shared_ptr& fsStats, + dwio::common::RuntimeStatistics& runtimeStats, + uint64_t splitOffset, + const std::string& connectorId); + + void readDeletePositions( + uint64_t baseReadOffset, + uint64_t size, + BufferPtr deleteBitmap); + + bool noMoreData(); + + private: + void updateDeleteBitmap( + VectorPtr deletePositionsVector, + uint64_t baseReadOffset, + int64_t rowNumberUpperBound, + BufferPtr deleteBitmapBuffer); + + bool readFinishedForBatch(int64_t rowNumberUpperBound); + + const IcebergDeleteFile& deleteFile_; + const std::string& baseFilePath_; + FileHandleFactory* const fileHandleFactory_; + folly::Executor* const executor_; + const ConnectorQueryCtx* connectorQueryCtx_; + const std::shared_ptr connectorConfig_; + const std::shared_ptr ioStats_; + const std::shared_ptr fsStats_; + const std::shared_ptr fsStats; + memory::MemoryPool* const pool_; + + std::shared_ptr filePathColumn_; + std::shared_ptr posColumn_; + uint64_t splitOffset_; + + std::shared_ptr deleteSplit_; + std::unique_ptr deleteRowReader_; + // The vector to hold the delete positions read from the positional delete + // file. These positions are relative to the start of the whole base data + // file. + VectorPtr deletePositionsOutput_; + // The index of deletePositionsOutput_ that indicates up to where the delete + // positions have been converted into the bitmap + uint64_t deletePositionsOffset_; + // Total number of rows read from this positional delete file reader, + // including the rows filtered out from filters on both filePathColumn_ and + // posColumn_. + uint64_t totalNumRowsScanned_; +}; + +} // namespace facebook::velox::connector::lakehouse::iceberg diff --git a/velox/connectors/lakehouse/iceberg/SplitReaderBase.cpp b/velox/connectors/lakehouse/iceberg/SplitReaderBase.cpp new file mode 100644 index 000000000000..f8903c09a2f7 --- /dev/null +++ b/velox/connectors/lakehouse/iceberg/SplitReaderBase.cpp @@ -0,0 +1,306 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// Adapted from velox/connectors/hive/SplitReader.cpp + +#include "SplitReaderBase.h" + +#include "ConnectorConfigBase.h" +#include "ConnectorSplitBase.h" +#include "ConnectorUtil.h" +#include "velox/common/caching/CacheTTLController.h" +#include "velox/dwio/common/ReaderFactory.h" + +using namespace facebook::velox::common; + +namespace facebook::velox::connector::lakehouse::iceberg { + +SplitReaderBase::SplitReaderBase( + const std::shared_ptr& split, + const std::shared_ptr& tableHandle, + const std::unordered_map>* + partitionColumnHandles, + const ConnectorQueryCtx* connectorQueryCtx, + const std::shared_ptr& ConnectorConfigBase, + const RowTypePtr& readerOutputType, + const std::shared_ptr& ioStats, + const std::shared_ptr& fsStats, + FileHandleFactory* fileHandleFactory, + folly::Executor* executor, + const std::shared_ptr& scanSpec) + : split_(split), + tableHandle_(tableHandle), + partitionColumnHandles_(partitionColumnHandles), + connectorQueryCtx_(connectorQueryCtx), + connectorConfig_(ConnectorConfigBase), + readerOutputType_(readerOutputType), + ioStats_(ioStats), + fsStats_(fsStats), + fileHandleFactory_(fileHandleFactory), + executor_(executor), + pool_(connectorQueryCtx->memoryPool()), + scanSpec_(scanSpec), + baseReaderOpts_(connectorQueryCtx->memoryPool()), + emptySplit_(false) {} + +void SplitReaderBase::configureReaderOptions( + std::shared_ptr randomSkip) { + lakehouse::iceberg::configureReaderOptions( + connectorConfig_, + connectorQueryCtx_, + tableHandle_->dataColumns(), + split_, + tableHandle_->tableParameters(), + baseReaderOpts_); + baseReaderOpts_.setRandomSkip(std::move(randomSkip)); + baseReaderOpts_.setScanSpec(scanSpec_); + baseReaderOpts_.setFileFormat(split_->fileFormat); +} + +void SplitReaderBase::prepareSplit( + std::shared_ptr metadataFilter, + dwio::common::RuntimeStatistics& runtimeStats) { + createReader(); + if (emptySplit_) { + return; + } + auto rowType = getAdaptedRowType(); + + if (checkIfSplitIsEmpty(runtimeStats)) { + VELOX_CHECK(emptySplit_); + return; + } + + createRowReader(std::move(metadataFilter), std::move(rowType)); +} + +uint64_t SplitReaderBase::next(uint64_t size, VectorPtr& output) { + if (!baseReaderOpts_.randomSkip()) { + return baseRowReader_->next(size, output); + } + dwio::common::Mutation mutation; + mutation.randomSkip = baseReaderOpts_.randomSkip().get(); + return baseRowReader_->next(size, output, &mutation); +} + +void SplitReaderBase::resetFilterCaches() { + if (baseRowReader_) { + baseRowReader_->resetFilterCaches(); + } +} + +bool SplitReaderBase::emptySplit() const { + return emptySplit_; +} + +void SplitReaderBase::resetSplit() { + split_.reset(); +} + +int64_t SplitReaderBase::estimatedRowSize() const { + if (!baseRowReader_) { + return DataSource::kUnknownRowSize; + } + + const auto size = baseRowReader_->estimatedRowSize(); + return size.value_or(DataSource::kUnknownRowSize); +} + +void SplitReaderBase::updateRuntimeStats( + dwio::common::RuntimeStatistics& stats) const { + if (baseRowReader_) { + baseRowReader_->updateRuntimeStats(stats); + } +} + +bool SplitReaderBase::allPrefetchIssued() const { + return baseRowReader_ && baseRowReader_->allPrefetchIssued(); +} + +void SplitReaderBase::setConnectorQueryCtx( + const ConnectorQueryCtx* connectorQueryCtx) { + connectorQueryCtx_ = connectorQueryCtx; +} + +void SplitReaderBase::createReader() { + VELOX_CHECK_NE( + baseReaderOpts_.fileFormat(), dwio::common::FileFormat::UNKNOWN); + + FileHandleCachedPtr fileHandleCachePtr; + FileHandleKey fileHandleKey{ + 
.filename = split_->filePath, + .tokenProvider = connectorQueryCtx_->fsTokenProvider()}; + try { + fileHandleCachePtr = fileHandleFactory_->generate( + fileHandleKey, + split_->properties.has_value() ? &*split_->properties : nullptr, + fsStats_ ? fsStats_.get() : nullptr); + VELOX_CHECK_NOT_NULL(fileHandleCachePtr.get()); + } catch (const VeloxRuntimeError& e) { + if (e.errorCode() == error_code::kFileNotFound) { + emptySplit_ = true; + return; + } + throw; + } + + // Here we keep adding new entries to CacheTTLController when new fileHandles + // are generated, if CacheTTLController was created. Creator of + // CacheTTLController needs to make sure a size control strategy was available + // such as removing aged out entries. + if (auto* cacheTTLController = cache::CacheTTLController::getInstance()) { + cacheTTLController->addOpenFileInfo(fileHandleCachePtr->uuid.id()); + } + auto baseFileInput = createBufferedInput( + *fileHandleCachePtr, + baseReaderOpts_, + connectorQueryCtx_, + ioStats_, + fsStats_, + executor_); + + baseReader_ = dwio::common::getReaderFactory(baseReaderOpts_.fileFormat()) + ->createReader(std::move(baseFileInput), baseReaderOpts_); +} + +RowTypePtr SplitReaderBase::getAdaptedRowType() const { + auto& fileType = baseReader_->rowType(); + auto columnTypes = adaptColumns(fileType, baseReaderOpts_.fileSchema()); + auto columnNames = fileType->names(); + return ROW(std::move(columnNames), std::move(columnTypes)); +} + +bool SplitReaderBase::checkIfSplitIsEmpty( + dwio::common::RuntimeStatistics& runtimeStats) { + // emptySplit_ may already be set if the data file is not found. In this + // case we don't need to test further. + if (emptySplit_) { + return true; + } + + if (!baseReader_ || baseReader_->numberOfRows() == 0 || + !filterSplit(runtimeStats)) { + emptySplit_ = true; + ++runtimeStats.skippedSplits; + runtimeStats.skippedSplitBytes += split_->length; + } else { + ++runtimeStats.processedSplits; + } + + return emptySplit_; +} + +void SplitReaderBase::createRowReader( + std::shared_ptr metadataFilter, + RowTypePtr rowType) { + VELOX_CHECK_NULL(baseRowReader_); + configureRowReaderOptions( + tableHandle_->tableParameters(), + scanSpec_, + std::move(metadataFilter), + std::move(rowType), + split_, + connectorConfig_, + connectorQueryCtx_->sessionProperties(), + baseRowReaderOpts_); + baseRowReader_ = baseReader_->createRowReader(baseRowReaderOpts_); +} + +std::vector SplitReaderBase::adaptColumns( + const RowTypePtr& fileType, + const std::shared_ptr& tableSchema) const { + // Keep track of schema types for columns in file, used by ColumnSelector. + std::vector columnTypes = fileType->children(); + + auto& childrenSpecs = scanSpec_->children(); + for (size_t i = 0; i < childrenSpecs.size(); ++i) { + auto* childSpec = childrenSpecs[i].get(); + const std::string& fieldName = childSpec->fieldName(); + + // Partition keys will be handled by the corresponding connector's + // SplitReaderBases. 
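+    // Info columns (e.g. the synthetic file path) carry per-split constant
+    // values on the split itself, so they are materialized as constant vectors
+    // here instead of being read from the file.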
+ if (auto iter = split_->infoColumns.find(fieldName); + iter != split_->infoColumns.end()) { + auto infoColumnType = + readerOutputType_->childAt(readerOutputType_->getChildIdx(fieldName)); + auto constant = VELOX_DYNAMIC_SCALAR_TYPE_DISPATCH_ALL( + newConstantFromString, + infoColumnType->kind(), + infoColumnType, + iter->second, + 1, + connectorQueryCtx_->memoryPool(), + connectorQueryCtx_->sessionTimezone(), + connectorConfig_->readTimestampPartitionValueAsLocalTime( + connectorQueryCtx_->sessionProperties())); + childSpec->setConstantValue(constant); + } else if ( + childSpec->columnType() == + velox::common::ScanSpec::ColumnType::kRegular) { + auto fileTypeIdx = fileType->getChildIdxIfExists(fieldName); + if (!fileTypeIdx.has_value()) { + // Column is missing. Most likely due to schema evolution. + VELOX_CHECK(tableSchema, "Unable to resolve column '{}'", fieldName); + childSpec->setConstantValue( + BaseVector::createNullConstant( + tableSchema->findChild(fieldName), + 1, + connectorQueryCtx_->memoryPool())); + } else { + // Column no longer missing, reset constant value set on the spec. + childSpec->setConstantValue(nullptr); + auto outputTypeIdx = readerOutputType_->getChildIdxIfExists(fieldName); + if (outputTypeIdx.has_value()) { + auto& outputType = readerOutputType_->childAt(*outputTypeIdx); + auto& columnType = columnTypes[*fileTypeIdx]; + if (childSpec->isFlatMapAsStruct()) { + // Flat map column read as struct. Leave the schema type as MAP. + VELOX_CHECK(outputType->isRow() && columnType->isMap()); + } else { + // We know the fieldName exists in the file, make the type at that + // position match what we expect in the output. + columnType = outputType; + } + } + } + } + } + + scanSpec_->resetCachedValues(false); + + return columnTypes; +} + +std::string SplitReaderBase::toStringBase(const std::string& className) const { + std::string partitionKeys; + std::for_each( + partitionColumnHandles_->begin(), + partitionColumnHandles_->end(), + [&](const auto& column) { + partitionKeys += " " + column.second->toStringBase(className); + }); + return fmt::format( + "{}: split_{} scanSpec_{} readerOutputType_{} partitionColumnHandles_{} reader{} rowReader{}", + className, + split_->toString(), + scanSpec_->toString(), + readerOutputType_->toString(), + partitionKeys, + static_cast(baseReader_.get()), + static_cast(baseRowReader_.get())); +} + +} // namespace facebook::velox::connector::lakehouse::iceberg diff --git a/velox/connectors/lakehouse/iceberg/SplitReaderBase.h b/velox/connectors/lakehouse/iceberg/SplitReaderBase.h new file mode 100644 index 000000000000..f91ea8123a2c --- /dev/null +++ b/velox/connectors/lakehouse/iceberg/SplitReaderBase.h @@ -0,0 +1,148 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "ConnectorConfigBase.h" +#include "ConnectorSplitBase.h" +#include "FileHandle.h" +#include "TableHandleBase.h" +#include "velox/common/base/RandomUtil.h" +#include "velox/common/file/FileSystems.h" +#include "velox/connectors/Connector.h" +#include "velox/dwio/common/Options.h" +#include "velox/dwio/common/Reader.h" +#include "velox/type/Type.h" + +#include +#include +#include + +namespace facebook::velox::connector::lakehouse::iceberg { + +class SplitReaderBase { + public: + SplitReaderBase( + const std::shared_ptr& split, + const std::shared_ptr& tableHandle, + const std::unordered_map>* + partitionColumnHandles, + const ConnectorQueryCtx* connectorQueryCtx, + const std::shared_ptr& ConnectorConfigBase, + const RowTypePtr& readerOutputType, + const std::shared_ptr& ioStats, + const std::shared_ptr& fsStats, + FileHandleFactory* fileHandleFactory, + folly::Executor* executor, + const std::shared_ptr& scanSpec); + + virtual ~SplitReaderBase() = default; + + void configureReaderOptions( + std::shared_ptr randomSkip); + + /// This function is used by different table formats like Iceberg and Hudi to + /// do additional preparations before reading the split, e.g. Open delete + /// files or log files, and add column adapatations for metadata columns. It + /// would be called only once per incoming split + virtual void prepareSplit( + std::shared_ptr metadataFilter, + dwio::common::RuntimeStatistics& runtimeStats); + + virtual uint64_t next(uint64_t size, VectorPtr& output); + + void resetFilterCaches(); + + bool emptySplit() const; + + void resetSplit(); + + int64_t estimatedRowSize() const; + + void updateRuntimeStats(dwio::common::RuntimeStatistics& stats) const; + + bool allPrefetchIssued() const; + + void setConnectorQueryCtx(const ConnectorQueryCtx* connectorQueryCtx); + + const RowTypePtr& readerOutputType() const { + return readerOutputType_; + } + + std::string toString() const; + + protected: + /// Create the dwio::common::Reader object baseReader_, which will be + /// used to read the data file's metadata and schema + void createReader(); + + // Adjust the scan spec according to the current split, then return the + // adapted row type. + RowTypePtr getAdaptedRowType() const; + + /// Check if the split_ is empty. The split is considered empty when + /// 1) The data file is missing but the user chooses to ignore it + /// 2) The file does not contain any rows + /// 3) The data in the file does not pass the filters. The test is based on + /// the file metadata and partition key values + /// This function needs to be called after baseReader_ is created. + bool checkIfSplitIsEmpty(dwio::common::RuntimeStatistics& runtimeStats); + + // Check if the filters pass on the column statistics. When delta update is + // present, the corresonding filter should be disabled before calling this + // function. + virtual bool filterSplit( + dwio::common::RuntimeStatistics& runtimeStats) const { + VELOX_UNREACHABLE(); + } + + /// Create the dwio::common::RowReader object baseRowReader_, which owns + /// the ColumnReaders that will be used to read the data + void createRowReader( + std::shared_ptr metadataFilter, + RowTypePtr rowType); + + /// Different table formats may have different meatadata columns. + /// This function will be used to update the scanSpec for these columns. 
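+  /// Columns missing from the data file (e.g. after schema evolution) are
+  /// filled in as null constants, per-split info columns become constant
+  /// values, and the returned vector holds the adapted schema types for the
+  /// columns present in the file.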
+ virtual std::vector adaptColumns( + const RowTypePtr& fileType, + const std::shared_ptr& tableSchema) const; + + std::string toStringBase(const std::string& className) const; + + std::shared_ptr split_; + const std::shared_ptr tableHandle_; + const std::unordered_map>* + partitionColumnHandles_; + const ConnectorQueryCtx* connectorQueryCtx_; + const std::shared_ptr connectorConfig_; + + RowTypePtr readerOutputType_; + const std::shared_ptr ioStats_; + const std::shared_ptr fsStats_; + FileHandleFactory* const fileHandleFactory_; + folly::Executor* const executor_; + memory::MemoryPool* const pool_; + + std::shared_ptr scanSpec_; + std::unique_ptr baseReader_; + std::unique_ptr baseRowReader_; + dwio::common::ReaderOptions baseReaderOpts_; + dwio::common::RowReaderOptions baseRowReaderOpts_; + bool emptySplit_; +}; + +} // namespace facebook::velox::connector::lakehouse::iceberg diff --git a/velox/connectors/lakehouse/iceberg/TableHandleBase.cpp b/velox/connectors/lakehouse/iceberg/TableHandleBase.cpp new file mode 100644 index 000000000000..83461972ef5e --- /dev/null +++ b/velox/connectors/lakehouse/iceberg/TableHandleBase.cpp @@ -0,0 +1,206 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "TableHandleBase.h" + +namespace facebook::velox::connector::lakehouse::iceberg { + +namespace { +std::unordered_map +columnTypeNames() { + return { + {ColumnHandleBase::ColumnType::kPartitionKey, "PartitionKey"}, + {ColumnHandleBase::ColumnType::kRegular, "Regular"}, + {ColumnHandleBase::ColumnType::kSynthesized, "Synthesized"}, + // {ColumnHandleBase::ColumnType::kHiveRowIndex, "RowIndex"}, + }; +} + +template +std::unordered_map invertMap(const std::unordered_map& mapping) { + std::unordered_map inverted; + for (const auto& [key, value] : mapping) { + inverted.emplace(value, key); + } + return inverted; +} + +} // namespace + +ColumnHandleBase::ColumnType ColumnHandleBase::columnTypeFromName( + const std::string& name) { + static const auto nameColumnTypes = invertMap(columnTypeNames()); + return nameColumnTypes.at(name); +} + +folly::dynamic ColumnHandleBase::serializeBase(std::string_view name) const { + folly::dynamic obj = ColumnHandle::serializeBase(name); + obj["columnType"] = columnTypeName(columnType_); + obj["dataType"] = dataType_->serialize(); + folly::dynamic requiredSubfields = folly::dynamic::array; + for (const auto& subfield : requiredSubfields_) { + requiredSubfields.push_back(subfield.toString()); + } + obj["requiredSubfields"] = requiredSubfields; + return obj; +} + +std::string ColumnHandleBase::toStringBase(const std::string& className) const { + std::ostringstream out; + out << fmt::format( + "{} [columnName: {}, columnType: {}, dataType: {},", + className, + columnName_, + columnTypeName(columnType_), + dataType_->toString()); + out << " requiredSubfields: ["; + for (const auto& subfield : requiredSubfields_) { + out << " " << subfield.toString(); + } + out << " ]]"; + return out.str(); +} + +std::string ColumnHandleBase::columnTypeName( + ColumnHandleBase::ColumnType type) { + static const auto ctNames = columnTypeNames(); + return ctNames.at(type); +} + +ConnectorTableHandlePtr TableHandleBase::create( + const folly::dynamic& obj, + void* context) { + auto connectorId = obj["connectorId"].asString(); + auto tableName = obj["tableName"].asString(); + auto filterPushdownEnabled = obj["filterPushdownEnabled"].asBool(); + + core::TypedExprPtr remainingFilter; + if (auto it = obj.find("remainingFilter"); it != obj.items().end()) { + remainingFilter = + ISerializable::deserialize(it->second, context); + } + + velox::common::SubfieldFilters subfieldFilters; + folly::dynamic subfieldFiltersObj = obj["subfieldFilters"]; + for (const auto& subfieldFilter : subfieldFiltersObj) { + velox::common::Subfield subfield(subfieldFilter["subfield"].asString()); + auto filter = ISerializable::deserialize( + subfieldFilter["filter"]); + subfieldFilters[velox::common::Subfield(std::move(subfield.path()))] = + filter->clone(); + } + + RowTypePtr dataColumns; + if (auto it = obj.find("dataColumns"); it != obj.items().end()) { + dataColumns = ISerializable::deserialize(it->second, context); + } + + std::unordered_map tableParameters{}; + const auto& tableParametersObj = obj["tableParameters"]; + for (const auto& key : tableParametersObj.keys()) { + const auto& value = tableParametersObj[key]; + tableParameters.emplace(key.asString(), value.asString()); + } + + return std::make_shared( + connectorId, + tableName, + filterPushdownEnabled, + std::move(subfieldFilters), + remainingFilter, + dataColumns, + tableParameters); +} + +folly::dynamic TableHandleBase::serializeBase( + const std::string& className) const { + folly::dynamic obj = 
ConnectorTableHandle::serializeBase(className); + obj["tableName"] = tableName_; + obj["filterPushdownEnabled"] = filterPushdownEnabled_; + + folly::dynamic subfieldFilters = folly::dynamic::array; + for (const auto& [subfield, filter] : subfieldFilters_) { + folly::dynamic pair = folly::dynamic::object; + pair["subfield"] = subfield.toString(); + pair["filter"] = filter->serialize(); + subfieldFilters.push_back(pair); + } + + obj["subfieldFilters"] = subfieldFilters; + if (remainingFilter_) { + obj["remainingFilter"] = remainingFilter_->serialize(); + } + if (dataColumns_) { + obj["dataColumns"] = dataColumns_->serialize(); + } + folly::dynamic tableParameters = folly::dynamic::object; + for (const auto& param : tableParameters_) { + tableParameters[param.first] = param.second; + } + obj["tableParameters"] = tableParameters; + + return obj; +} + +std::string TableHandleBase::toStringBase(const std::string& className) const { + std::stringstream out; + out << className << " [table: " << tableName_; + + if (!subfieldFilters_.empty()) { + // Sort filters by subfield for deterministic output. + std::map orderedFilters; + for (const auto& [field, filter] : subfieldFilters_) { + orderedFilters[field.toString()] = filter.get(); + } + out << ", range filters: ["; + bool notFirstFilter = false; + for (const auto& [field, filter] : orderedFilters) { + if (notFirstFilter) { + out << ", "; + } + out << "(" << field << ", " << filter->toString() << ")"; + notFirstFilter = true; + } + out << "]"; + } + if (remainingFilter_) { + out << ", remaining filter: (" << remainingFilter_->toString() << ")"; + } + + if (dataColumns_) { + out << ", data columns: " << dataColumns_->toString(); + } + + if (!tableParameters_.empty()) { + std::map orderedTableParameters{ + tableParameters_.begin(), tableParameters_.end()}; + out << ", table parameters: ["; + bool firstParam = true; + for (const auto& param : orderedTableParameters) { + if (!firstParam) { + out << ", "; + } + out << param.first << ":" << param.second; + firstParam = false; + } + out << "]"; + } + + out << "]"; + return out.str(); +} + +} // namespace facebook::velox::connector::lakehouse::iceberg diff --git a/velox/connectors/lakehouse/iceberg/TableHandleBase.h b/velox/connectors/lakehouse/iceberg/TableHandleBase.h new file mode 100644 index 000000000000..1e4926af6c25 --- /dev/null +++ b/velox/connectors/lakehouse/iceberg/TableHandleBase.h @@ -0,0 +1,170 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include "velox/connectors/Connector.h" +#include "velox/core/ITypedExpr.h" +#include "velox/type/Filter.h" +#include "velox/type/Subfield.h" +#include "velox/type/Type.h" + +#include +#include + +namespace facebook::velox::connector::lakehouse::iceberg { + +class ColumnHandleBase : public connector::ColumnHandle { + public: + enum class ColumnType { + kPartitionKey, + kRegular, + kSynthesized + }; + + /// NOTE: 'dataType' is the column type in target write table. 
'hiveType' is + /// converted type of the corresponding column in source table which might not + /// be the same type, and the table scan needs to do data coercion if needs. + /// The table writer also needs to respect the type difference when processing + /// input data such as bucket id calculation. + ColumnHandleBase( + const std::string& columnName, + ColumnType columnType, + TypePtr dataType, + std::vector requiredSubfields = {}) + : columnName_(columnName), + columnType_(columnType), + dataType_(std::move(dataType)), + requiredSubfields_(std::move(requiredSubfields)) {} + + const std::string& name() const override { + return columnName_; + } + + ColumnType columnType() const { + return columnType_; + } + + const TypePtr& dataType() const { + return dataType_; + } + + /// Applies to columns of complex types: arrays, maps and structs. When a + /// query uses only some of the subfields, the engine provides the complete + /// list of required subfields and the connector is free to prune the rest. + /// + /// Examples: + /// - SELECT a[1], b['x'], x.y FROM t + /// - SELECT a FROM t WHERE b['y'] > 10 + /// + /// Pruning a struct means populating some of the members with null values. + /// + /// Pruning a map means dropping keys not listed in the required subfields. + /// + /// Pruning arrays means dropping values with indices larger than maximum + /// required index. + const std::vector& requiredSubfields() const { + return requiredSubfields_; + } + + bool isPartitionKey() const { + return columnType_ == ColumnType::kPartitionKey; + } + + static ColumnHandlePtr create(const folly::dynamic& obj); + + static ColumnHandleBase::ColumnType columnTypeFromName( + const std::string& name); + + std::string toStringBase(const std::string& name) const; + + protected: + folly::dynamic serializeBase(std::string_view name) const; + + + const std::string columnName_; + const ColumnType columnType_; + const TypePtr dataType_; + const std::vector requiredSubfields_; + + private: + static std::string columnTypeName(ColumnHandleBase::ColumnType columnType); +}; + +class TableHandleBase : public ConnectorTableHandle { + public: + TableHandleBase( + std::string connectorId, + const std::string& tableName, + bool filterPushdownEnabled, + velox::common::SubfieldFilters subfieldFilters, + const core::TypedExprPtr& remainingFilter, + const RowTypePtr& dataColumns = nullptr, + const std::unordered_map& tableParameters = {}) + : ConnectorTableHandle(std::move(connectorId)), + tableName_(tableName), + filterPushdownEnabled_(filterPushdownEnabled), + subfieldFilters_(std::move(subfieldFilters)), + remainingFilter_(remainingFilter), + dataColumns_(dataColumns), + tableParameters_(tableParameters) {} + + const std::string& tableName() const { + return tableName_; + } + + const std::string& name() const override { + return tableName_; + } + + bool isFilterPushdownEnabled() const { + return filterPushdownEnabled_; + } + + const velox::common::SubfieldFilters& subfieldFilters() const { + return subfieldFilters_; + } + + const core::TypedExprPtr& remainingFilter() const { + return remainingFilter_; + } + + // Schema of the table. Need this for reading TEXTFILE. 
+ const RowTypePtr& dataColumns() const { + return dataColumns_; + } + + const std::unordered_map& tableParameters() const { + return tableParameters_; + } + + static ConnectorTableHandlePtr create( + const folly::dynamic& obj, + void* context); + + protected: + folly::dynamic serializeBase(const std::string& className) const; + + std::string toStringBase(const std::string& className) const; + + const std::string tableName_; + const bool filterPushdownEnabled_; + const velox::common::SubfieldFilters subfieldFilters_; + const core::TypedExprPtr remainingFilter_; + const RowTypePtr dataColumns_; + const std::unordered_map tableParameters_; +}; + +} // namespace facebook::velox::connector::lakehouse::iceberg diff --git a/velox/connectors/lakehouse/iceberg/tests/CMakeLists.txt b/velox/connectors/lakehouse/iceberg/tests/CMakeLists.txt new file mode 100644 index 000000000000..3d73bd6545ce --- /dev/null +++ b/velox/connectors/lakehouse/iceberg/tests/CMakeLists.txt @@ -0,0 +1,91 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +add_library(velox_lakehouse_common_test_lib + ConnectorTestBase.cpp + ) + +target_link_libraries( + velox_lakehouse_common_test_lib + velox_vector_test_lib + velox_exec + velox_exec_test_lib + GTest::gtest + GTest::gtest_main) + +add_library(velox_lakehouse_iceberg_reader_benchmark_lib + IcebergConnectorTestBase.cpp IcebergSplitReaderBenchmark.cpp) + +target_link_libraries( + velox_lakehouse_iceberg_reader_benchmark_lib + velox_exec_test_lib + velox_exec + Folly::folly + Folly::follybenchmark + ${TEST_LINK_LIBS}) + +if(VELOX_ENABLE_BENCHMARKS) + add_executable(velox_lakehouse_iceberg_reader_benchmark + IcebergSplitReaderBenchmarkMain.cpp) + target_link_libraries( + velox_lakehouse_iceberg_reader_benchmark + velox_lakehouse_iceberg_reader_benchmark_lib + velox_exec_test_lib + velox_exec + Folly::folly + Folly::follybenchmark + ${TEST_LINK_LIBS}) +endif() + +add_executable( + velox_lakehouse_iceberg_test IcebergReadTest.cpp + IcebergSplitReaderBenchmarkTest.cpp IcebergConnectorTestBase.cpp PlanBuilder.cpp) + +add_test(velox_lakehouse_iceberg_test velox_lakehouse_iceberg_test) + +target_link_libraries( + velox_lakehouse_iceberg_test + velox_lakehouse_iceberg_reader_benchmark_lib + velox_lakehouse_iceberg_connector + velox_lakehouse_common_test_lib + velox_dwio_common_exception + velox_dwio_common_test_utils + velox_vector_test_lib + velox_vector_fuzzer + velox_exec + velox_exec_test_lib + Folly::folly + Folly::follybenchmark + GTest::gtest + GTest::gtest_main) + +if(VELOX_ENABLE_PARQUET) + target_link_libraries(velox_lakehouse_iceberg_test velox_dwio_parquet_writer + velox_dwio_parquet_reader) +endif() + +add_executable( + velox_lakehouse_common_test + FileHandleTest.cpp) + +target_link_libraries( + velox_lakehouse_common_test + velox_lakehouse_common_test_lib + velox_vector_test_lib + velox_exec + velox_exec_test_lib + GTest::gtest + GTest::gtest_main) + +add_test(velox_lakehouse_common_test velox_lakehouse_common_test) diff 
--git a/velox/connectors/lakehouse/iceberg/tests/ConnectorTestBase.cpp b/velox/connectors/lakehouse/iceberg/tests/ConnectorTestBase.cpp new file mode 100644 index 000000000000..f067cf36271c --- /dev/null +++ b/velox/connectors/lakehouse/iceberg/tests/ConnectorTestBase.cpp @@ -0,0 +1,184 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ConnectorTestBase.h" + +#include "velox/common/file/tests/FaultyFileSystem.h" +#include "velox/connectors/lakehouse/iceberg/ConnectorSplitBase.h" +#include "velox/connectors/lakehouse/iceberg/IcebergConnector.h" +#include "velox/dwio/common/tests/utils/BatchMaker.h" +#include "velox/dwio/dwrf/RegisterDwrfReader.h" +#include "velox/dwio/dwrf/RegisterDwrfWriter.h" +#include "velox/dwio/dwrf/writer/Writer.h" + +using namespace facebook::velox::connector::lakehouse::iceberg; + +namespace facebook::velox::connector::lakehouse::iceberg::test { + +using namespace facebook::velox; +using namespace facebook::velox::common; +using namespace facebook::velox::exec; +using namespace facebook::velox::exec::test; + +ConnectorTestBase::ConnectorTestBase() { + filesystems::registerLocalFileSystem(); + velox::tests::utils::registerFaultyFileSystem(); +} + +void ConnectorTestBase::SetUp() { + OperatorTestBase::SetUp(); + auto icebergConnector = + std::make_shared(kIcebergConnectorId, + std::make_shared( + std::unordered_map()), + ioExecutor_.get()); + connector::registerConnector(icebergConnector); + dwio::common::registerFileSinks(); + dwrf::registerDwrfReaderFactory(); + dwrf::registerDwrfWriterFactory(); +} + +void ConnectorTestBase::TearDown() { + // Make sure all pending loads are finished or cancelled before unregister + // connector. 
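+  // Note (added for clarity): resetting ioExecutor_ below destroys the IO
+  // executor, which is assumed to drain or cancel any queued split-preload
+  // work before returning, so nothing can touch the connector once it is
+  // unregistered further down.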
+ ioExecutor_.reset(); + dwrf::unregisterDwrfReaderFactory(); + dwrf::unregisterDwrfWriterFactory(); + connector::unregisterConnector(kIcebergConnectorId); + connector::unregisterConnector( + iceberg::IcebergConnectorFactory::kIcebergConnectorName); + OperatorTestBase::TearDown(); +} + + void ConnectorTestBase::resetIcebergConnector( + const std::shared_ptr& config) { + connector::unregisterConnector(kIcebergConnectorId); + auto icebergConnector = + std::make_shared(kIcebergConnectorId, config, ioExecutor_.get()); + connector::registerConnector(icebergConnector); + } + +void ConnectorTestBase::writeToFiles( + const std::vector& filePaths, + std::vector vectors) { + VELOX_CHECK_EQ(filePaths.size(), vectors.size()); + for (int i = 0; i < filePaths.size(); ++i) { + writeToFile(filePaths[i], std::vector{vectors[i]}); + } +} + +void ConnectorTestBase::writeToFile( + const std::string& filePath, + RowVectorPtr vector) { + writeToFile(filePath, std::vector{vector}); +} + +void ConnectorTestBase::writeToFile( + const std::string& filePath, + const std::vector& vectors, + std::shared_ptr config, + const std::function()>& + flushPolicyFactory) { + writeToFile( + filePath, + vectors, + std::move(config), + vectors[0]->type(), + flushPolicyFactory); +} + +void ConnectorTestBase::writeToFile( + const std::string& filePath, + const std::vector& vectors, + std::shared_ptr config, + const TypePtr& schema, + const std::function()>& + flushPolicyFactory) { + velox::dwrf::WriterOptions options; + options.config = config; + options.schema = schema; + auto fs = filesystems::getFileSystem(filePath, {}); + auto writeFile = fs->openFileForWrite( + filePath, + {.shouldCreateParentDirectories = true, + .shouldThrowOnFileAlreadyExists = false}); + auto sink = std::make_unique( + std::move(writeFile), filePath); + auto childPool = rootPool_->addAggregateChild("ConnectorTestBase.Writer"); + options.memoryPool = childPool.get(); + options.flushPolicyFactory = flushPolicyFactory; + + facebook::velox::dwrf::Writer writer{std::move(sink), options}; + for (size_t i = 0; i < vectors.size(); ++i) { + writer.write(vectors[i]); + } + writer.close(); +} + +void ConnectorTestBase::createDirectory(const std::string& directoryPath) { + auto fs = filesystems::getFileSystem(directoryPath, {}); + fs->mkdir(directoryPath); +} + +void ConnectorTestBase::removeDirectory(const std::string& directoryPath) { + auto fs = filesystems::getFileSystem(directoryPath, {}); + if (fs->exists(directoryPath)) { + fs->rmdir(directoryPath); + } +} + +void ConnectorTestBase::removeFile(const std::string& filePath) { + auto fs = filesystems::getFileSystem(filePath, {}); + if (fs->exists(filePath)) { + fs->remove(filePath); + } +} + +std::vector ConnectorTestBase::makeVectors( + const RowTypePtr& rowType, + int32_t numVectors, + int32_t rowsPerVector) { + std::vector vectors; + for (int32_t i = 0; i < numVectors; ++i) { + auto vector = std::dynamic_pointer_cast( + velox::test::BatchMaker::createBatch(rowType, rowsPerVector, *pool_)); + vectors.push_back(vector); + } + return vectors; +} + +std::vector> ConnectorTestBase::makeFilePaths( + int count) { + std::vector> filePaths; + + filePaths.reserve(count); + for (auto i = 0; i < count; ++i) { + filePaths.emplace_back(TempFilePath::create()); + } + return filePaths; +} + +std::shared_ptr ConnectorTestBase::partitionKey( + const std::string& name, + const TypePtr& type) { + return std::make_shared( + name, + ColumnHandleBase::ColumnType::kPartitionKey, + // type, + type); +} + +} // namespace 
facebook::velox::connector::lakehouse::iceberg::test diff --git a/velox/connectors/lakehouse/iceberg/tests/ConnectorTestBase.h b/velox/connectors/lakehouse/iceberg/tests/ConnectorTestBase.h new file mode 100644 index 000000000000..5838099368c2 --- /dev/null +++ b/velox/connectors/lakehouse/iceberg/tests/ConnectorTestBase.h @@ -0,0 +1,116 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include "velox/connectors/lakehouse/iceberg/TableHandleBase.h" +#include "velox/dwio/dwrf/common/Config.h" +#include "velox/dwio/dwrf/writer/FlushPolicy.h" +#include "velox/exec/Operator.h" +#include "velox/exec/tests/utils/OperatorTestBase.h" +#include "velox/exec/tests/utils/TempFilePath.h" +#include "velox/type/tests/SubfieldFiltersBuilder.h" + +#include + +namespace facebook::velox::connector::lakehouse::iceberg::test { + +static const std::string kIcebergConnectorId = "test-hive"; + +using ColumnHandleMap = + std::unordered_map>; + +class ConnectorTestBase : public exec::test::OperatorTestBase { + public: + ConnectorTestBase(); + + void SetUp() override; + void TearDown() override; + + void resetIcebergConnector( + const std::shared_ptr& config); + + void writeToFiles( + const std::vector& filePaths, + std::vector vectors); + + void writeToFile(const std::string& filePath, RowVectorPtr vector); + + void writeToFile( + const std::string& filePath, + const std::vector& vectors, + std::shared_ptr config = + std::make_shared(), + const std::function()>& + flushPolicyFactory = nullptr); + + void writeToFile( + const std::string& filePath, + const std::vector& vectors, + std::shared_ptr config, + const TypePtr& schema, + const std::function()>& + flushPolicyFactory = nullptr); + + // Creates a directory using matching file system based on directoryPath. + // No throw when directory already exists. + void createDirectory(const std::string& directoryPath); + + // Removes a directory using matching file system based on directoryPath. + // No op when directory does not exist. + void removeDirectory(const std::string& directoryPath); + + // Removes a file using matching file system based on filePath. + // No op when file does not exist. 
+ void removeFile(const std::string& filePath); + + std::vector makeVectors( + const RowTypePtr& rowType, + int32_t numVectors, + int32_t rowsPerVector); + + using OperatorTestBase::assertQuery; + +// std::shared_ptr assertQuery( +// const core::PlanNodePtr& plan, +// const std::vector>& splits, +// const std::string& duckDbSql, +// const int32_t numPrefetchSplit); + + static std::vector> + makeFilePaths(int count); + + static std::shared_ptr regularColumn( + const std::string& name, + const TypePtr& type); + + static std::shared_ptr partitionKey( + const std::string& name, + const TypePtr& type); + + static std::shared_ptr + synthesizedColumn(const std::string& name, const TypePtr& type); + + static ColumnHandleMap allRegularColumns(const RowTypePtr& rowType) { + ColumnHandleMap assignments; + assignments.reserve(rowType->size()); + for (uint32_t i = 0; i < rowType->size(); ++i) { + const auto& name = rowType->nameOf(i); + assignments[name] = regularColumn(name, rowType->childAt(i)); + } + return assignments; + } +}; +} // namespace facebook::velox::connector::lakehouse::iceberg::test diff --git a/velox/connectors/lakehouse/iceberg/tests/FileHandleTest.cpp b/velox/connectors/lakehouse/iceberg/tests/FileHandleTest.cpp new file mode 100644 index 000000000000..709a8f6f4904 --- /dev/null +++ b/velox/connectors/lakehouse/iceberg/tests/FileHandleTest.cpp @@ -0,0 +1,81 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "velox/connectors/lakehouse/iceberg/FileHandle.h" +#include "ConnectorTestBase.h" + +#include + +namespace facebook::velox::connector::lakehouse::iceberg::test { + +using namespace facebook::velox; + +TEST(FileHandleTest, localFile) { + filesystems::registerLocalFileSystem(); + + auto tempFile = exec::test::TempFilePath::create(); + const auto& filename = tempFile->getPath(); + remove(filename.c_str()); + + { + LocalWriteFile writeFile(filename); + writeFile.append("foo"); + } + + FileHandleFactory factory( + std::make_unique>(1000), + std::make_unique()); + FileHandleKey fileHandleKey{ + .filename = filename}; + auto fileHandle = factory.generate(fileHandleKey); + ASSERT_EQ(fileHandle->file->size(), 3); + char buffer[3]; + ASSERT_EQ(fileHandle->file->pread(0, 3, &buffer), "foo"); + + // Clean up + remove(filename.c_str()); +} + +TEST(FileHandleTest, localFileWithProperties) { + filesystems::registerLocalFileSystem(); + + auto tempFile = exec::test::TempFilePath::create(); + const auto& filename = tempFile->getPath(); + remove(filename.c_str()); + + { + LocalWriteFile writeFile(filename); + writeFile.append("foo"); + } + + FileHandleFactory factory( + std::make_unique>(1000), + std::make_unique()); + FileProperties properties = { + .fileSize = tempFile->fileSize(), + .modificationTime = tempFile->fileModifiedTime()}; + FileHandleKey fileHandleKey{ + .filename = filename}; + auto fileHandle = factory.generate(fileHandleKey, &properties); + ASSERT_EQ(fileHandle->file->size(), 3); + char buffer[3]; + ASSERT_EQ(fileHandle->file->pread(0, 3, &buffer), "foo"); + + // Clean up + remove(filename.c_str()); +} + +} // namespace facebook::velox::connector::lakehouse::iceberg::test diff --git a/velox/connectors/lakehouse/iceberg/tests/IcebergConnectorTestBase.cpp b/velox/connectors/lakehouse/iceberg/tests/IcebergConnectorTestBase.cpp new file mode 100644 index 000000000000..760bba8ee1a8 --- /dev/null +++ b/velox/connectors/lakehouse/iceberg/tests/IcebergConnectorTestBase.cpp @@ -0,0 +1,97 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "velox/connectors/lakehouse/iceberg/tests/IcebergConnectorTestBase.h" + +#include "velox/exec/tests/utils/AssertQueryBuilder.h" + +namespace facebook::velox::connector::lakehouse::iceberg::test { + +std::shared_ptr IcebergConnectorTestBase::assertQuery( + const core::PlanNodePtr& plan, + const std::vector>& filePaths, + const std::string& duckDbSql) { + return OperatorTestBase::assertQuery( + plan, makeIcebergConnectorSplits(filePaths), duckDbSql); +} + +std::shared_ptr IcebergConnectorTestBase::assertQuery( + const core::PlanNodePtr& plan, + const std::vector>& splits, + const std::string& duckDbSql, + const int32_t numPrefetchSplit) { + return exec::test::AssertQueryBuilder(plan, duckDbQueryRunner_) + .config( + core::QueryConfig::kMaxSplitPreloadPerDriver, + std::to_string(numPrefetchSplit)) + .splits(splits) + .assertResults(duckDbSql); +} + +std::vector> +IcebergConnectorTestBase::makeIcebergConnectorSplits( + const std::string& filePath, + uint32_t splitCount, + dwio::common::FileFormat format, + const std::optional< + std::unordered_map>>& + partitionKeys, + const std::optional>& + infoColumns) { + auto file = + filesystems::getFileSystem(filePath, nullptr)->openFileForRead(filePath); + const uint64_t fileSize = file->size(); + // Take the upper bound. + const uint64_t splitSize = std::ceil((fileSize) / splitCount); + std::vector> splits; + // Add all the splits. + for (uint32_t i = 0; i < splitCount; i++) { + auto splitBuilder = IcebergConnectorSplitBuilder(filePath) + .fileFormat(format) + .start(i * splitSize) + .length(splitSize); + if (infoColumns.has_value()) { + for (const auto& infoColumn : infoColumns.value()) { + splitBuilder.infoColumn(infoColumn.first, infoColumn.second); + } + } + if (partitionKeys.has_value()) { + for (const auto& partitionKey : partitionKeys.value()) { + splitBuilder.partitionKey(partitionKey.first, partitionKey.second); + } + } + + auto split = splitBuilder.build(); + splits.push_back(std::move(split)); + } + return splits; +} + +std::vector> +IcebergConnectorTestBase::makeIcebergConnectorSplits( + const std::vector>& filePaths) { + std::vector> splits; + splits.reserve(filePaths.size()); + for (const auto& filePath : filePaths) { + IcebergConnectorSplitBuilder icebergConnectorSplitBuilder(filePath->getPath()); + icebergConnectorSplitBuilder.start(0) + .length(std::numeric_limits::max()); + splits.push_back(icebergConnectorSplitBuilder.build()); + } + return splits; +} + +} diff --git a/velox/connectors/lakehouse/iceberg/tests/IcebergConnectorTestBase.h b/velox/connectors/lakehouse/iceberg/tests/IcebergConnectorTestBase.h new file mode 100644 index 000000000000..e221c01c6bee --- /dev/null +++ b/velox/connectors/lakehouse/iceberg/tests/IcebergConnectorTestBase.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "ConnectorTestBase.h" +#include "velox/connectors/lakehouse/iceberg/IcebergConnectorSplit.h" +#include "velox/exec/Task.h" +#include "velox/exec/tests/utils/TempFilePath.h" + +namespace facebook::velox::connector::lakehouse::iceberg::test { + +class IcebergConnectorTestBase : public ConnectorTestBase { + public: + std::shared_ptr assertQuery( + const core::PlanNodePtr& plan, + const std::vector>& filePaths, + const std::string& duckDbSql); + + std::shared_ptr assertQuery( + const core::PlanNodePtr& plan, + const std::vector>& splits, + const std::string& duckDbSql, + const int32_t numPrefetchSplit = 0); + + std::vector> + makeIcebergConnectorSplits( + const std::vector>& filePaths); + + /// Split file at path 'filePath' into 'splitCount' splits. If not local file, + /// file size can be given as 'externalSize'. + static std::vector< + std::shared_ptr> + makeIcebergConnectorSplits( + const std::string& filePath, + uint32_t splitCount, + dwio::common::FileFormat format, + const std::optional< + std::unordered_map>>& + partitionKeys = {}, + const std::optional>& + infoColumns = {}); +}; + +} // namespace facebook::velox::connector::lakehouse::iceberg::test diff --git a/velox/connectors/lakehouse/iceberg/tests/IcebergReadTest.cpp b/velox/connectors/lakehouse/iceberg/tests/IcebergReadTest.cpp new file mode 100644 index 000000000000..84832d981f40 --- /dev/null +++ b/velox/connectors/lakehouse/iceberg/tests/IcebergReadTest.cpp @@ -0,0 +1,858 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "velox/common/base/tests/GTestUtils.h" +#include "velox/common/file/FileSystems.h" +#include "velox/connectors/lakehouse/iceberg/ConnectorSplitBase.h" +#include "velox/connectors/lakehouse/iceberg/IcebergMetadataColumns.h" +#include "velox/connectors/lakehouse/iceberg/IcebergTableHandle.h" +#include "velox/connectors/lakehouse/iceberg/tests/IcebergConnectorTestBase.h" +#include "velox/connectors/lakehouse/iceberg/tests/PlanBuilder.h" +#include "velox/exec/PlanNodeStats.h" + +#include + +#include + +using namespace facebook::velox::exec::test; +using namespace facebook::velox::exec; +using namespace facebook::velox::dwio::common; +using namespace facebook::velox::test; + +namespace facebook::velox::connector::lakehouse::iceberg::test { + +class IcebergReadTest : public IcebergConnectorTestBase { + public: + IcebergReadTest() + : config_{std::make_shared()} { + // Make the writers flush per batch so that we can create non-aligned + // RowGroups between the base data files and delete files + flushPolicyFactory_ = []() { + return std::make_unique([]() { return true; }); + }; + } + + /// Create 1 base data file data_file_1 with 2 RowGroups of 10000 rows each. + /// Also create 1 delete file delete_file_1 which contains delete positions + /// for data_file_1. 
+ void assertSingleBaseFileSingleDeleteFile( + const std::vector& deletePositionsVec) { + std::map> rowGroupSizesForFiles = { + {"data_file_1", {10000, 10000}}}; + std::unordered_map< + std::string, + std::multimap>> + deleteFilesForBaseDatafiles = { + {"delete_file_1", {{"data_file_1", deletePositionsVec}}}}; + + assertPositionalDeletes( + rowGroupSizesForFiles, deleteFilesForBaseDatafiles, 0); + } + + /// Create 3 base data files, where the first file data_file_0 has 500 rows, + /// the second file data_file_1 contains 2 RowGroups of 10000 rows each, and + /// the third file data_file_2 contains 500 rows. It creates 1 positional + /// delete file delete_file_1, which contains delete positions for + /// data_file_1. + void assertMultipleBaseFileSingleDeleteFile( + const std::vector& deletePositionsVec) { + int64_t previousFileRowCount = 500; + int64_t afterFileRowCount = 500; + + assertPositionalDeletes( + { + {"data_file_0", {previousFileRowCount}}, + {"data_file_1", {10000, 10000}}, + {"data_file_2", {afterFileRowCount}}, + }, + {{"delete_file_1", {{"data_file_1", deletePositionsVec}}}}, + 0); + } + + /// Create 1 base data file data_file_1 with 2 RowGroups of 10000 rows each. + /// Create multiple delete files with name data_file_1, data_file_2, and so on + void assertSingleBaseFileMultipleDeleteFiles( + const std::vector>& deletePositionsVecs) { + std::map> rowGroupSizesForFiles = { + {"data_file_1", {10000, 10000}}}; + + std::unordered_map< + std::string, + std::multimap>> + deleteFilesForBaseDatafiles; + for (int i = 0; i < deletePositionsVecs.size(); i++) { + std::string deleteFileName = fmt::format("delete_file_{}", i); + deleteFilesForBaseDatafiles[deleteFileName] = { + {"data_file_1", deletePositionsVecs[i]}}; + } + assertPositionalDeletes( + rowGroupSizesForFiles, deleteFilesForBaseDatafiles, 0); + } + + void assertMultipleSplits( + const std::vector& deletePositions, + int32_t fileCount, + int32_t numPrefetchSplits, + int rowCountPerFile = rowCount, + int32_t splitCountPerFile = 1) { + std::map> rowGroupSizesForFiles; + for (int32_t i = 0; i < fileCount; i++) { + std::string dataFileName = fmt::format("data_file_{}", i); + rowGroupSizesForFiles[dataFileName] = {rowCountPerFile}; + } + + std::unordered_map< + std::string, + std::multimap>> + deleteFilesForBaseDatafiles; + for (int i = 0; i < fileCount; i++) { + std::string deleteFileName = fmt::format("delete_file_{}", i); + deleteFilesForBaseDatafiles[deleteFileName] = { + {fmt::format("data_file_{}", i), deletePositions}}; + } + + assertPositionalDeletes( + rowGroupSizesForFiles, + deleteFilesForBaseDatafiles, + numPrefetchSplits, + splitCountPerFile); + } + + std::vector makeRandomIncreasingValues(int64_t begin, int64_t end) { + VELOX_CHECK(begin < end); + + std::mt19937 gen{0}; + std::vector values; + values.reserve(end - begin); + for (int i = begin; i < end; i++) { + if (folly::Random::rand32(0, 10, gen) > 8) { + values.push_back(i); + } + } + return values; + } + + std::vector makeContinuousIncreasingValues( + int64_t begin, + int64_t end) { + std::vector values; + values.resize(end - begin); + std::iota(values.begin(), values.end(), begin); + return values; + } + + /// @rowGroupSizesForFiles The key is the file name, and the value is a vector + /// of RowGroup sizes + /// @deleteFilesForBaseDatafiles The key is the delete file name, and the + /// value contains the information about the content of this delete file. + /// e.g. 
{ + /// "delete_file_1", + /// { + /// {"data_file_1", {1, 2, 3}}, + /// {"data_file_1", {4, 5, 6}}, + /// {"data_file_2", {0, 2, 4}} + /// } + /// } + /// represents one delete file called delete_file_1, which contains delete + /// positions for data_file_1 and data_file_2. THere are 3 RowGroups in this + /// delete file, the first two contain positions for data_file_1, and the last + /// contain positions for data_file_2 + void assertPositionalDeletes( + const std::map>& rowGroupSizesForFiles, + const std::unordered_map< + std::string, + std::multimap>>& + deleteFilesForBaseDatafiles, + int32_t numPrefetchSplits = 0, + int32_t splitCount = 1) { + // Keep the reference to the deleteFilePath, otherwise the corresponding + // file will be deleted. + std::map> dataFilePaths = + writeDataFiles(rowGroupSizesForFiles); + std::unordered_map< + std::string, + std::pair>> + deleteFilePaths = writePositionDeleteFiles( + deleteFilesForBaseDatafiles, dataFilePaths); + + std::vector> splits; + + for (const auto& dataFile : dataFilePaths) { + std::string baseFileName = dataFile.first; + std::string baseFilePath = dataFile.second->getPath(); + + std::vector deleteFiles; + + for (auto const& deleteFile : deleteFilesForBaseDatafiles) { + std::string deleteFileName = deleteFile.first; + std::multimap> deleteFileContent = + deleteFile.second; + + if (deleteFileContent.count(baseFileName) != 0) { + // If this delete file contains rows for the target base file, then + // add it to the split + auto deleteFilePath = + deleteFilePaths[deleteFileName].second->getPath(); + IcebergDeleteFile icebergDeleteFile( + FileContent::kPositionalDeletes, + deleteFilePath, + fileFomat_, + deleteFilePaths[deleteFileName].first, + testing::internal::GetFileSize( + std::fopen(deleteFilePath.c_str(), "r"))); + deleteFiles.push_back(icebergDeleteFile); + } + } + + auto icebergSplits = + makeIcebergSplits(baseFilePath, deleteFiles, {}, splitCount); + splits.insert(splits.end(), icebergSplits.begin(), icebergSplits.end()); + } + + std::string duckdbSql = + getDuckDBQuery(rowGroupSizesForFiles, deleteFilesForBaseDatafiles); + auto plan = tableScanNode(rowType_); + auto task = assertQuery( + plan, splits, duckdbSql, numPrefetchSplits); + + auto planStats = toPlanStats(task->taskStats()); + auto scanNodeId = plan->id(); + auto it = planStats.find(scanNodeId); + ASSERT_TRUE(it != planStats.end()); + ASSERT_TRUE(it->second.peakMemoryBytes > 0); + } + + std::vector makeSequenceValues(int32_t numRows, int8_t repeat = 1) { + VELOX_CHECK_GT(repeat, 0); + + auto maxValue = std::ceil((double)numRows / repeat); + std::vector values; + values.reserve(numRows); + for (int32_t i = 0; i < maxValue; i++) { + for (int8_t j = 0; j < repeat; j++) { + values.push_back(i); + } + } + values.resize(numRows); + return values; + } + + std::vector makeRandomDeleteValues(int32_t maxRowNumber) { + std::mt19937 gen{0}; + std::vector deleteRows; + for (int i = 0; i < maxRowNumber; i++) { + if (folly::Random::rand32(0, 10, gen) > 8) { + deleteRows.push_back(i); + } + } + return deleteRows; + } + + const static int rowCount = 20000; + + protected: + std::shared_ptr config_; + std::function()> flushPolicyFactory_; + + std::vector> makeIcebergSplits( + const std::string& dataFilePath, + const std::vector& deleteFiles = {}, + const std::unordered_map>& + partitionKeys = {}, + const uint32_t splitCount = 1) { + std::unordered_map customSplitInfo; + customSplitInfo["table_format"] = "hive-iceberg"; + + auto file = filesystems::getFileSystem(dataFilePath, nullptr) + 
->openFileForRead(dataFilePath); + const int64_t fileSize = file->size(); + std::vector> splits; + const uint64_t splitSize = std::floor((fileSize) / splitCount); + + for (int i = 0; i < splitCount; ++i) { + IcebergConnectorSplitBuilder icebergConnectorSplitBuilder(dataFilePath); + icebergConnectorSplitBuilder.connectorId(kIcebergConnectorId) + .fileFormat(fileFomat_) + .start(i * splitSize) + .length(splitSize) + .partitionKeys(partitionKeys) + .splitWeight(0) + .cacheable(true) + .deleteFiles(deleteFiles); + splits.emplace_back(std::move(icebergConnectorSplitBuilder.build())); + } + + return splits; + } + + private: + std::map> writeDataFiles( + std::map> rowGroupSizesForFiles) { + std::map> dataFilePaths; + + std::vector dataVectorsJoined; + dataVectorsJoined.reserve(rowGroupSizesForFiles.size()); + + int64_t startingValue = 0; + for (auto& dataFile : rowGroupSizesForFiles) { + dataFilePaths[dataFile.first] = TempFilePath::create(); + + // We make the values are continuously increasing even across base data + // files. This is to make constructing DuckDB queries easier + std::vector dataVectors = + makeVectors(dataFile.second, startingValue); + writeToFile( + dataFilePaths[dataFile.first]->getPath(), + dataVectors, + config_, + flushPolicyFactory_); + + for (int i = 0; i < dataVectors.size(); i++) { + dataVectorsJoined.push_back(dataVectors[i]); + } + } + + createDuckDbTable(dataVectorsJoined); + return dataFilePaths; + } + + /// Input is like <"deleteFile1", <"dataFile1", {pos_RG1, pos_RG2,..}>, + /// <"dataFile2", {pos_RG1, pos_RG2,..}> + std::unordered_map< + std::string, + std::pair>> + writePositionDeleteFiles( + const std::unordered_map< + std::string, // delete file name + std::multimap< + std::string, + std::vector>>& + deleteFilesForBaseDatafiles, // + std::map> baseFilePaths) { + std::unordered_map< + std::string, + std::pair>> + deleteFilePaths; + deleteFilePaths.reserve(deleteFilesForBaseDatafiles.size()); + + for (auto& deleteFile : deleteFilesForBaseDatafiles) { + auto deleteFileName = deleteFile.first; + auto deleteFileContent = deleteFile.second; + auto deleteFilePath = TempFilePath::create(); + + std::vector deleteFileVectors; + int64_t totalPositionsInDeleteFile = 0; + + for (auto& deleteFileRowGroup : deleteFileContent) { + auto baseFileName = deleteFileRowGroup.first; + auto baseFilePath = baseFilePaths[baseFileName]->getPath(); + auto positionsInRowGroup = deleteFileRowGroup.second; + + auto filePathVector = makeFlatVector( + static_cast(positionsInRowGroup.size()), + [&](vector_size_t row) { return baseFilePath; }); + auto deletePosVector = makeFlatVector(positionsInRowGroup); + + RowVectorPtr deleteFileVector = makeRowVector( + {pathColumn_->name, posColumn_->name}, + {filePathVector, deletePosVector}); + + deleteFileVectors.push_back(deleteFileVector); + totalPositionsInDeleteFile += positionsInRowGroup.size(); + } + + writeToFile( + deleteFilePath->getPath(), + deleteFileVectors, + config_, + flushPolicyFactory_); + + deleteFilePaths[deleteFileName] = + std::make_pair(totalPositionsInDeleteFile, deleteFilePath); + } + + return deleteFilePaths; + } + + std::vector makeVectors( + std::vector vectorSizes, + int64_t& startingValue) { + std::vector vectors; + vectors.reserve(vectorSizes.size()); + + vectors.reserve(vectorSizes.size()); + for (int j = 0; j < vectorSizes.size(); j++) { + auto data = makeContinuousIncreasingValues( + startingValue, startingValue + vectorSizes[j]); + VectorPtr c0 = makeFlatVector(data); + vectors.push_back(makeRowVector({"c0"}, {c0})); + 
startingValue += vectorSizes[j]; + } + + return vectors; + } + + std::string getDuckDBQuery( + const std::map>& rowGroupSizesForFiles, + const std::unordered_map< + std::string, + std::multimap>>& + deleteFilesForBaseDatafiles) { + int64_t totalNumRowsInAllBaseFiles = 0; + std::map baseFileSizes; + for (auto rowGroupSizesInFile : rowGroupSizesForFiles) { + // Sum up the row counts in all RowGroups in each base file + baseFileSizes[rowGroupSizesInFile.first] += std::accumulate( + rowGroupSizesInFile.second.begin(), + rowGroupSizesInFile.second.end(), + 0LL); + totalNumRowsInAllBaseFiles += baseFileSizes[rowGroupSizesInFile.first]; + } + + // Group the delete vectors by baseFileName + std::map>> + deletePosVectorsForAllBaseFiles; + for (auto deleteFile : deleteFilesForBaseDatafiles) { + auto deleteFileContent = deleteFile.second; + for (auto rowGroup : deleteFileContent) { + auto baseFileName = rowGroup.first; + deletePosVectorsForAllBaseFiles[baseFileName].push_back( + rowGroup.second); + } + } + + // Flatten and deduplicate the delete position vectors in + // deletePosVectorsForAllBaseFiles from previous step, and count the total + // number of distinct delete positions for all base files + std::map> + flattenedDeletePosVectorsForAllBaseFiles; + int64_t totalNumDeletePositions = 0; + for (auto deleteVectorsForBaseFile : deletePosVectorsForAllBaseFiles) { + auto baseFileName = deleteVectorsForBaseFile.first; + auto deletePositionVectors = deleteVectorsForBaseFile.second; + std::vector deletePositionVector = + flattenAndDedup(deletePositionVectors, baseFileSizes[baseFileName]); + flattenedDeletePosVectorsForAllBaseFiles[baseFileName] = + deletePositionVector; + totalNumDeletePositions += deletePositionVector.size(); + } + + // Now build the DuckDB queries + if (totalNumDeletePositions == 0) { + return "SELECT * FROM tmp"; + } else if (totalNumDeletePositions >= totalNumRowsInAllBaseFiles) { + return "SELECT * FROM tmp WHERE 1 = 0"; + } else { + // Convert the delete positions in all base files into column values + std::vector allDeleteValues; + + int64_t numRowsInPreviousBaseFiles = 0; + for (auto baseFileSize : baseFileSizes) { + auto deletePositions = + flattenedDeletePosVectorsForAllBaseFiles[baseFileSize.first]; + + if (numRowsInPreviousBaseFiles > 0) { + for (int64_t& deleteValue : deletePositions) { + deleteValue += numRowsInPreviousBaseFiles; + } + } + + allDeleteValues.insert( + allDeleteValues.end(), + deletePositions.begin(), + deletePositions.end()); + + numRowsInPreviousBaseFiles += baseFileSize.second; + } + + return fmt::format( + "SELECT * FROM tmp WHERE c0 NOT IN ({})", + makeNotInList(allDeleteValues)); + } + } + + std::vector flattenAndDedup( + const std::vector>& deletePositionVectors, + int64_t baseFileSize) { + std::vector deletePositionVector; + for (auto vec : deletePositionVectors) { + for (auto pos : vec) { + if (pos >= 0 && pos < baseFileSize) { + deletePositionVector.push_back(pos); + } + } + } + + std::sort(deletePositionVector.begin(), deletePositionVector.end()); + auto last = + std::unique(deletePositionVector.begin(), deletePositionVector.end()); + deletePositionVector.erase(last, deletePositionVector.end()); + + return deletePositionVector; + } + + std::string makeNotInList(const std::vector& deletePositionVector) { + if (deletePositionVector.empty()) { + return ""; + } + + return std::accumulate( + deletePositionVector.begin() + 1, + deletePositionVector.end(), + std::to_string(deletePositionVector[0]), + [](const std::string& a, int64_t b) { + return a + ", 
" + std::to_string(b); + }); + } + + core::PlanNodePtr tableScanNode(RowTypePtr outputRowType) { + return PlanBuilder(pool_.get()).tableScan(outputRowType).planNode(); + } + + std::shared_ptr pathColumn_ = + IcebergMetadataColumn::icebergDeleteFilePathColumn(); + std::shared_ptr posColumn_ = + IcebergMetadataColumn::icebergDeletePosColumn(); + + protected: + RowTypePtr rowType_{ROW({"c0"}, {BIGINT()})}; + dwio::common::FileFormat fileFomat_{dwio::common::FileFormat::DWRF}; + + std::vector> writeDataFiles( + uint64_t numRows, + int32_t numColumns = 1, + int32_t splitCount = 1, + std::vector dataVectors = {}) { + if (dataVectors.empty()) { + dataVectors = makeVectors(splitCount, numRows, numColumns); + } + VELOX_CHECK_EQ(dataVectors.size(), splitCount); + + std::vector> dataFilePaths; + dataFilePaths.reserve(splitCount); + for (auto i = 0; i < splitCount; i++) { + dataFilePaths.emplace_back(TempFilePath::create()); + writeToFile(dataFilePaths.back()->getPath(), dataVectors[i]); + } + + createDuckDbTable(dataVectors); + return dataFilePaths; + } + + std::vector + makeVectors(int32_t count, int32_t rowsPerVector, int32_t numColumns = 1) { + std::vector types(numColumns, BIGINT()); + std::vector names; + for (int j = 0; j < numColumns; j++) { + names.push_back(fmt::format("c{}", j)); + } + + std::vector rowVectors; + for (int i = 0; i < count; i++) { + std::vector vectors; + + // Create the column values like below: + // c0 c1 c2 + // 0 0 0 + // 1 0 0 + // 2 1 0 + // 3 1 1 + // 4 2 1 + // 5 2 1 + // 6 3 2 + // ... + // In the first column c0, the values are continuously increasing and not + // repeating. In the second column c1, the values are continuously + // increasing and each value repeats once. And so on. + for (int j = 0; j < numColumns; j++) { + auto data = makeSequenceValues(rowsPerVector, j + 1); + vectors.push_back(vectorMaker_.flatVector(data)); + } + + rowVectors.push_back(makeRowVector(names, vectors)); + } + + rowType_ = std::make_shared(std::move(names), std::move(types)); + + return rowVectors; + } +}; + +/// This test creates one single data file and one delete file. The parameter +/// passed to assertSingleBaseFileSingleDeleteFile is the delete positions. +TEST_F(IcebergReadTest, singleBaseFileSinglePositionalDeleteFile) { + folly::SingletonVault::singleton()->registrationComplete(); + + assertSingleBaseFileSingleDeleteFile({{0, 1, 2, 3}}); + // Delete the first and last row in each batch (10000 rows per batch) + assertSingleBaseFileSingleDeleteFile({{0, 9999, 10000, 19999}}); + // Delete several rows in the second batch (10000 rows per batch) + assertSingleBaseFileSingleDeleteFile({{10000, 10002, 19999}}); + // Delete random rows + assertSingleBaseFileSingleDeleteFile({makeRandomIncreasingValues(0, 20000)}); + // Delete 0 rows + assertSingleBaseFileSingleDeleteFile({}); + // Delete all rows + assertSingleBaseFileSingleDeleteFile( + {makeContinuousIncreasingValues(0, 20000)}); + // Delete rows that don't exist + assertSingleBaseFileSingleDeleteFile({{20000, 29999}}); +} + +/// This test creates 3 base data files, only the middle one has corresponding +/// delete positions. The parameter passed to +/// assertSingleBaseFileSingleDeleteFile is the delete positions.for the middle +/// base file. 
+TEST_F(IcebergReadTest, MultipleBaseFilesSinglePositionalDeleteFile) { + folly::SingletonVault::singleton()->registrationComplete(); + + assertMultipleBaseFileSingleDeleteFile({0, 1, 2, 3}); + assertMultipleBaseFileSingleDeleteFile({0, 9999, 10000, 19999}); + assertMultipleBaseFileSingleDeleteFile({10000, 10002, 19999}); + assertMultipleBaseFileSingleDeleteFile({10000, 10002, 19999}); + assertMultipleBaseFileSingleDeleteFile( + makeRandomIncreasingValues(0, rowCount)); + assertMultipleBaseFileSingleDeleteFile({}); + assertMultipleBaseFileSingleDeleteFile( + makeContinuousIncreasingValues(0, rowCount)); +} + +/// This test creates one base data file/split with multiple delete files. The +/// parameter passed to assertSingleBaseFileMultipleDeleteFiles is the vector of +/// delete files. Each leaf vector represents the delete positions in that +/// delete file. +TEST_F(IcebergReadTest, singleBaseFileMultiplePositionalDeleteFiles) { + folly::SingletonVault::singleton()->registrationComplete(); + + // Delete row 0, 1, 2, 3 from the first batch out of two. + assertSingleBaseFileMultipleDeleteFiles({{1}, {2}, {3}, {4}}); + // Delete the first and last row in each batch (10000 rows per batch). + assertSingleBaseFileMultipleDeleteFiles({{0}, {9999}, {10000}, {19999}}); + + assertSingleBaseFileMultipleDeleteFiles({{500, 21000}}); + + assertSingleBaseFileMultipleDeleteFiles( + {makeRandomIncreasingValues(0, 10000), + makeRandomIncreasingValues(10000, 20000), + makeRandomIncreasingValues(5000, 15000)}); + + assertSingleBaseFileMultipleDeleteFiles( + {makeContinuousIncreasingValues(0, 10000), + makeContinuousIncreasingValues(10000, 20000)}); + + assertSingleBaseFileMultipleDeleteFiles( + {makeContinuousIncreasingValues(0, 10000), + makeContinuousIncreasingValues(10000, 20000), + makeRandomIncreasingValues(5000, 15000)}); + + assertSingleBaseFileMultipleDeleteFiles( + {makeContinuousIncreasingValues(0, 20000), + makeContinuousIncreasingValues(0, 20000)}); + + assertSingleBaseFileMultipleDeleteFiles( + {makeRandomIncreasingValues(0, 20000), + {}, + makeRandomIncreasingValues(5000, 15000)}); + + assertSingleBaseFileMultipleDeleteFiles({{}, {}}); +} + +/// This test creates 2 base data files, and 1 or 2 delete files, with unaligned +/// RowGroup boundaries +TEST_F(IcebergReadTest, multipleBaseFileMultiplePositionalDeleteFiles) { + folly::SingletonVault::singleton()->registrationComplete(); + + std::map> rowGroupSizesForFiles; + std::unordered_map< + std::string, + std::multimap>> + deleteFilesForBaseDatafiles; + + // Create two data files, each with two RowGroups + rowGroupSizesForFiles["data_file_1"] = {100, 85}; + rowGroupSizesForFiles["data_file_2"] = {99, 1}; + + // Delete 3 rows from the first RowGroup in data_file_1 + deleteFilesForBaseDatafiles["delete_file_1"] = {{"data_file_1", {0, 1, 99}}}; + assertPositionalDeletes(rowGroupSizesForFiles, deleteFilesForBaseDatafiles); + + // Delete 3 rows from the second RowGroup in data_file_1 + deleteFilesForBaseDatafiles["delete_file_1"] = { + {"data_file_1", {100, 101, 184}}}; + assertPositionalDeletes(rowGroupSizesForFiles, deleteFilesForBaseDatafiles); + + // Delete random rows from the both RowGroups in data_file_1 + deleteFilesForBaseDatafiles["delete_file_1"] = { + {"data_file_1", makeRandomIncreasingValues(0, 185)}}; + assertPositionalDeletes(rowGroupSizesForFiles, deleteFilesForBaseDatafiles); + + // Delete all rows in data_file_1 + deleteFilesForBaseDatafiles["delete_file_1"] = { + {"data_file_1", makeContinuousIncreasingValues(0, 185)}}; + 
assertPositionalDeletes(rowGroupSizesForFiles, deleteFilesForBaseDatafiles); + // + // Delete non-existent rows from data_file_1 + deleteFilesForBaseDatafiles["delete_file_1"] = { + {"data_file_1", makeRandomIncreasingValues(186, 300)}}; + assertPositionalDeletes(rowGroupSizesForFiles, deleteFilesForBaseDatafiles); + + // Delete several rows from both RowGroups in both data files + deleteFilesForBaseDatafiles.clear(); + deleteFilesForBaseDatafiles["delete_file_1"] = { + {"data_file_1", {0, 100, 102, 184}}, {"data_file_2", {1, 98, 99}}}; + assertPositionalDeletes(rowGroupSizesForFiles, deleteFilesForBaseDatafiles); + + // The delete file delete_file_1 contains 3 RowGroups itself, with the first 3 + // deleting some repeating rows in data_file_1, and the last 2 RowGroups + // deleting some repeating rows in data_file_2 + deleteFilesForBaseDatafiles.clear(); + deleteFilesForBaseDatafiles["delete_file_1"] = { + {"data_file_1", {0, 1, 2, 3}}, + {"data_file_1", {1, 2, 3, 4}}, + {"data_file_1", makeRandomIncreasingValues(0, 185)}, + {"data_file_2", {1, 3, 5, 7}}, + {"data_file_2", makeRandomIncreasingValues(0, 100)}}; + assertPositionalDeletes(rowGroupSizesForFiles, deleteFilesForBaseDatafiles); + + // delete_file_2 contains non-overlapping delete rows for each data files in + // each RowGroup + deleteFilesForBaseDatafiles.clear(); + deleteFilesForBaseDatafiles["delete_file_1"] = { + {"data_file_1", {0, 1, 2, 3}}, {"data_file_2", {1, 3, 5, 7}}}; + deleteFilesForBaseDatafiles["delete_file_2"] = { + {"data_file_1", {1, 2, 3, 4}}, + {"data_file_1", {98, 99, 100, 101, 184}}, + {"data_file_2", {3, 5, 7, 9}}, + {"data_file_2", {98, 99, 100}}}; + assertPositionalDeletes(rowGroupSizesForFiles, deleteFilesForBaseDatafiles); + + // Two delete files each containing overlapping delete rows for both data + // files + deleteFilesForBaseDatafiles.clear(); + deleteFilesForBaseDatafiles["delete_file_1"] = { + {"data_file_1", makeRandomIncreasingValues(0, 185)}, + {"data_file_2", makeRandomIncreasingValues(0, 100)}}; + deleteFilesForBaseDatafiles["delete_file_2"] = { + {"data_file_1", makeRandomIncreasingValues(10, 120)}, + {"data_file_2", makeRandomIncreasingValues(50, 100)}}; + assertPositionalDeletes(rowGroupSizesForFiles, deleteFilesForBaseDatafiles); +} + +TEST_F(IcebergReadTest, positionalDeletesMultipleSplits) { + folly::SingletonVault::singleton()->registrationComplete(); + + assertMultipleSplits({1, 2, 3, 4}, 10, 5); + assertMultipleSplits({1, 2, 3, 4}, 10, 0); + assertMultipleSplits({1, 2, 3, 4}, 10, 10); + assertMultipleSplits({0, 9999, 10000, 19999}, 10, 3); + assertMultipleSplits(makeRandomIncreasingValues(0, 20000), 10, 3); + assertMultipleSplits(makeContinuousIncreasingValues(0, 20000), 10, 3); + assertMultipleSplits({}, 10, 3); + + assertMultipleSplits({1, 2, 3, 4}, 10, 5, 30000, 3); + assertPositionalDeletes( + { + {"data_file_0", {500}}, + {"data_file_1", {10000, 10000}}, + {"data_file_2", {500}}, + }, + {{"delete_file_1", + {{"data_file_1", makeRandomIncreasingValues(0, 20000)}}}}, + 0, + 3); + + // Include only upper bound(which is exclusive) in delete positions for the + // second 10k batch of rows. + assertMultipleSplits({1000, 9000, 20000}, 1, 0, 20000, 3); +} + +TEST_F(IcebergReadTest, testPartitionedRead) { + RowTypePtr rowType{ROW({"c0", "ds"}, {BIGINT(), DateType::get()})}; + std::unordered_map> partitionKeys; + // Iceberg API sets partition values for dates to daysSinceEpoch, so + // in velox, we do not need to convert it to days. 
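+  // For example, 2018-04-06 is 17627 days after the Unix epoch (1970-01-01),
+  // so the partition value for that day is passed as the string "17627"
+  // rather than as a formatted date.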
+ // Test query on two partitions ds=17627(2018-04-06), ds=17628(2018-04-07) + std::vector> splits; + std::vector> dataFilePaths; + for (int i = 0; i <= 1; ++i) { + std::vector dataVectors; + int32_t daysSinceEpoch = 17627 + i; + VectorPtr c0 = makeFlatVector((std::vector){i}); + VectorPtr ds = + makeFlatVector((std::vector){daysSinceEpoch}); + dataVectors.push_back(makeRowVector({"c0", "ds"}, {c0, ds})); + + auto dataFilePath = TempFilePath::create(); + dataFilePaths.push_back(dataFilePath); + writeToFile( + dataFilePath->getPath(), dataVectors, config_, flushPolicyFactory_); + partitionKeys["ds"] = std::to_string(daysSinceEpoch); + auto icebergSplits = + makeIcebergSplits(dataFilePath->getPath(), {}, partitionKeys); + splits.insert(splits.end(), icebergSplits.begin(), icebergSplits.end()); + } + + connector::ColumnHandleMap assignments; + assignments.insert( + {"c0", + std::make_shared( + "c0", + IcebergColumnHandle::ColumnType::kRegular, + rowType->childAt(0))}); + + // std::vector requiredSubFields; + // IcebergColumnHandle::ColumnParseParameters columnParseParameters; + // columnParseParameters.partitionDateValueFormat = + // IcebergColumnHandle::ColumnParseParameters::kDaysSinceEpoch; + assignments.insert( + {"ds", + std::make_shared( + "ds", + IcebergColumnHandle::ColumnType::kPartitionKey, + rowType->childAt(1))}); + + auto plan = PlanBuilder(pool_.get()) + .tableScan(rowType, {}, "", nullptr, assignments) + .planNode(); + + assertQuery( + plan, + splits, + "SELECT * FROM (VALUES (0, '2018-04-06'), (1, '2018-04-07'))", + 0); + + // Test filter on non-partitioned non-date column + std::vector nonPartitionFilters = {"c0 = 1"}; + plan = PlanBuilder(pool_.get()) + .tableScan(rowType, nonPartitionFilters, "", nullptr, assignments) + .planNode(); + + OperatorTestBase::assertQuery(plan, splits, "SELECT 1, '2018-04-07'"); + + // Test filter on non-partitioned date column + std::vector filters = {"ds = date'2018-04-06'"}; + plan = PlanBuilder(pool_.get()).tableScan(rowType, filters).planNode(); + + splits.clear(); + for (auto& dataFilePath : dataFilePaths) { + auto icebergSplits = makeIcebergSplits(dataFilePath->getPath()); + splits.insert(splits.end(), icebergSplits.begin(), icebergSplits.end()); + } + + assertQuery(plan, splits, "SELECT 0, '2018-04-06'"); +} +} // namespace facebook::velox::connector::lakehouse::iceberg::test diff --git a/velox/connectors/lakehouse/iceberg/tests/IcebergSplitReaderBenchmark.cpp b/velox/connectors/lakehouse/iceberg/tests/IcebergSplitReaderBenchmark.cpp new file mode 100644 index 000000000000..98f57e3fb404 --- /dev/null +++ b/velox/connectors/lakehouse/iceberg/tests/IcebergSplitReaderBenchmark.cpp @@ -0,0 +1,406 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "velox/connectors/lakehouse/iceberg/tests/IcebergSplitReaderBenchmark.h" + +#include "velox/connectors/lakehouse/iceberg/IcebergConfig.h" + +#include + +#include + +using namespace facebook::velox; +using namespace facebook::velox::dwio; +using namespace facebook::velox::dwio::common; +using namespace facebook::velox::dwrf; +using namespace facebook::velox::memory; +using namespace facebook::velox::connector::lakehouse::iceberg; + +namespace facebook::velox::connector::lakehouse::iceberg::test { + +void IcebergSplitReaderBenchmark::writeToFile( + const std::vector& batches) { + auto path = fileFolder_->getPath() + "/" + fileName_; + auto localWriteFile = std::make_unique(path, true, false); + auto sink = std::make_unique(std::move(localWriteFile), path); + dwrf::WriterOptions options; + options.memoryPool = rootPool_.get(); + options.schema = batches[0]->type(); + dwrf::Writer dataFilewriter{std::move(sink), options}; + for (auto& batch : batches) { + dataFilewriter.write(batch); + } + dataFilewriter.flush(); + dataFilewriter.close(); +} + +void IcebergSplitReaderBenchmark::writeToPositionDeleteFile( + const std::string& filePath, + const std::vector& vectors) { + auto localPosWriteFile = + std::make_unique(filePath, true, false); + auto posDeletesink = + std::make_unique(std::move(localPosWriteFile), filePath); + dwrf::WriterOptions options; + options.memoryPool = rootPool_.get(); + options.schema = vectors[0]->type(); + dwrf::Writer posDeletewriter{std::move(posDeletesink), options}; + for (size_t i = 0; i < vectors.size(); ++i) { + posDeletewriter.write(vectors[i]); + } + posDeletewriter.close(); +} + +std::vector IcebergSplitReaderBenchmark::makeRandomDeleteRows( + int32_t deleteRowsCount) { + std::random_device rd; + std::mt19937 gen(rd()); + int64_t totalDataRows = kNumBatches * kNumRowsPerBatch; + std::uniform_int_distribution<> dis(0, totalDataRows - 1); + std::set uniqueDeleteRows; + while (uniqueDeleteRows.size() < deleteRowsCount) { + uniqueDeleteRows.insert(dis(gen)); + } + std::vector deleteRows( + uniqueDeleteRows.begin(), uniqueDeleteRows.end()); + return deleteRows; +} + +std::vector IcebergSplitReaderBenchmark::makeSequenceRows( + int32_t maxRowNumber) { + std::vector deleteRows; + deleteRows.resize(maxRowNumber); + std::iota(deleteRows.begin(), deleteRows.end(), 0); + return deleteRows; +} + +std::vector IcebergSplitReaderBenchmark::listFiles( + const std::string& dirPath) { + std::vector files; + for (auto& dirEntry : + std::filesystem::recursive_directory_iterator(dirPath)) { + if (dirEntry.is_regular_file()) { + files.push_back(dirEntry.path().string()); + } + } + return files; +} + +std::shared_ptr +IcebergSplitReaderBenchmark::makeIcebergSplit( + const std::string& dataFilePath, + const std::vector& deleteFiles) { + std::unordered_map> partitionKeys; + // std::unordered_map customSplitInfo; + // customSplitInfo["table_format"] = "hive-iceberg"; + + auto readFile = std::make_shared(dataFilePath); + const int64_t fileSize = readFile->size(); + + return std::make_shared( + kIcebergConnectorId, + dataFilePath, + fileFormat_, // dwio::common::FileFormat + /*start=*/0, + /*length=*/fileSize, + std::unordered_map< + std::string, + std::optional>{}, // empty partitionKeys + std::unordered_map{}, // empty serdeParameters + std::unordered_map{}, // empty storageParameters + /*splitWeight=*/0, + /*cacheable=*/true, + deleteFiles // your vector + // infoColumns & properties will use their defaults + ); +} + +std::string 
IcebergSplitReaderBenchmark::writePositionDeleteFile( + const std::string& dataFilePath, + int64_t numDeleteRows) { + facebook::velox::test::VectorMaker vectorMaker{leafPool_.get()}; + auto filePathVector = + vectorMaker.flatVector(numDeleteRows, [&](auto row) { + if (row < numDeleteRows) { + return StringView(dataFilePath); + } else { + return StringView(); + } + }); + + std::vector deleteRowsVec; + deleteRowsVec.reserve(numDeleteRows); + auto deleteRows = makeRandomDeleteRows(numDeleteRows); + deleteRowsVec.insert( + deleteRowsVec.end(), deleteRows.begin(), deleteRows.end()); + + auto deletePositionsVector = vectorMaker.flatVector(deleteRowsVec); + + std::shared_ptr pathColumn = + IcebergMetadataColumn::icebergDeleteFilePathColumn(); + std::shared_ptr posColumn = + IcebergMetadataColumn::icebergDeletePosColumn(); + RowVectorPtr deleteFileVectors = vectorMaker.rowVector( + {pathColumn->name, posColumn->name}, + {filePathVector, deletePositionsVector}); + + auto deleteFilePath = deleteFileFolder_->getPath() + "/" + "posDelete.data"; + writeToPositionDeleteFile(deleteFilePath, std::vector{deleteFileVectors}); + + return deleteFilePath; +} + +std::vector> +IcebergSplitReaderBenchmark::createIcebergSplitsWithPositionalDelete( + int32_t deleteRowsPercentage, + int32_t deleteFilesCount) { + std::vector> splits; + + std::vector deleteFilePaths; + std::vector dataFilePaths = listFiles(fileFolder_->getPath()); + + for (const auto& dataFilePath : dataFilePaths) { + std::vector deleteFiles; + int64_t deleteRowsCount = + kNumBatches * kNumRowsPerBatch * deleteRowsPercentage * 0.01; + deleteFiles.reserve(deleteRowsCount); + for (int i = 0; i < deleteFilesCount; i++) { + std::string deleteFilePath = + writePositionDeleteFile(dataFilePath, deleteRowsCount); + + IcebergDeleteFile deleteFile( + FileContent::kPositionalDeletes, + deleteFilePath, + fileFormat_, + deleteRowsCount, + testing::internal::GetFileSize( + std::fopen(deleteFilePath.c_str(), "r"))); + deleteFilePaths.emplace_back(deleteFilePath); + deleteFiles.emplace_back(deleteFile); + } + splits.emplace_back(makeIcebergSplit(dataFilePath, deleteFiles)); + } + return splits; +} + +FilterSpec IcebergSplitReaderBenchmark::createFilterSpec( + const std::string& columnName, + float startPct, + float selectPct, + const TypePtr& type, + bool isForRowGroupSkip, + bool allowNulls) { + switch (type->childAt(0)->kind()) { + case TypeKind::BIGINT: + case TypeKind::INTEGER: + return FilterSpec( + columnName, + startPct, + selectPct, + FilterKind::kBigintRange, + isForRowGroupSkip, + allowNulls); + default: + VELOX_FAIL("Unsupported Data Type {}", type->childAt(0)->toString()); + } + return FilterSpec(columnName, startPct, selectPct, FilterKind(), false); +} + +std::shared_ptr IcebergSplitReaderBenchmark::createScanSpec( + const std::vector& batches, + RowTypePtr& rowType, + const std::vector& filterSpecs, + std::vector& hitRows, + SubfieldFilters& filters) { + std::unique_ptr filterGenerator = + std::make_unique(rowType, 0); + filters = filterGenerator->makeSubfieldFilters( + filterSpecs, batches, nullptr, hitRows); + auto scanSpec = filterGenerator->makeScanSpec(std::move(filters)); + return scanSpec; +} + +// This method is the place where we do the read operations using +// icebergSplitReader. scanSpec contains the setting of filters. e.g. +// filterRateX100 = 30 means it would filter out 70% of rows and 30% remain. +// deleteRateX100 = 30 means it would delete 30% of overall data rows and 70% +// remain. 
Return the number of rows after the filter and delete. +int IcebergSplitReaderBenchmark::read( + const RowTypePtr& rowType, + uint32_t nextSize, + std::unique_ptr icebergSplitReader) { + runtimeStats_ = RuntimeStatistics(); + icebergSplitReader->resetFilterCaches(); + int resultSize = 0; + auto result = BaseVector::create(rowType, 0, leafPool_.get()); + while (true) { + bool hasData = icebergSplitReader->next(nextSize, result); + if (!hasData) { + break; + } + auto rowsRemaining = result->size(); + resultSize += rowsRemaining; + } + icebergSplitReader->updateRuntimeStats(runtimeStats_); + return resultSize; +} + +void IcebergSplitReaderBenchmark::readSingleColumn( + const std::string& columnName, + const TypePtr& type, + float startPct, + float selectPct, + float deletePct, + uint32_t nextSize) { + folly::BenchmarkSuspender suspender; + auto rowType = ROW({columnName}, {type}); + + auto batches = + dataSetBuilder_->makeDataset(rowType, kNumBatches, kNumRowsPerBatch) + .withRowGroupSpecificData(kNumRowsPerRowGroup) + .withNullsForField(Subfield(columnName), 0) + .build(); + writeToFile(*batches); + std::vector filterSpecs; + + filterSpecs.emplace_back( + createFilterSpec(columnName, startPct, selectPct, rowType, false, false)); + + std::vector hitRows; + SubfieldFilters filters; + auto scanSpec = + createScanSpec(*batches, rowType, filterSpecs, hitRows, filters); + + std::vector> splits = + createIcebergSplitsWithPositionalDelete(deletePct, 1); + + core::TypedExprPtr remainingFilterExpr; + + auto icebergTableHandle = std::make_shared( + kIcebergConnectorId, + "tableName", + false, // TODO: verify filterPushdownEnabled + std::move(filters), + remainingFilterExpr, + rowType); + + std::shared_ptr icebergConfig = + std::make_shared(std::make_shared( + std::unordered_map(), true)); + const RowTypePtr readerOutputType; + const std::shared_ptr ioStats = + std::make_shared(); + const std::shared_ptr fsStats = + std::make_shared(); + + std::shared_ptr root = + memory::memoryManager()->addRootPool( + "IcebergSplitReader", kMaxMemory, MemoryReclaimer::create()); + std::shared_ptr opPool = root->addLeafChild("operator"); + std::shared_ptr connectorPool = + root->addAggregateChild(kIcebergConnectorId, MemoryReclaimer::create()); + std::shared_ptr connectorSessionProperties_ = + std::make_shared( + std::unordered_map()); + + std::unique_ptr connectorQueryCtx_ = + std::make_unique( + opPool.get(), + connectorPool.get(), + connectorSessionProperties_.get(), + nullptr, + velox::common::PrefixSortConfig(), + nullptr, + nullptr, + "query.IcebergSplitReader", + "task.IcebergSplitReader", + "planNodeId.IcebergSplitReader", + 0, + ""); + + FileHandleFactory fileHandleFactory( + std::make_unique>( + icebergConfig->numCacheFileHandles()), + std::make_unique(connectorSessionProperties_)); + + suspender.dismiss(); + + auto ioExecutor = std::make_unique(3); + std::shared_ptr remainingFilterExprSet{nullptr}; + std::atomic totalRemainingFilterMs; + + uint64_t resultSize = 0; + for (std::shared_ptr split : splits) { + scanSpec->resetCachedValues(true); + + std::unique_ptr icebergSplitReader = + std::make_unique( + split, + icebergTableHandle, + nullptr, + connectorQueryCtx_.get(), + icebergConfig, + rowType, + ioStats, + fsStats, + &fileHandleFactory, + ioExecutor.get(), + scanSpec); + + std::shared_ptr randomSkip; + icebergSplitReader->configureReaderOptions(randomSkip); + icebergSplitReader->prepareSplit(nullptr, runtimeStats_); + + // Filter range is generated from a small sample data of 4096 rows. 
So the + // upperBound and lowerBound are introduced to estimate the result size. + resultSize += read(rowType, nextSize, std::move(icebergSplitReader)); + } + // Calculate the expected number of rows after the filters. + // Add one to expected to avoid 0 in calculating upperBound and lowerBound. + int expected = kNumBatches * kNumRowsPerBatch * ((double)selectPct / 100) * + (1 - (double)deletePct / 100) + + 1; + + // Make the upperBound and lowerBound large enough to avoid very small + // resultSize and expected size, where the diff ratio is relatively very + // large. + int upperBound = expected * (1 + kFilterErrorMargin) + 1; + int lowerBound = expected * (1 - kFilterErrorMargin) - 1; + upperBound = std::max(16, upperBound); + lowerBound = std::max(0, lowerBound); + + VELOX_CHECK( + resultSize <= upperBound && resultSize >= lowerBound, + "Result Size {} and Expected Size {} Mismatch", + resultSize, + expected); +} + +void run( + uint32_t, + const std::string& columnName, + const TypePtr& type, + float filterRateX100, + float deleteRateX100, + uint32_t nextSize) { + RowTypePtr rowType = ROW({columnName}, {type}); + IcebergSplitReaderBenchmark benchmark; + BIGINT()->toString(); + benchmark.readSingleColumn( + columnName, type, 0, filterRateX100, deleteRateX100, nextSize); +} + +} // namespace facebook::velox::connector::lakehouse::iceberg::test diff --git a/velox/connectors/lakehouse/iceberg/tests/IcebergSplitReaderBenchmark.h b/velox/connectors/lakehouse/iceberg/tests/IcebergSplitReaderBenchmark.h new file mode 100644 index 000000000000..1af70e353721 --- /dev/null +++ b/velox/connectors/lakehouse/iceberg/tests/IcebergSplitReaderBenchmark.h @@ -0,0 +1,136 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include "velox/common/file/FileSystems.h" +#include "velox/connectors/lakehouse/iceberg/IcebergConnectorSplit.h" +#include "velox/connectors/lakehouse/iceberg/IcebergDeleteFile.h" +#include "velox/connectors/lakehouse/iceberg/IcebergMetadataColumns.h" +#include "velox/connectors/lakehouse/iceberg/IcebergSplitReader.h" +#include "velox/connectors/lakehouse/iceberg/IcebergTableHandle.h" +#include "velox/dwio/common/tests/utils/DataSetBuilder.h" +#include "velox/dwio/dwrf/RegisterDwrfReader.h" +#include "velox/dwio/dwrf/writer/Writer.h" +#include "velox/exec/tests/utils/TempDirectoryPath.h" +#include "velox/type/Filter.h" +#include "velox/vector/tests/utils/VectorTestBase.h" + +#include +#include + +namespace facebook::velox::connector::lakehouse::iceberg::test { + +constexpr uint32_t kNumRowsPerBatch = 20000; +constexpr uint32_t kNumBatches = 50; +constexpr uint32_t kNumRowsPerRowGroup = 10000; +constexpr double kFilterErrorMargin = 0.2; + +class IcebergSplitReaderBenchmark { + public: + explicit IcebergSplitReaderBenchmark() { + rootPool_ = + memory::memoryManager()->addRootPool("IcebergSplitReaderBenchmark"); + leafPool_ = rootPool_->addLeafChild("IcebergSplitReaderBenchmark"); + dataSetBuilder_ = + std::make_unique(*leafPool_, 0); + filesystems::registerLocalFileSystem(); + dwrf::registerDwrfReaderFactory(); + } + + ~IcebergSplitReaderBenchmark() {} + + void writeToFile(const std::vector& batches); + + void writeToPositionDeleteFile( + const std::string& filePath, + const std::vector& vectors); + + dwio::common::FilterSpec createFilterSpec( + const std::string& columnName, + float startPct, + float selectPct, + const TypePtr& type, + bool isForRowGroupSkip, + bool allowNulls); + + std::shared_ptr createScanSpec( + const std::vector& batches, + RowTypePtr& rowType, + const std::vector& filterSpecs, + std::vector& hitRows, + velox::common::SubfieldFilters& filters); + + int read( + const RowTypePtr& rowType, + uint32_t nextSize, + std::unique_ptr + icebergSplitReader); + + void readSingleColumn( + const std::string& columnName, + const TypePtr& type, + float startPct, + float selectPct, + float deleteRate, + uint32_t nextSize); + + std::vector> + createIcebergSplitsWithPositionalDelete( + int32_t deleteRowsPercentage, + int32_t deleteFilesCount); + + std::vector listFiles(const std::string& dirPath); + + std::shared_ptr + makeIcebergSplit( + const std::string& dataFilePath, + const std::vector& + deleteFiles = {}); + + std::vector makeRandomDeleteRows(int32_t deleteRowsCount); + + std::vector makeSequenceRows(int32_t maxRowNumber); + + std::string writePositionDeleteFile( + const std::string& dataFilePath, + int64_t numDeleteRows); + + private: + const std::string fileName_ = "test.data"; + const std::shared_ptr fileFolder_ = + exec::test::TempDirectoryPath::create(); + const std::shared_ptr deleteFileFolder_ = + exec::test::TempDirectoryPath::create(); + + std::unique_ptr dataSetBuilder_; + std::shared_ptr rootPool_; + std::shared_ptr leafPool_; + std::unique_ptr writer_; + dwio::common::RuntimeStatistics runtimeStats_; + + dwio::common::FileFormat fileFormat_{dwio::common::FileFormat::DWRF}; + const std::string kIcebergConnectorId = "hive-iceberg"; +}; + +void run( + uint32_t, + const std::string& columnName, + const TypePtr& type, + float filterRateX100, + float deleteRateX100, + uint32_t nextSize); + +} // namespace facebook::velox::connector::lakehouse::iceberg::test diff --git a/velox/connectors/lakehouse/iceberg/tests/IcebergSplitReaderBenchmarkMain.cpp 
b/velox/connectors/lakehouse/iceberg/tests/IcebergSplitReaderBenchmarkMain.cpp new file mode 100644 index 000000000000..4766662590d4 --- /dev/null +++ b/velox/connectors/lakehouse/iceberg/tests/IcebergSplitReaderBenchmarkMain.cpp @@ -0,0 +1,67 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "IcebergSplitReaderBenchmark.h" + +using namespace facebook::velox; +using namespace facebook::velox::dwio; +using namespace facebook::velox::dwio::common; +using namespace facebook::velox::dwrf; +using namespace facebook::velox::connector::lakehouse::iceberg::test; +using namespace facebook::velox::test; + +#define PARQUET_BENCHMARKS_FILTER_DELETES(_type_, _name_, _filter_, _deletes_) \ + BENCHMARK_NAMED_PARAM( \ + run, \ + _name_##_Filter_##_filter_##_Delete_##_deletes_##_next_5k, \ + #_name_, \ + _type_, \ + _filter_, \ + _deletes_, \ + 5000); \ + BENCHMARK_NAMED_PARAM( \ + run, \ + _name_##_Filter_##_filter_##_Delete_##_deletes_##_next_10k, \ + #_name_, \ + _type_, \ + _filter_, \ + _deletes_, \ + 10000); \ + BENCHMARK_DRAW_LINE(); + +#define PARQUET_BENCHMARKS_FILTERS(_type_, _name_, _filter_) \ + PARQUET_BENCHMARKS_FILTER_DELETES(_type_, _name_, _filter_, 0) \ + PARQUET_BENCHMARKS_FILTER_DELETES(_type_, _name_, _filter_, 20) \ + PARQUET_BENCHMARKS_FILTER_DELETES(_type_, _name_, _filter_, 50) \ + PARQUET_BENCHMARKS_FILTER_DELETES(_type_, _name_, _filter_, 70) \ + PARQUET_BENCHMARKS_FILTER_DELETES(_type_, _name_, _filter_, 100) + +#define PARQUET_BENCHMARKS(_type_, _name_) \ + PARQUET_BENCHMARKS_FILTERS(_type_, _name_, 0) \ + PARQUET_BENCHMARKS_FILTERS(_type_, _name_, 20) \ + PARQUET_BENCHMARKS_FILTERS(_type_, _name_, 50) \ + PARQUET_BENCHMARKS_FILTERS(_type_, _name_, 70) \ + PARQUET_BENCHMARKS_FILTERS(_type_, _name_, 100) \ + BENCHMARK_DRAW_LINE(); + +PARQUET_BENCHMARKS(BIGINT(), BigInt); + +int main(int argc, char** argv) { + folly::Init init{&argc, &argv}; + memory::MemoryManager::initialize({}); + folly::runBenchmarks(); + return 0; +} diff --git a/velox/connectors/lakehouse/iceberg/tests/IcebergSplitReaderBenchmarkTest.cpp b/velox/connectors/lakehouse/iceberg/tests/IcebergSplitReaderBenchmarkTest.cpp new file mode 100644 index 000000000000..b2afd2d3b343 --- /dev/null +++ b/velox/connectors/lakehouse/iceberg/tests/IcebergSplitReaderBenchmarkTest.cpp @@ -0,0 +1,31 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "IcebergSplitReaderBenchmark.h" + +#include + +namespace facebook::velox::connector::lakehouse::iceberg::test { +namespace { +TEST(IcebergSplitReaderBenchmarkTest, basic) { + memory::MemoryManager::testingSetInstance(memory::MemoryManager::Options{}); + run(1, "BigInt", BIGINT(), 20, 0, 500); + run(1, "BigInt", BIGINT(), 50, 20, 500); + run(1, "BigInt", BIGINT(), 100, 20, 500); + run(1, "BigInt", BIGINT(), 100, 100, 500); +} +} // namespace +} // namespace facebook::velox::connector::lakehouse::iceberg::test diff --git a/velox/connectors/lakehouse/iceberg/tests/PlanBuilder.cpp b/velox/connectors/lakehouse/iceberg/tests/PlanBuilder.cpp new file mode 100644 index 000000000000..1cc90b8ebefb --- /dev/null +++ b/velox/connectors/lakehouse/iceberg/tests/PlanBuilder.cpp @@ -0,0 +1,2201 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "velox/connectors/lakehouse/iceberg/tests/PlanBuilder.h" +#include "velox/connectors/lakehouse/iceberg/IcebergConnector.h" +#include "velox/connectors/lakehouse/iceberg/IcebergTableHandle.h" +#include "velox/connectors/tpcds/TpcdsConnector.h" +#include "velox/connectors/tpch/TpchConnector.h" +#include "velox/duckdb/conversion/DuckParser.h" +#include "velox/exec/Aggregate.h" +#include "velox/exec/HashPartitionFunction.h" +#include "velox/exec/RoundRobinPartitionFunction.h" +#include "velox/exec/TableWriter.h" +#include "velox/exec/WindowFunction.h" +#include "velox/exec/tests/utils/AggregationResolver.h" +#include "velox/exec/tests/utils/FilterToExpression.h" +#include "velox/expression/ExprToSubfieldFilter.h" +#include "velox/expression/VectorReaders.h" + +using namespace facebook::velox; +using namespace facebook::velox::connector; +using namespace facebook::velox::connector::lakehouse::iceberg; + +namespace facebook::velox::exec::test { +core::TypedExprPtr parseExpr( + const std::string& text, + const RowTypePtr& rowType, + const parse::ParseOptions& options, + memory::MemoryPool* pool) { + auto untyped = parse::parseExpr(text, options); + return core::Expressions::inferTypes(untyped, rowType, pool); +} + +PlanBuilder& PlanBuilder::tableScan( + const RowTypePtr& outputType, + const std::vector& subfieldFilters, + const std::string& remainingFilter, + const RowTypePtr& dataColumns, + const connector::ColumnHandleMap& assignments) { + return TableScanBuilder(*this) + .filtersAsNode(filtersAsNode_ ? 
planNodeIdGenerator_ : nullptr) + .outputType(outputType) + .assignments(assignments) + .dataColumns(dataColumns) + .subfieldFilters(subfieldFilters) + .remainingFilter(remainingFilter) + .endTableScan(); +} + +PlanBuilder& PlanBuilder::tableScan( + const std::string& tableName, + const RowTypePtr& outputType, + const std::unordered_map& columnAliases, + const std::vector& subfieldFilters, + const std::string& remainingFilter, + const RowTypePtr& dataColumns, + const connector::ColumnHandleMap& assignments) { + return TableScanBuilder(*this) + .filtersAsNode(filtersAsNode_ ? planNodeIdGenerator_ : nullptr) + .tableName(tableName) + .outputType(outputType) + .columnAliases(columnAliases) + .dataColumns(dataColumns) + + .subfieldFilters(subfieldFilters) + .remainingFilter(remainingFilter) + .assignments(assignments) + .endTableScan(); +} + +PlanBuilder& PlanBuilder::tableScanWithPushDown( + const RowTypePtr& outputType, + const PushdownConfig& pushdownConfig, + const RowTypePtr& dataColumns, + const connector::ColumnHandleMap& assignments) { + return TableScanBuilder(*this) + .filtersAsNode(filtersAsNode_ ? planNodeIdGenerator_ : nullptr) + .outputType(outputType) + .assignments(assignments) + .dataColumns(dataColumns) + .subfieldFiltersMap(pushdownConfig.subfieldFiltersMap) + .remainingFilter(pushdownConfig.remainingFilter) + .endTableScan(); +} + +PlanBuilder::TableScanBuilder& PlanBuilder::TableScanBuilder::subfieldFilters( + std::vector subfieldFilters) { + VELOX_CHECK(subfieldFiltersMap_.empty()); + + if (subfieldFilters.empty()) { + return *this; + } + + // Parse subfield filters + auto queryCtx = core::QueryCtx::create(); + exec::SimpleExpressionEvaluator evaluator(queryCtx.get(), planBuilder_.pool_); + const RowTypePtr& parseType = dataColumns_ ? 
dataColumns_ : outputType_; + + for (const auto& filter : subfieldFilters) { + auto untypedExpr = parse::parseExpr(filter, planBuilder_.options_); + + // Parse directly to subfieldFiltersMap_ + auto filterExpr = core::Expressions::inferTypes( + untypedExpr, parseType, planBuilder_.pool_); + auto [subfield, subfieldFilter] = + exec::toSubfieldFilter(filterExpr, &evaluator); + + auto it = columnAliases_.find(subfield.toString()); + if (it != columnAliases_.end()) { + subfield = common::Subfield(it->second); + } + VELOX_CHECK_EQ( + subfieldFiltersMap_.count(subfield), + 0, + "Duplicate subfield: {}", + subfield.toString()); + + subfieldFiltersMap_[std::move(subfield)] = std::move(subfieldFilter); + } + return *this; +} + +PlanBuilder::TableScanBuilder& +PlanBuilder::TableScanBuilder::subfieldFiltersMap( + const common::SubfieldFilters& filtersMap) { + for (const auto& [k, v] : filtersMap) { + subfieldFiltersMap_[k.clone()] = v->clone(); + } + return *this; +} + +PlanBuilder::TableScanBuilder& PlanBuilder::TableScanBuilder::remainingFilter( + std::string remainingFilter) { + if (!remainingFilter.empty()) { + remainingFilter_ = parse::parseExpr(remainingFilter, planBuilder_.options_); + } + return *this; +} + +namespace { +void addConjunct( + const core::TypedExprPtr& conjunct, + core::TypedExprPtr& conjunction) { + if (!conjunction) { + conjunction = conjunct; + } else { + conjunction = std::make_shared( + BOOLEAN(), "and", conjunction, conjunct); + } +} +} // namespace + +core::PlanNodePtr PlanBuilder::TableScanBuilder::build(core::PlanNodeId id) { + VELOX_CHECK_NOT_NULL(outputType_, "outputType must be specified"); + std::unordered_map typedMapping; + bool hasAssignments = !(assignments_.empty()); + for (uint32_t i = 0; i < outputType_->size(); ++i) { + const auto& name = outputType_->nameOf(i); + const auto& type = outputType_->childAt(i); + + std::string columnName = name; + auto it = columnAliases_.find(name); + if (it != columnAliases_.end()) { + columnName = it->second; + typedMapping.emplace( + name, std::make_shared(type, columnName)); + } + + if (!hasAssignments) { + assignments_.insert( + {name, + std::make_shared( + columnName, IcebergColumnHandle::ColumnType::kRegular, type)}); + } + } + + const RowTypePtr& parseType = dataColumns_ ? 
dataColumns_ : outputType_; + + core::TypedExprPtr filterNodeExpr; + + if (filtersAsNode_) { + for (const auto& [subfield, filter] : subfieldFiltersMap_) { + auto filterExpr = core::test::filterToExpr( + subfield, filter.get(), parseType, planBuilder_.pool_); + + addConjunct(filterExpr, filterNodeExpr); + } + + subfieldFiltersMap_.clear(); + } + + core::TypedExprPtr remainingFilterExpr; + if (remainingFilter_) { + remainingFilterExpr = core::Expressions::inferTypes( + remainingFilter_, parseType, planBuilder_.pool_) + ->rewriteInputNames(typedMapping); + if (filtersAsNode_) { + addConjunct(remainingFilterExpr, filterNodeExpr); + remainingFilterExpr = nullptr; + } + } + + if (!tableHandle_) { + tableHandle_ = std::make_shared( + connectorId_, + tableName_, + true, + std::move(subfieldFiltersMap_), + remainingFilterExpr, + dataColumns_); + } + core::PlanNodePtr result = std::make_shared( + id, outputType_, tableHandle_, assignments_); + + if (filtersAsNode_ && filterNodeExpr) { + auto filterId = planNodeIdGenerator_->next(); + result = + std::make_shared(filterId, filterNodeExpr, result); + } + return result; +} + +PlanBuilder& PlanBuilder::values( + const std::vector& values, + bool parallelizable, + size_t repeatTimes) { + VELOX_CHECK_NULL(planNode_, "Values must be the source node"); + auto valuesCopy = values; + planNode_ = std::make_shared( + nextPlanNodeId(), std::move(valuesCopy), parallelizable, repeatTimes); + VELOX_CHECK(!planNode_->supportsBarrier()); + return *this; +} + +PlanBuilder& PlanBuilder::traceScan( + const std::string& traceNodeDir, + uint32_t pipelineId, + std::vector driverIds, + const RowTypePtr& outputType) { + planNode_ = std::make_shared( + nextPlanNodeId(), + traceNodeDir, + pipelineId, + std::move(driverIds), + outputType); + VELOX_CHECK(!planNode_->supportsBarrier()); + return *this; +} + +PlanBuilder& PlanBuilder::exchange( + const RowTypePtr& outputType, + VectorSerde::Kind serdeKind) { + VELOX_CHECK_NULL(planNode_, "Exchange must be the source node"); + planNode_ = std::make_shared( + nextPlanNodeId(), outputType, serdeKind); + VELOX_CHECK(!planNode_->supportsBarrier()); + return *this; +} + +namespace { +std::pair< + std::vector>, + std::vector> +parseOrderByClauses( + const std::vector& keys, + const RowTypePtr& inputType, + memory::MemoryPool* pool) { + std::vector> sortingKeys; + std::vector sortingOrders; + for (const auto& key : keys) { + auto orderBy = parse::parseOrderByExpr(key); + auto typedExpr = + core::Expressions::inferTypes(orderBy.expr, inputType, pool); + + auto sortingKey = + std::dynamic_pointer_cast(typedExpr); + VELOX_CHECK_NOT_NULL( + sortingKey, + "ORDER BY clause must use a column name, not an expression: {}", + key); + sortingKeys.emplace_back(sortingKey); + sortingOrders.emplace_back(orderBy.ascending, orderBy.nullsFirst); + } + + return {sortingKeys, sortingOrders}; +} +} // namespace + +PlanBuilder& PlanBuilder::mergeExchange( + const RowTypePtr& outputType, + const std::vector& keys, + VectorSerde::Kind serdeKind) { + VELOX_CHECK_NULL(planNode_, "MergeExchange must be the source node"); + auto [sortingKeys, sortingOrders] = + parseOrderByClauses(keys, outputType, pool_); + + planNode_ = std::make_shared( + nextPlanNodeId(), outputType, sortingKeys, sortingOrders, serdeKind); + VELOX_CHECK(!planNode_->supportsBarrier()); + return *this; +} + +PlanBuilder& PlanBuilder::optionalProject( + const std::vector& optionalProjections) { + if (optionalProjections.empty()) { + return *this; + } + return project(optionalProjections); +} + 
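For reference, a hedged usage sketch of the scan-building path above, roughly as a test might invoke it. The row type, filter strings, and helper name are illustrative only, and it assumes the trailing dataColumns/assignments arguments are defaulted as in the upstream exec::test::PlanBuilder:

// Hypothetical helper, illustrative only: builds an Iceberg TableScanNode with
// one pushed-down subfield filter and one remaining filter.
core::PlanNodePtr makeIcebergScanPlan() {
  auto outputType = ROW({"c0", "c1"}, {BIGINT(), DOUBLE()});
  return exec::test::PlanBuilder()
      .tableScan(
          outputType,
          /*subfieldFilters=*/{"c0 > 100"},
          /*remainingFilter=*/"c1 > 0.5")
      .planNode();
}
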
+PlanBuilder& PlanBuilder::projectExpressions( + const std::vector>& projections) { + std::vector expressions; + std::vector projectNames; + for (auto i = 0; i < projections.size(); ++i) { + expressions.push_back(inferTypes(projections[i])); + if (projections[i]->alias().has_value()) { + projectNames.push_back(projections[i]->alias().value()); + } else if ( + auto fieldExpr = + dynamic_cast(projections[i].get())) { + projectNames.push_back(fieldExpr->name()); + } else { + projectNames.push_back(fmt::format("p{}", i)); + } + } + planNode_ = std::make_shared( + nextPlanNodeId(), + std::move(projectNames), + std::move(expressions), + planNode_); + VELOX_CHECK(planNode_->supportsBarrier()); + return *this; +} + +PlanBuilder& PlanBuilder::projectExpressions( + const std::vector>& projections) { + std::vector expressions; + std::vector projectNames; + for (auto i = 0; i < projections.size(); ++i) { + expressions.push_back(projections[i]); + if (auto fieldExpr = + dynamic_cast(projections[i].get())) { + projectNames.push_back(fieldExpr->name()); + } else { + projectNames.push_back(fmt::format("p{}", i)); + } + } + planNode_ = std::make_shared( + nextPlanNodeId(), + std::move(projectNames), + std::move(expressions), + planNode_); + VELOX_CHECK(planNode_->supportsBarrier()); + return *this; +} + +PlanBuilder& PlanBuilder::project(const std::vector& projections) { + VELOX_CHECK_NOT_NULL(planNode_, "Project cannot be the source node"); + std::vector> expressions; + expressions.reserve(projections.size()); + for (auto i = 0; i < projections.size(); ++i) { + expressions.push_back(parse::parseExpr(projections[i], options_)); + } + return projectExpressions(expressions); +} + +PlanBuilder& PlanBuilder::parallelProject( + const std::vector>& projectionGroups, + const std::vector& noLoadColumns) { + VELOX_CHECK_NOT_NULL(planNode_, "ParallelProject cannot be the source node"); + + std::vector names; + + std::vector> exprGroups; + exprGroups.reserve(projectionGroups.size()); + + size_t i = 0; + + for (const auto& group : projectionGroups) { + std::vector typedExprs; + typedExprs.reserve(group.size()); + + for (const auto& expr : group) { + const auto typedExpr = inferTypes(parse::parseExpr(expr, options_)); + typedExprs.push_back(typedExpr); + + if (auto fieldExpr = + dynamic_cast(typedExpr.get())) { + names.push_back(fieldExpr->name()); + } else { + names.push_back(fmt::format("p{}", i)); + } + + ++i; + } + exprGroups.push_back(std::move(typedExprs)); + } + + planNode_ = std::make_shared( + nextPlanNodeId(), + std::move(names), + std::move(exprGroups), + noLoadColumns, + planNode_); + + return *this; +} + +PlanBuilder& PlanBuilder::lazyDereference( + const std::vector& projections) { + VELOX_CHECK_NOT_NULL(planNode_, "LazyDeference cannot be the source node"); + std::vector expressions; + std::vector projectNames; + for (auto i = 0; i < projections.size(); ++i) { + auto expr = inferTypes(parse::parseExpr(projections[i], options_)); + expressions.push_back(expr); + if (auto* fieldExpr = + dynamic_cast(expr.get())) { + projectNames.push_back(fieldExpr->name()); + } else { + projectNames.push_back(fmt::format("p{}", i)); + } + } + planNode_ = std::make_shared( + nextPlanNodeId(), + std::move(projectNames), + std::move(expressions), + planNode_); + VELOX_CHECK(planNode_->supportsBarrier()); + return *this; +} + +PlanBuilder& PlanBuilder::appendColumns( + const std::vector& newColumns) { + VELOX_CHECK_NOT_NULL(planNode_, "Project cannot be the source node"); + std::vector allProjections = 
planNode_->outputType()->names(); + for (const auto& column : newColumns) { + allProjections.push_back(column); + } + + return project(allProjections); +} + +PlanBuilder& PlanBuilder::optionalFilter(const std::string& optionalFilter) { + if (optionalFilter.empty()) { + return *this; + } + return filter(optionalFilter); +} + +PlanBuilder& PlanBuilder::filter(const std::string& filter) { + VELOX_CHECK_NOT_NULL(planNode_, "Filter cannot be the source node"); + auto expr = parseExpr(filter, planNode_->outputType(), options_, pool_); + planNode_ = + std::make_shared(nextPlanNodeId(), expr, planNode_); + VELOX_CHECK(planNode_->supportsBarrier()); + return *this; +} + +core::PlanNodePtr PlanBuilder::createIntermediateOrFinalAggregation( + core::AggregationNode::Step step, + const core::AggregationNode* partialAggNode) { + // Create intermediate or final aggregation using same grouping keys and same + // aggregate function names. + const auto& partialAggregates = partialAggNode->aggregates(); + const auto& groupingKeys = partialAggNode->groupingKeys(); + + auto numAggregates = partialAggregates.size(); + auto numGroupingKeys = groupingKeys.size(); + + std::vector aggregates; + aggregates.reserve(numAggregates); + for (auto i = 0; i < numAggregates; i++) { + // Resolve final or intermediate aggregation result type using raw input + // types for the partial aggregation. + auto name = partialAggregates[i].call->name(); + auto rawInputs = partialAggregates[i].call->inputs(); + + core::AggregationNode::Aggregate aggregate; + for (auto& rawInput : rawInputs) { + aggregate.rawInputTypes.push_back(rawInput->type()); + } + + auto type = + resolveAggregateType(name, step, aggregate.rawInputTypes, false); + std::vector inputs = {field(numGroupingKeys + i)}; + + // Add lambda inputs. + for (const auto& rawInput : rawInputs) { + if (rawInput->type()->kind() == TypeKind::FUNCTION) { + inputs.push_back(rawInput); + } + } + + aggregate.call = + std::make_shared(type, std::move(inputs), name); + aggregates.emplace_back(aggregate); + } + + auto aggregationNode = std::make_shared( + nextPlanNodeId(), + step, + groupingKeys, + partialAggNode->preGroupedKeys(), + partialAggNode->aggregateNames(), + aggregates, + partialAggNode->ignoreNullKeys(), + planNode_); + VELOX_CHECK_EQ( + aggregationNode->supportsBarrier(), aggregationNode->isPreGrouped()); + return aggregationNode; +} + +namespace { +/// Checks that specified plan node is a partial or intermediate aggregation or +/// local exchange over the same. Returns a pointer to core::AggregationNode. +const core::AggregationNode* findPartialAggregation( + const core::PlanNode* planNode) { + const core::AggregationNode* aggNode; + if (auto exchange = dynamic_cast(planNode)) { + aggNode = dynamic_cast( + exchange->sources()[0].get()); + } else if (auto merge = dynamic_cast(planNode)) { + aggNode = + dynamic_cast(merge->sources()[0].get()); + } else { + aggNode = dynamic_cast(planNode); + } + VELOX_CHECK_NOT_NULL( + aggNode, + "Current plan node must be one of: partial or intermediate aggregation, " + "local merge or exchange. 
Got: {}", + planNode->toString()); + VELOX_CHECK(exec::isPartialOutput(aggNode->step())); + return aggNode; +} +} // namespace + +PlanBuilder& PlanBuilder::intermediateAggregation() { + const auto* aggNode = findPartialAggregation(planNode_.get()); + VELOX_CHECK(exec::isRawInput(aggNode->step())); + + auto step = core::AggregationNode::Step::kIntermediate; + + planNode_ = createIntermediateOrFinalAggregation(step, aggNode); + return *this; +} + +PlanBuilder& PlanBuilder::finalAggregation() { + const auto* aggNode = findPartialAggregation(planNode_.get()); + + if (!exec::isRawInput(aggNode->step())) { + // If aggregation node is not the partial aggregation, keep looking again. + aggNode = findPartialAggregation(aggNode->sources()[0].get()); + VELOX_CHECK_NOT_NULL(aggNode); + } + + VELOX_CHECK(exec::isRawInput(aggNode->step())); + VELOX_CHECK(exec::isPartialOutput(aggNode->step())); + + auto step = core::AggregationNode::Step::kFinal; + + planNode_ = createIntermediateOrFinalAggregation(step, aggNode); + return *this; +} + +PlanBuilder::AggregatesAndNames PlanBuilder::createAggregateExpressionsAndNames( + const std::vector& aggregates, + const std::vector& masks, + core::AggregationNode::Step step, + const std::vector>& rawInputTypes) { + if (step == core::AggregationNode::Step::kPartial || + step == core::AggregationNode::Step::kSingle) { + VELOX_CHECK( + rawInputTypes.empty(), + "Do not provide raw inputs types for partial or single aggregation"); + } else { + VELOX_CHECK_EQ( + aggregates.size(), + rawInputTypes.size(), + "Do provide raw inputs types for final or intermediate aggregation"); + } + + std::vector aggs; + + AggregateTypeResolver resolver(step); + std::vector names; + aggs.reserve(aggregates.size()); + names.reserve(aggregates.size()); + + duckdb::ParseOptions options; + options.parseIntegerAsBigint = options_.parseIntegerAsBigint; + + for (auto i = 0; i < aggregates.size(); i++) { + auto& aggregate = aggregates[i]; + + if (!rawInputTypes.empty()) { + resolver.setRawInputTypes(rawInputTypes[i]); + } + + auto untypedExpr = duckdb::parseAggregateExpr(aggregate, options); + + core::AggregationNode::Aggregate agg; + + agg.call = std::dynamic_pointer_cast( + inferTypes(untypedExpr.expr)); + + if (step == core::AggregationNode::Step::kPartial || + step == core::AggregationNode::Step::kSingle) { + for (const auto& input : agg.call->inputs()) { + agg.rawInputTypes.push_back(input->type()); + } + } else { + agg.rawInputTypes = rawInputTypes[i]; + } + + if (untypedExpr.maskExpr != nullptr) { + auto maskExpr = + std::dynamic_pointer_cast( + inferTypes(untypedExpr.maskExpr)); + VELOX_CHECK_NOT_NULL( + maskExpr, + "FILTER clause must use a column name, not an expression: {}", + aggregate); + agg.mask = maskExpr; + } + + if (i < masks.size() && !masks[i].empty()) { + VELOX_CHECK_NULL( + agg.mask, + "Aggregation mask should be specified only once (either explicitly or using FILTER clause)"); + agg.mask = field(masks[i]); + } + + agg.distinct = untypedExpr.distinct; + + if (!untypedExpr.orderBy.empty()) { + auto* entry = exec::getAggregateFunctionEntry(agg.call->name()); + const auto& metadata = entry->metadata; + if (metadata.orderSensitive) { + VELOX_CHECK( + step == core::AggregationNode::Step::kSingle, + "Order sensitive aggregation over sorted inputs cannot be split " + "into partial and final: {}.", + aggregate); + } + } + + for (const auto& orderBy : untypedExpr.orderBy) { + auto sortingKey = + std::dynamic_pointer_cast( + inferTypes(orderBy.expr)); + VELOX_CHECK_NOT_NULL( + sortingKey, 
+ "ORDER BY clause must use a column name, not an expression: {}", + aggregate); + + agg.sortingKeys.push_back(sortingKey); + agg.sortingOrders.emplace_back(orderBy.ascending, orderBy.nullsFirst); + } + + aggs.emplace_back(agg); + + if (untypedExpr.expr->alias().has_value()) { + names.push_back(untypedExpr.expr->alias().value()); + } else { + names.push_back(fmt::format("a{}", i)); + } + } + + return {aggs, names}; +} + +PlanBuilder& PlanBuilder::aggregation( + const std::vector& groupingKeys, + const std::vector& preGroupedKeys, + const std::vector& aggregates, + const std::vector& masks, + core::AggregationNode::Step step, + bool ignoreNullKeys, + const std::vector>& rawInputTypes) { + auto aggregatesAndNames = createAggregateExpressionsAndNames( + aggregates, masks, step, rawInputTypes); + + // If the aggregationNode is over a GroupId, then global grouping sets + // need to be populated. + std::vector globalGroupingSets; + std::optional groupId; + if (auto groupIdNode = + dynamic_cast(planNode_.get())) { + for (auto i = 0; i < groupIdNode->groupingSets().size(); i++) { + if (groupIdNode->groupingSets().at(i).empty()) { + globalGroupingSets.push_back(i); + } + } + + if (!globalGroupingSets.empty()) { + // GroupId is the last column of the GroupIdNode. + groupId = field(groupIdNode->outputType()->names().back()); + } + } + + auto aggregationNode = std::make_shared( + nextPlanNodeId(), + step, + fields(groupingKeys), + fields(preGroupedKeys), + aggregatesAndNames.names, + aggregatesAndNames.aggregates, + globalGroupingSets, + groupId, + ignoreNullKeys, + planNode_); + VELOX_CHECK_EQ( + aggregationNode->supportsBarrier(), aggregationNode->isPreGrouped()); + planNode_ = std::move(aggregationNode); + return *this; +} + +PlanBuilder& PlanBuilder::streamingAggregation( + const std::vector& groupingKeys, + const std::vector& aggregates, + const std::vector& masks, + core::AggregationNode::Step step, + bool ignoreNullKeys) { + auto aggregatesAndNames = + createAggregateExpressionsAndNames(aggregates, masks, step); + auto aggregationNode = std::make_shared( + nextPlanNodeId(), + step, + fields(groupingKeys), + fields(groupingKeys), + aggregatesAndNames.names, + aggregatesAndNames.aggregates, + ignoreNullKeys, + planNode_); + VELOX_CHECK_EQ( + aggregationNode->supportsBarrier(), aggregationNode->isPreGrouped()); + planNode_ = std::move(aggregationNode); + return *this; +} + +PlanBuilder& PlanBuilder::groupId( + const std::vector& groupingKeys, + const std::vector>& groupingSets, + const std::vector& aggregationInputs, + std::string groupIdName) { + std::vector groupingKeyInfos; + groupingKeyInfos.reserve(groupingKeys.size()); + for (const auto& groupingKey : groupingKeys) { + auto untypedExpr = parse::parseExpr(groupingKey, options_); + const auto* fieldAccessExpr = + dynamic_cast(untypedExpr.get()); + VELOX_USER_CHECK( + fieldAccessExpr, + "Grouping key {} is not valid projection", + groupingKey); + std::string inputField = fieldAccessExpr->name(); + std::string outputField = untypedExpr->alias().has_value() + ? + // This is a projection with a column alias with the format + // "input_col as output_col". + untypedExpr->alias().value() + : + // This is a projection without a column alias. 
+ fieldAccessExpr->name(); + + core::GroupIdNode::GroupingKeyInfo keyInfos; + keyInfos.output = outputField; + keyInfos.input = field(inputField); + groupingKeyInfos.push_back(keyInfos); + } + + planNode_ = std::make_shared( + nextPlanNodeId(), + groupingSets, + std::move(groupingKeyInfos), + fields(aggregationInputs), + std::move(groupIdName), + planNode_); + VELOX_CHECK(!planNode_->supportsBarrier()); + return *this; +} + +namespace { +core::PlanNodePtr createLocalMergeNode( + const core::PlanNodeId& id, + const std::vector& keys, + std::vector sources, + memory::MemoryPool* pool) { + const auto& inputType = sources[0]->outputType(); + auto [sortingKeys, sortingOrders] = + parseOrderByClauses(keys, inputType, pool); + + return std::make_shared( + id, std::move(sortingKeys), std::move(sortingOrders), std::move(sources)); +} +} // namespace + +PlanBuilder& PlanBuilder::localMerge(const std::vector& keys) { + planNode_ = createLocalMergeNode(nextPlanNodeId(), keys, {planNode_}, pool_); + return *this; +} + +PlanBuilder& PlanBuilder::expand( + const std::vector>& projections) { + VELOX_CHECK(!projections.empty(), "projections must not be empty."); + const auto numColumns = projections[0].size(); + const auto numRows = projections.size(); + std::vector aliases; + aliases.reserve(numColumns); + + std::vector> projectExprs; + projectExprs.reserve(projections.size()); + + for (auto i = 0; i < numRows; i++) { + std::vector projectExpr; + VELOX_CHECK_EQ(numColumns, projections[i].size()); + for (auto j = 0; j < numColumns; j++) { + auto untypedExpression = parse::parseExpr(projections[i][j], options_); + auto typedExpression = inferTypes(untypedExpression); + + if (i == 0) { + if (untypedExpression->alias().has_value()) { + aliases.push_back(untypedExpression->alias().value()); + } else { + auto fieldExpr = dynamic_cast( + untypedExpression.get()); + VELOX_CHECK_NOT_NULL(fieldExpr); + aliases.push_back(fieldExpr->name()); + } + projectExpr.push_back(typedExpression); + } else { + // The types of values in 2nd and subsequent rows must much types in the + // 1st row. 
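// Illustrative note (not part of the original change): given projection rows
// such as {{"c0", "0 as gid"}, {"null", "1 as gid"}}, the "null" in a later
// row carries no type of its own, so the code below replaces it with a null
// ConstantTypedExpr of the corresponding first-row type (c0's type here).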
+ const auto& expectedType = projectExprs[0][j]->type(); + if (typedExpression->type()->equivalent(*expectedType)) { + projectExpr.push_back(typedExpression); + } else { + auto constantExpr = + dynamic_cast(untypedExpression.get()); + VELOX_CHECK_NOT_NULL(constantExpr); + VELOX_CHECK(constantExpr->value().isNull()); + projectExpr.push_back(std::make_shared( + expectedType, variant::null(expectedType->kind()))); + } + } + } + projectExprs.push_back(projectExpr); + } + + planNode_ = std::make_shared( + nextPlanNodeId(), projectExprs, std::move(aliases), planNode_); + VELOX_CHECK(!planNode_->supportsBarrier()); + return *this; +} + +PlanBuilder& PlanBuilder::localMerge( + const std::vector& keys, + std::vector sources) { + VELOX_CHECK_NULL(planNode_, "localMerge() must be the first call"); + VELOX_CHECK_GE( + sources.size(), 1, "localMerge() requires at least one source"); + + planNode_ = + createLocalMergeNode(nextPlanNodeId(), keys, std::move(sources), pool_); + return *this; +} + +PlanBuilder& PlanBuilder::orderBy( + const std::vector& keys, + bool isPartial) { + VELOX_CHECK_NOT_NULL(planNode_, "OrderBy cannot be the source node"); + auto [sortingKeys, sortingOrders] = + parseOrderByClauses(keys, planNode_->outputType(), pool_); + + planNode_ = std::make_shared( + nextPlanNodeId(), sortingKeys, sortingOrders, isPartial, planNode_); + VELOX_CHECK(!planNode_->supportsBarrier()); + return *this; +} + +PlanBuilder& PlanBuilder::topN( + const std::vector& keys, + int32_t count, + bool isPartial) { + VELOX_CHECK_NOT_NULL(planNode_, "TopN cannot be the source node"); + auto [sortingKeys, sortingOrders] = + parseOrderByClauses(keys, planNode_->outputType(), pool_); + planNode_ = std::make_shared( + nextPlanNodeId(), + sortingKeys, + sortingOrders, + count, + isPartial, + planNode_); + VELOX_CHECK(!planNode_->supportsBarrier()); + return *this; +} + +PlanBuilder& PlanBuilder::limit(int64_t offset, int64_t count, bool isPartial) { + planNode_ = std::make_shared( + nextPlanNodeId(), offset, count, isPartial, planNode_); + VELOX_CHECK(planNode_->supportsBarrier()); + return *this; +} + +PlanBuilder& PlanBuilder::enforceSingleRow() { + planNode_ = + std::make_shared(nextPlanNodeId(), planNode_); + return *this; +} + +PlanBuilder& PlanBuilder::assignUniqueId( + const std::string& idName, + const int32_t taskUniqueId) { + planNode_ = std::make_shared( + nextPlanNodeId(), idName, taskUniqueId, planNode_); + VELOX_CHECK(planNode_->supportsBarrier()); + return *this; +} + +namespace { +core::PartitionFunctionSpecPtr createPartitionFunctionSpec( + const RowTypePtr& inputType, + const std::vector& keys, + memory::MemoryPool* pool) { + if (keys.empty()) { + return std::make_shared(); + } else { + std::vector keyIndices; + keyIndices.reserve(keys.size()); + + std::vector constValues; + constValues.reserve(keys.size()); + + for (const auto& key : keys) { + if (auto field = + std::dynamic_pointer_cast( + key)) { + keyIndices.push_back(inputType->getChildIdx(field->name())); + } else if ( + auto constant = + std::dynamic_pointer_cast(key)) { + keyIndices.push_back(kConstantChannel); + constValues.push_back(constant->toConstantVector(pool)); + } else { + VELOX_UNREACHABLE(); + } + } + return std::make_shared( + inputType, std::move(keyIndices), std::move(constValues)); + } +} + +RowTypePtr concat(const RowTypePtr& a, const RowTypePtr& b) { + std::vector names = a->names(); + std::vector types = a->children(); + names.insert(names.end(), b->names().begin(), b->names().end()); + types.insert(types.end(), 
b->children().begin(), b->children().end()); + return ROW(std::move(names), std::move(types)); +} + +RowTypePtr extract( + const RowTypePtr& type, + const std::vector& childNames) { + std::vector names = childNames; + + std::vector types; + types.reserve(childNames.size()); + for (const auto& name : childNames) { + types.emplace_back(type->findChild(name)); + } + return ROW(std::move(names), std::move(types)); +} + +// Rename columns in the given row type. +RowTypePtr rename( + const RowTypePtr& type, + const std::vector& newNames) { + VELOX_CHECK_EQ( + type->size(), + newNames.size(), + "Number of types and new type names should be the same"); + std::vector names{newNames}; + std::vector types{type->children()}; + return ROW(std::move(names), std::move(types)); +} + +core::PlanNodePtr createLocalPartitionNode( + const core::PlanNodeId& planNodeId, + const std::vector& keys, + bool scaleWriter, + const std::vector& sources, + memory::MemoryPool* pool) { + auto partitionFunctionFactory = + createPartitionFunctionSpec(sources[0]->outputType(), keys, pool); + return std::make_shared( + planNodeId, + keys.empty() ? core::LocalPartitionNode::Type::kGather + : core::LocalPartitionNode::Type::kRepartition, + scaleWriter, + partitionFunctionFactory, + sources); +} +} // namespace + +PlanBuilder& PlanBuilder::partitionedOutput( + const std::vector& keys, + int numPartitions, + const std::vector& outputLayout, + VectorSerde::Kind serdeKind) { + return partitionedOutput(keys, numPartitions, false, outputLayout, serdeKind); +} + +PlanBuilder& PlanBuilder::partitionedOutput( + const std::vector& keys, + int numPartitions, + bool replicateNullsAndAny, + const std::vector& outputLayout, + VectorSerde::Kind serdeKind) { + VELOX_CHECK_NOT_NULL( + planNode_, "PartitionedOutput cannot be the source node"); + + auto keyExprs = exprs(keys, planNode_->outputType()); + return partitionedOutput( + keys, + numPartitions, + replicateNullsAndAny, + createPartitionFunctionSpec(planNode_->outputType(), keyExprs, pool_), + outputLayout, + serdeKind); +} + +PlanBuilder& PlanBuilder::partitionedOutput( + const std::vector& keys, + int numPartitions, + bool replicateNullsAndAny, + core::PartitionFunctionSpecPtr partitionFunctionSpec, + const std::vector& outputLayout, + VectorSerde::Kind serdeKind) { + VELOX_CHECK_NOT_NULL( + planNode_, "PartitionedOutput cannot be the source node"); + auto outputType = outputLayout.empty() + ? planNode_->outputType() + : extract(planNode_->outputType(), outputLayout); + planNode_ = std::make_shared( + nextPlanNodeId(), + core::PartitionedOutputNode::Kind::kPartitioned, + exprs(keys, planNode_->outputType()), + numPartitions, + replicateNullsAndAny, + std::move(partitionFunctionSpec), + outputType, + serdeKind, + planNode_); + VELOX_CHECK(!planNode_->supportsBarrier()); + return *this; +} + +PlanBuilder& PlanBuilder::partitionedOutputBroadcast( + const std::vector& outputLayout, + VectorSerde::Kind serdeKind) { + VELOX_CHECK_NOT_NULL( + planNode_, "PartitionedOutput cannot be the source node"); + auto outputType = outputLayout.empty() + ? 
planNode_->outputType() + : extract(planNode_->outputType(), outputLayout); + planNode_ = core::PartitionedOutputNode::broadcast( + nextPlanNodeId(), 1, outputType, serdeKind, planNode_); + VELOX_CHECK(!planNode_->supportsBarrier()); + return *this; +} + +PlanBuilder& PlanBuilder::partitionedOutputArbitrary( + const std::vector& outputLayout, + VectorSerde::Kind serdeKind) { + VELOX_CHECK_NOT_NULL( + planNode_, "PartitionedOutput cannot be the source node"); + auto outputType = outputLayout.empty() + ? planNode_->outputType() + : extract(planNode_->outputType(), outputLayout); + planNode_ = core::PartitionedOutputNode::arbitrary( + nextPlanNodeId(), outputType, serdeKind, planNode_); + VELOX_CHECK(!planNode_->supportsBarrier()); + return *this; +} + +PlanBuilder& PlanBuilder::localPartition( + const std::vector& keys, + const std::vector& sources) { + VELOX_CHECK_NULL(planNode_, "localPartition() must be the first call"); + planNode_ = createLocalPartitionNode( + nextPlanNodeId(), + exprs(keys, sources[0]->outputType()), + /*scaleWriter=*/false, + sources, + pool_); + VELOX_CHECK(planNode_->supportsBarrier()); + return *this; +} + +PlanBuilder& PlanBuilder::localPartition(const std::vector& keys) { + planNode_ = createLocalPartitionNode( + nextPlanNodeId(), + exprs(keys, planNode_->outputType()), + /*scaleWriter=*/false, + {planNode_}, + pool_); + VELOX_CHECK(planNode_->supportsBarrier()); + return *this; +} + +PlanBuilder& PlanBuilder::scaleWriterlocalPartition( + const std::vector& keys) { + std::vector keyIndices; + keyIndices.reserve(keys.size()); + for (const auto& key : keys) { + keyIndices.push_back(planNode_->outputType()->getChildIdx(key)); + } + auto icebergPartitionFunctionFactory = + std::make_shared( + 1009, keyIndices, std::vector{}); + planNode_ = std::make_shared( + nextPlanNodeId(), + core::LocalPartitionNode::Type::kRepartition, + true, + icebergPartitionFunctionFactory, + std::vector{planNode_}); + VELOX_CHECK(!planNode_->supportsBarrier()); + return *this; +} + +PlanBuilder& PlanBuilder::localPartition( + int numBuckets, + const std::vector& bucketChannels, + const std::vector& constValues) { + auto icebergPartitionFunctionFactory = + std::make_shared( + numBuckets, bucketChannels, constValues); + planNode_ = std::make_shared( + nextPlanNodeId(), + core::LocalPartitionNode::Type::kRepartition, + /*scaleWriter=*/false, + std::move(icebergPartitionFunctionFactory), + std::vector{planNode_}); + VELOX_CHECK(planNode_->supportsBarrier()); + return *this; +} + +// PlanBuilder& PlanBuilder::localPartitionByBucket( +// const std::shared_ptr& +// bucketProperty) { +// VELOX_CHECK_NOT_NULL(planNode_, "LocalPartition cannot be the source node"); std::vector bucketChannels; for (const auto& bucketColumn : bucketProperty->bucketedBy()) { +// bucketChannels.push_back( +// planNode_->outputType()->getChildIdx(bucketColumn)); +// } +// auto icebergPartitionFunctionFactory = +// std::make_shared( +// bucketProperty->bucketCount(), +// bucketChannels, +// std::vector{}); +// planNode_ = std::make_shared( +// nextPlanNodeId(), +// core::LocalPartitionNode::Type::kRepartition, +// /*scaleWriter=*/false, +// std::move(icebergPartitionFunctionFactory), +// std::vector{planNode_}); +// VELOX_CHECK(planNode_->supportsBarrier()); +// return *this; +// } + +namespace { +core::PlanNodePtr createLocalPartitionRoundRobinNode( + const core::PlanNodeId& planNodeId, + bool scaleWriter, + const std::vector& sources) { + return std::make_shared( + planNodeId, + 
core::LocalPartitionNode::Type::kRepartition, + scaleWriter, + std::make_shared(), + sources); +} +} // namespace + +PlanBuilder& PlanBuilder::localPartitionRoundRobin( + const std::vector& sources) { + VELOX_CHECK_NULL( + planNode_, "localPartitionRoundRobin() must be the first call"); + planNode_ = createLocalPartitionRoundRobinNode( + nextPlanNodeId(), /*scaleWriter=*/false, sources); + VELOX_CHECK(planNode_->supportsBarrier()); + return *this; +} + +PlanBuilder& PlanBuilder::localPartitionRoundRobin() { + planNode_ = createLocalPartitionRoundRobinNode( + nextPlanNodeId(), /*scaleWriter=*/false, {planNode_}); + VELOX_CHECK(planNode_->supportsBarrier()); + return *this; +} + +PlanBuilder& PlanBuilder::scaleWriterlocalPartitionRoundRobin() { + planNode_ = createLocalPartitionRoundRobinNode( + nextPlanNodeId(), /*scaleWriter=*/true, {planNode_}); + VELOX_CHECK(!planNode_->supportsBarrier()); + return *this; +} + +namespace { +class RoundRobinRowPartitionFunction : public core::PartitionFunction { + public: + explicit RoundRobinRowPartitionFunction(int numPartitions) + : numPartitions_{numPartitions} {} + + std::optional partition( + const RowVector& input, + std::vector& partitions) override { + auto size = input.size(); + partitions.resize(size); + for (auto i = 0; i < size; ++i) { + partitions[i] = counter_ % numPartitions_; + ++counter_; + } + return std::nullopt; + } + + private: + const int numPartitions_; + uint32_t counter_{0}; +}; + +class RoundRobinRowPartitionFunctionSpec : public core::PartitionFunctionSpec { + public: + std::unique_ptr create( + int numPartitions, + bool /*localExchange*/) const override { + return std::make_unique(numPartitions); + } + + std::string toString() const override { + return "ROUND ROBIN ROW"; + } + + folly::dynamic serialize() const override { + folly::dynamic obj = folly::dynamic::object; + obj["name"] = fmt::format("RoundRobinRowPartitionFunctionSpec"); + return obj; + } + + static core::PartitionFunctionSpecPtr deserialize( + const folly::dynamic& /*obj*/, + void* /*context*/) { + return std::make_shared(); + } +}; +} // namespace + +PlanBuilder& PlanBuilder::localPartitionRoundRobinRow() { + planNode_ = std::make_shared( + nextPlanNodeId(), + core::LocalPartitionNode::Type::kRepartition, + /*scaleWriter=*/false, + std::make_shared(), + std::vector{planNode_}); + VELOX_CHECK(planNode_->supportsBarrier()); + return *this; +} + +PlanBuilder& PlanBuilder::hashJoin( + const std::vector& leftKeys, + const std::vector& rightKeys, + const core::PlanNodePtr& build, + const std::string& filter, + const std::vector& outputLayout, + core::JoinType joinType, + bool nullAware) { + VELOX_CHECK_NOT_NULL(planNode_, "HashJoin cannot be the source node"); + VELOX_CHECK_EQ(leftKeys.size(), rightKeys.size()); + + auto leftType = planNode_->outputType(); + auto rightType = build->outputType(); + auto resultType = concat(leftType, rightType); + core::TypedExprPtr filterExpr; + if (!filter.empty()) { + filterExpr = parseExpr(filter, resultType, options_, pool_); + } + + RowTypePtr outputType; + if (isLeftSemiProjectJoin(joinType) || isRightSemiProjectJoin(joinType)) { + std::vector names = outputLayout; + + // Last column in 'outputLayout' must be a boolean 'match'. 
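// Illustrative note (not part of the original change): for a semi-project
// join (e.g. kLeftSemiProject) with outputLayout {"t0", "u0", "match"}, the
// leading names are resolved against the concatenated probe/build row type,
// while the trailing "match" column is assigned BOOLEAN below.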
+ std::vector types; + types.reserve(outputLayout.size()); + for (auto i = 0; i < outputLayout.size() - 1; ++i) { + types.emplace_back(resultType->findChild(outputLayout[i])); + } + types.emplace_back(BOOLEAN()); + + outputType = ROW(std::move(names), std::move(types)); + } else { + outputType = extract(resultType, outputLayout); + } + + auto leftKeyFields = fields(leftType, leftKeys); + auto rightKeyFields = fields(rightType, rightKeys); + + planNode_ = std::make_shared( + nextPlanNodeId(), + joinType, + nullAware, + leftKeyFields, + rightKeyFields, + std::move(filterExpr), + std::move(planNode_), + build, + outputType); + VELOX_CHECK(!planNode_->supportsBarrier()); + return *this; +} + +PlanBuilder& PlanBuilder::mergeJoin( + const std::vector& leftKeys, + const std::vector& rightKeys, + const core::PlanNodePtr& build, + const std::string& filter, + const std::vector& outputLayout, + core::JoinType joinType) { + VELOX_CHECK_NOT_NULL(planNode_, "MergeJoin cannot be the source node"); + VELOX_CHECK_EQ(leftKeys.size(), rightKeys.size()); + + auto leftType = planNode_->outputType(); + auto rightType = build->outputType(); + auto resultType = concat(leftType, rightType); + core::TypedExprPtr filterExpr; + if (!filter.empty()) { + filterExpr = parseExpr(filter, resultType, options_, pool_); + } + auto outputType = extract(resultType, outputLayout); + auto leftKeyFields = fields(leftType, leftKeys); + auto rightKeyFields = fields(rightType, rightKeys); + + planNode_ = std::make_shared( + nextPlanNodeId(), + joinType, + leftKeyFields, + rightKeyFields, + std::move(filterExpr), + std::move(planNode_), + build, + outputType); + VELOX_CHECK(planNode_->supportsBarrier()); + return *this; +} + +PlanBuilder& PlanBuilder::nestedLoopJoin( + const core::PlanNodePtr& right, + const std::vector& outputLayout, + core::JoinType joinType) { + return nestedLoopJoin(right, "", outputLayout, joinType); +} + +PlanBuilder& PlanBuilder::nestedLoopJoin( + const core::PlanNodePtr& right, + const std::string& joinCondition, + const std::vector& outputLayout, + core::JoinType joinType) { + VELOX_CHECK_NOT_NULL(planNode_, "NestedLoopJoin cannot be the source node"); + auto resultType = concat(planNode_->outputType(), right->outputType()); + if (isLeftSemiProjectJoin(joinType)) { + resultType = concat(resultType, ROW({"match"}, {BOOLEAN()})); + } + + auto outputType = extract(resultType, outputLayout); + + core::TypedExprPtr joinConditionExpr{}; + if (!joinCondition.empty()) { + joinConditionExpr = parseExpr(joinCondition, resultType, options_, pool_); + } + + planNode_ = std::make_shared( + nextPlanNodeId(), + joinType, + std::move(joinConditionExpr), + std::move(planNode_), + right, + outputType); + VELOX_CHECK(!planNode_->supportsBarrier()); + return *this; +} + +namespace { +core::TypedExprPtr removeCastTypedExpr(const core::TypedExprPtr& expr) { + core::TypedExprPtr convertedTypedExpr = expr; + while (auto castTypedExpr = + std::dynamic_pointer_cast( + convertedTypedExpr)) { + VELOX_CHECK_EQ(castTypedExpr->inputs().size(), 1); + convertedTypedExpr = castTypedExpr->inputs()[0]; + } + return convertedTypedExpr; +} + +template +core::TypedExprPtr castConstantArrayConditionInput( + const core::ConstantTypedExprPtr& constantExpr) { + if (SrcKind == DstKind) { + return constantExpr; + } + + auto srcVector = constantExpr->valueVector(); + BaseVector::flattenVector(srcVector); + auto* srcArrayVector = srcVector->asChecked(); + VELOX_CHECK_EQ(srcArrayVector->size(), 1); + using SrcCppType = typename 
velox::TypeTraits::NativeType; + auto* srcValueVector = srcArrayVector->elements()->asFlatVector(); + + const auto dstType = createScalarType(DstKind); + auto dstValueVector = BaseVector::create( + dstType, srcValueVector->size(), srcArrayVector->pool()); + using DstCppType = typename velox::TypeTraits::NativeType; + auto* dstFlatValueVector = + dstValueVector->template asFlatVector(); + + velox::DecodedVector decodedSrcValueVector{*srcValueVector}; + velox::exec::VectorReader srcValueReader{&decodedSrcValueVector}; + for (auto row = 0; row < srcValueVector->size(); ++row) { + const auto value = srcValueReader[row]; + dstFlatValueVector->set(row, static_cast(value)); + } + auto dstArrayVector = std::make_shared( + srcArrayVector->pool(), + ARRAY(dstType), + nullptr, + 1, + srcArrayVector->offsets(), + srcArrayVector->sizes(), + dstValueVector); + return std::make_shared(dstArrayVector); +} + +template +core::TypedExprPtr castConstantConditionInput( + const core::ConstantTypedExprPtr& constantExpr) { + if (SrcKind == DstKind) { + return constantExpr; + } + const auto dstType = createScalarType(DstKind); + return std::make_shared( + dstType, + static_cast::NativeType>( + constantExpr->value().value())); +} + +template +core::TypedExprPtr castIndexConditionInputExpr(const core::TypedExprPtr& expr) { + core::TypedExprPtr convertedTypedExpr = removeCastTypedExpr(expr); + if (std::dynamic_pointer_cast( + convertedTypedExpr)) { + VELOX_CHECK( + convertedTypedExpr->type()->kind() == Kind || + std::dynamic_pointer_cast(convertedTypedExpr->type()) + ->elementType() + ->kind() == Kind); + return convertedTypedExpr; + } + + const auto constantTypedExpr = + std::dynamic_pointer_cast( + convertedTypedExpr); + VELOX_CHECK_NOT_NULL(constantTypedExpr, "{}", expr->toString()); + + if (constantTypedExpr->type()->isArray()) { + const auto arrayType = + std::dynamic_pointer_cast(constantTypedExpr->type()); + if (arrayType->elementType()->kind() == Kind) { + return constantTypedExpr; + } + switch (arrayType->elementType()->kind()) { + case TypeKind::INTEGER: + return castConstantArrayConditionInput( + constantTypedExpr); + case TypeKind::BIGINT: + return castConstantArrayConditionInput( + constantTypedExpr); + case TypeKind::SMALLINT: + return castConstantArrayConditionInput( + constantTypedExpr); + default: + VELOX_UNSUPPORTED( + "Incompatible condition input type: {}, index column kind: {}", + constantTypedExpr->type()->toString(), + Kind); + } + } + + if (constantTypedExpr->type()->kind() == Kind) { + return convertedTypedExpr; + } + + switch (constantTypedExpr->type()->kind()) { + case TypeKind::INTEGER: + return castConstantConditionInput( + constantTypedExpr); + case TypeKind::BIGINT: + return castConstantConditionInput( + constantTypedExpr); + case TypeKind::SMALLINT: + return castConstantConditionInput( + constantTypedExpr); + default: + VELOX_UNSUPPORTED( + "Incompatible condition input type: {}, index column kind: {}", + constantTypedExpr->type()->toString(), + Kind); + } +} + +core::TypedExprPtr castIndexConditionInputExpr( + const core::TypedExprPtr& expr, + const TypePtr& indexType) { + switch (indexType->kind()) { + case TypeKind::INTEGER: + return castIndexConditionInputExpr(expr); + case TypeKind::BIGINT: + return castIndexConditionInputExpr(expr); + case TypeKind::SMALLINT: + return castIndexConditionInputExpr(expr); + default: + VELOX_UNSUPPORTED("Unsupported index column kind: {}", expr->toString()); + } +} +} // namespace + +// static +core::IndexLookupConditionPtr 
PlanBuilder::parseIndexJoinCondition( + const std::string& joinCondition, + const RowTypePtr& rowType, + memory::MemoryPool* pool) { + const auto joinConditionExpr = + parseExpr(joinCondition, rowType, parse::ParseOptions{}, pool); + const auto typedCallExpr = + std::dynamic_pointer_cast(joinConditionExpr); + VELOX_CHECK_NOT_NULL(typedCallExpr); + if (typedCallExpr->name() == "contains") { + VELOX_CHECK_EQ(typedCallExpr->inputs().size(), 2); + const auto keyColumnExpr = + std::dynamic_pointer_cast( + removeCastTypedExpr(typedCallExpr->inputs()[1])); + VELOX_CHECK_NOT_NULL( + keyColumnExpr, "{}", typedCallExpr->inputs()[1]->toString()); + return std::make_shared( + keyColumnExpr, + castIndexConditionInputExpr( + typedCallExpr->inputs()[0], keyColumnExpr->type())); + } + + if (typedCallExpr->name() == "between") { + VELOX_CHECK_EQ(typedCallExpr->inputs().size(), 3); + const auto keyColumnExpr = + std::dynamic_pointer_cast( + removeCastTypedExpr(typedCallExpr->inputs()[0])); + VELOX_CHECK_NOT_NULL( + keyColumnExpr, "{}", typedCallExpr->inputs()[0]->toString()); + return std::make_shared( + keyColumnExpr, + castIndexConditionInputExpr( + typedCallExpr->inputs()[1], keyColumnExpr->type()), + castIndexConditionInputExpr( + typedCallExpr->inputs()[2], keyColumnExpr->type())); + } + + if (typedCallExpr->name() == "eq") { + VELOX_CHECK_EQ(typedCallExpr->inputs().size(), 2); + const auto keyColumnExpr = + std::dynamic_pointer_cast( + removeCastTypedExpr(typedCallExpr->inputs()[0])); + VELOX_CHECK_NOT_NULL( + keyColumnExpr, "{}", typedCallExpr->inputs()[0]->toString()); + return std::make_shared( + keyColumnExpr, + castIndexConditionInputExpr( + typedCallExpr->inputs()[1], keyColumnExpr->type())); + } + VELOX_USER_FAIL( + "Invalid index join condition: {}, and we only support in, between, and equal conditions", + joinCondition); +} + +PlanBuilder& PlanBuilder::indexLookupJoin( + const std::vector& leftKeys, + const std::vector& rightKeys, + const core::TableScanNodePtr& right, + const std::vector& joinConditions, + const std::string& filter, + bool hasMarker, + const std::vector& outputLayout, + core::JoinType joinType) { + VELOX_CHECK_NOT_NULL(planNode_, "indexLookupJoin cannot be the source node"); + auto inputType = concat(planNode_->outputType(), right->outputType()); + if (hasMarker) { + auto names = inputType->names(); + names.push_back(outputLayout.back()); + auto types = inputType->children(); + types.push_back(BOOLEAN()); + inputType = ROW(std::move(names), std::move(types)); + } + auto outputType = extract(inputType, outputLayout); + auto leftKeyFields = fields(planNode_->outputType(), leftKeys); + auto rightKeyFields = fields(right->outputType(), rightKeys); + + std::vector joinConditionPtrs{}; + joinConditionPtrs.reserve(joinConditions.size()); + for (const auto& joinCondition : joinConditions) { + joinConditionPtrs.push_back( + parseIndexJoinCondition(joinCondition, inputType, pool_)); + } + + // Parse filter expression if provided + core::TypedExprPtr filterExpr; + if (!filter.empty()) { + filterExpr = parseExpr(filter, inputType, options_, pool_); + } + + planNode_ = std::make_shared( + nextPlanNodeId(), + joinType, + std::move(leftKeyFields), + std::move(rightKeyFields), + std::move(joinConditionPtrs), + filterExpr, + hasMarker, + std::move(planNode_), + right, + std::move(outputType)); + VELOX_CHECK(planNode_->supportsBarrier()); + return *this; +} + +PlanBuilder& PlanBuilder::unnest( + const std::vector& replicateColumns, + const std::vector& unnestColumns, + const std::optional& 
ordinalColumn, + const std::optional& markerName) { + VELOX_CHECK_NOT_NULL(planNode_, "Unnest cannot be the source node"); + std::vector> + replicateFields; + replicateFields.reserve(replicateColumns.size()); + for (const auto& name : replicateColumns) { + replicateFields.emplace_back(field(name)); + } + + std::vector> unnestFields; + unnestFields.reserve(unnestColumns.size()); + for (const auto& name : unnestColumns) { + unnestFields.emplace_back(field(name)); + } + + std::vector unnestNames; + for (const auto& name : unnestColumns) { + auto input = planNode_->outputType()->findChild(name); + if (input->isArray()) { + unnestNames.push_back(name + "_e"); + } else if (input->isMap()) { + unnestNames.push_back(name + "_k"); + unnestNames.push_back(name + "_v"); + } else { + VELOX_NYI( + "Unsupported type of unnest variable. Expected ARRAY or MAP, but got {}.", + input->toString()); + } + } + + planNode_ = std::make_shared( + nextPlanNodeId(), + replicateFields, + unnestFields, + unnestNames, + ordinalColumn, + markerName, + planNode_); + VELOX_CHECK(planNode_->supportsBarrier()); + return *this; +} + +namespace { +std::string throwWindowFunctionDoesntExist(const std::string& name) { + std::stringstream error; + error << "Window function doesn't exist: " << name << "."; + if (exec::windowFunctions().empty()) { + error << " Registry of window functions is empty. " + "Make sure to register some window functions."; + } + VELOX_USER_FAIL(error.str()); +} + +std::string throwWindowFunctionSignatureNotSupported( + const std::string& name, + const std::vector& types, + const std::vector& signatures) { + std::stringstream error; + error << "Window function signature is not supported: " + << toString(name, types) + << ". Supported signatures: " << toString(signatures) << "."; + VELOX_USER_FAIL(error.str()); +} + +TypePtr resolveWindowType( + const std::string& windowFunctionName, + const std::vector& inputTypes, + bool nullOnFailure) { + if (auto signatures = exec::getWindowFunctionSignatures(windowFunctionName)) { + for (const auto& signature : signatures.value()) { + exec::SignatureBinder binder(*signature, inputTypes); + if (binder.tryBind()) { + return binder.tryResolveType(signature->returnType()); + } + } + + if (nullOnFailure) { + return nullptr; + } + throwWindowFunctionSignatureNotSupported( + windowFunctionName, inputTypes, signatures.value()); + } + + if (nullOnFailure) { + return nullptr; + } + throwWindowFunctionDoesntExist(windowFunctionName); + return nullptr; +} + +class WindowTypeResolver { + public: + explicit WindowTypeResolver() + : previousHook_(core::Expressions::getResolverHook()) { + core::Expressions::setTypeResolverHook( + [&](const auto& inputs, const auto& expr, bool nullOnFailure) { + return resolveType(inputs, expr, nullOnFailure); + }); + } + + ~WindowTypeResolver() { + core::Expressions::setTypeResolverHook(previousHook_); + } + + private: + TypePtr resolveType( + const std::vector& inputs, + const std::shared_ptr& expr, + bool nullOnFailure) const { + std::vector types; + for (auto& input : inputs) { + types.push_back(input->type()); + } + + const auto& functionName = expr->name(); + + return resolveWindowType(functionName, types, nullOnFailure); + } + + const core::Expressions::TypeResolverHook previousHook_; +}; + +const core::WindowNode::Frame createWindowFrame( + const duckdb::IExprWindowFrame& windowFrame, + const TypePtr& inputRow, + memory::MemoryPool* pool) { + core::WindowNode::Frame frame; + frame.type = (windowFrame.type == duckdb::WindowType::kRows) + ? 
core::WindowNode::WindowType::kRows + : core::WindowNode::WindowType::kRange; + + auto boundTypeConversion = + [](duckdb::BoundType boundType) -> core::WindowNode::BoundType { + switch (boundType) { + case duckdb::BoundType::kCurrentRow: + return core::WindowNode::BoundType::kCurrentRow; + case duckdb::BoundType::kFollowing: + return core::WindowNode::BoundType::kFollowing; + case duckdb::BoundType::kPreceding: + return core::WindowNode::BoundType::kPreceding; + case duckdb::BoundType::kUnboundedFollowing: + return core::WindowNode::BoundType::kUnboundedFollowing; + case duckdb::BoundType::kUnboundedPreceding: + return core::WindowNode::BoundType::kUnboundedPreceding; + } + VELOX_UNREACHABLE(); + }; + frame.startType = boundTypeConversion(windowFrame.startType); + frame.startValue = windowFrame.startValue + ? core::Expressions::inferTypes(windowFrame.startValue, inputRow, pool) + : nullptr; + frame.endType = boundTypeConversion(windowFrame.endType); + frame.endValue = windowFrame.endValue + ? core::Expressions::inferTypes(windowFrame.endValue, inputRow, pool) + : nullptr; + return frame; +} + +std::vector parsePartitionKeys( + const duckdb::IExprWindowFunction& windowExpr, + const std::string& windowString, + const TypePtr& inputRow, + memory::MemoryPool* pool) { + std::vector partitionKeys; + for (const auto& partitionKey : windowExpr.partitionBy) { + auto typedExpr = + core::Expressions::inferTypes(partitionKey, inputRow, pool); + auto typedPartitionKey = + std::dynamic_pointer_cast(typedExpr); + VELOX_CHECK_NOT_NULL( + typedPartitionKey, + "PARTITION BY clause must use a column name, not an expression: {}", + windowString); + partitionKeys.emplace_back(typedPartitionKey); + } + return partitionKeys; +} + +std::pair< + std::vector, + std::vector> +parseOrderByKeys( + const duckdb::IExprWindowFunction& windowExpr, + const std::string& windowString, + const TypePtr& inputRow, + memory::MemoryPool* pool) { + std::vector sortingKeys; + std::vector sortingOrders; + + for (const auto& orderBy : windowExpr.orderBy) { + auto typedExpr = + core::Expressions::inferTypes(orderBy.expr, inputRow, pool); + auto sortingKey = + std::dynamic_pointer_cast(typedExpr); + VELOX_CHECK_NOT_NULL( + sortingKey, + "ORDER BY clause must use a column name, not an expression: {}", + windowString); + sortingKeys.emplace_back(sortingKey); + sortingOrders.emplace_back(orderBy.ascending, orderBy.nullsFirst); + } + return {sortingKeys, sortingOrders}; +} + +bool equalFieldAccessTypedExprPtrList( + const std::vector& lhs, + const std::vector& rhs) { + return std::equal( + lhs.begin(), + lhs.end(), + rhs.begin(), + [](const core::FieldAccessTypedExprPtr& e1, + const core::FieldAccessTypedExprPtr& e2) { + return e1->name() == e2->name(); + }); +} + +bool equalSortOrderList( + const std::vector& lhs, + const std::vector& rhs) { + return std::equal( + lhs.begin(), + lhs.end(), + rhs.begin(), + [](const core::SortOrder& s1, const core::SortOrder& s2) { + return s1.isAscending() == s2.isAscending() && + s1.isNullsFirst() == s2.isNullsFirst(); + }); +} + +} // namespace + +PlanBuilder& PlanBuilder::window( + const std::vector& windowFunctions, + bool inputSorted) { + VELOX_CHECK_NOT_NULL(planNode_, "Window cannot be the source node"); + VELOX_CHECK_GT( + windowFunctions.size(), + 0, + "Window Node requires at least one window function."); + + std::vector partitionKeys; + std::vector sortingKeys; + std::vector sortingOrders; + std::vector windowNodeFunctions; + std::vector windowNames; + + bool first = true; + auto inputType = 
planNode_->outputType(); + int i = 0; + + auto errorOnMismatch = [&](const std::string& windowString, + const std::string& mismatchTypeString) -> void { + std::stringstream error; + error << "Window function invocations " << windowString << " and " + << windowFunctions[0] << " do not match " << mismatchTypeString + << " clauses."; + VELOX_USER_FAIL(error.str()); + }; + + WindowTypeResolver windowResolver; + facebook::velox::duckdb::ParseOptions options; + options.parseIntegerAsBigint = options_.parseIntegerAsBigint; + for (const auto& windowString : windowFunctions) { + const auto& windowExpr = duckdb::parseWindowExpr(windowString, options); + // All window function SQL strings in the list are expected to have the same + // PARTITION BY and ORDER BY clauses. Validate this assumption. + if (first) { + partitionKeys = + parsePartitionKeys(windowExpr, windowString, inputType, pool_); + auto sortPair = + parseOrderByKeys(windowExpr, windowString, inputType, pool_); + sortingKeys = sortPair.first; + sortingOrders = sortPair.second; + first = false; + } else { + auto latestPartitionKeys = + parsePartitionKeys(windowExpr, windowString, inputType, pool_); + auto [latestSortingKeys, latestSortingOrders] = + parseOrderByKeys(windowExpr, windowString, inputType, pool_); + + if (!equalFieldAccessTypedExprPtrList( + partitionKeys, latestPartitionKeys)) { + errorOnMismatch(windowString, "PARTITION BY"); + } + + if (!equalFieldAccessTypedExprPtrList(sortingKeys, latestSortingKeys)) { + errorOnMismatch(windowString, "ORDER BY"); + } + + if (!equalSortOrderList(sortingOrders, latestSortingOrders)) { + errorOnMismatch(windowString, "ORDER BY"); + } + } + + auto windowCall = std::dynamic_pointer_cast( + core::Expressions::inferTypes( + windowExpr.functionCall, planNode_->outputType(), pool_)); + windowNodeFunctions.push_back( + {std::move(windowCall), + createWindowFrame(windowExpr.frame, planNode_->outputType(), pool_), + windowExpr.ignoreNulls}); + if (windowExpr.functionCall->alias().has_value()) { + windowNames.push_back(windowExpr.functionCall->alias().value()); + } else { + windowNames.push_back(fmt::format("w{}", i++)); + } + } + + planNode_ = std::make_shared( + nextPlanNodeId(), + partitionKeys, + sortingKeys, + sortingOrders, + windowNames, + windowNodeFunctions, + inputSorted, + planNode_); + VELOX_CHECK(!planNode_->supportsBarrier()); + return *this; +} + +PlanBuilder& PlanBuilder::window( + const std::vector& windowFunctions) { + return window(windowFunctions, false); +} + +PlanBuilder& PlanBuilder::streamingWindow( + const std::vector& windowFunctions) { + return window(windowFunctions, true); +} + +PlanBuilder& PlanBuilder::rowNumber( + const std::vector& partitionKeys, + std::optional limit, + const bool generateRowNumber) { + std::optional rowNumberColumnName; + if (generateRowNumber) { + rowNumberColumnName = "row_number"; + } + planNode_ = std::make_shared( + nextPlanNodeId(), + fields(partitionKeys), + rowNumberColumnName, + limit, + planNode_); + VELOX_CHECK(!planNode_->supportsBarrier()); + return *this; +} + +PlanBuilder& PlanBuilder::topNRank( + std::string_view function, + const std::vector& partitionKeys, + const std::vector& sortingKeys, + int32_t limit, + bool generateRowNumber) { + VELOX_CHECK_NOT_NULL(planNode_, "TopNRowNumber cannot be the source node"); + auto [sortingFields, sortingOrders] = + parseOrderByClauses(sortingKeys, planNode_->outputType(), pool_); + std::optional rowNumberColumnName; + if (generateRowNumber) { + rowNumberColumnName = "row_number"; + } + planNode_ = 
std::make_shared( + nextPlanNodeId(), + core::TopNRowNumberNode::rankFunctionFromName(function), + fields(partitionKeys), + sortingFields, + sortingOrders, + rowNumberColumnName, + limit, + planNode_); + VELOX_CHECK(!planNode_->supportsBarrier()); + return *this; +} + +PlanBuilder& PlanBuilder::topNRowNumber( + const std::vector& partitionKeys, + const std::vector& sortingKeys, + int32_t limit, + bool generateRowNumber) { + return topNRank( + "row_number", partitionKeys, sortingKeys, limit, generateRowNumber); +} + +PlanBuilder& PlanBuilder::markDistinct( + std::string markerKey, + const std::vector& distinctKeys) { + VELOX_CHECK_NOT_NULL(planNode_, "MarkDistinct cannot be the source node"); + planNode_ = std::make_shared( + nextPlanNodeId(), + std::move(markerKey), + fields(planNode_->outputType(), distinctKeys), + planNode_); + VELOX_CHECK(!planNode_->supportsBarrier()); + return *this; +} + +core::PlanNodeId PlanBuilder::nextPlanNodeId() { + return planNodeIdGenerator_->next(); +} + +// static +std::shared_ptr PlanBuilder::field( + const RowTypePtr& inputType, + const std::string& name) { + auto index = inputType->getChildIdx(name); + return field(inputType, index); +} + +// static +std::shared_ptr PlanBuilder::field( + const RowTypePtr& inputType, + column_index_t index) { + auto name = inputType->names()[index]; + auto type = inputType->childAt(index); + return std::make_shared(type, name); +} + +// static +std::vector> +PlanBuilder::fields( + const RowTypePtr& inputType, + const std::vector& names) { + std::vector> fields; + for (const auto& name : names) { + fields.push_back(field(inputType, name)); + } + return fields; +} + +// static +std::vector> +PlanBuilder::fields( + const RowTypePtr& inputType, + const std::vector& indices) { + std::vector> fields; + for (auto& index : indices) { + fields.push_back(field(inputType, index)); + } + return fields; +} + +std::shared_ptr PlanBuilder::field( + column_index_t index) { + VELOX_CHECK_NOT_NULL(planNode_); + return field(planNode_->outputType(), index); +} + +std::shared_ptr PlanBuilder::field( + const std::string& name) { + VELOX_CHECK_NOT_NULL(planNode_); + return field(planNode_->outputType(), name); +} + +std::vector> +PlanBuilder::fields(const std::vector& names) { + VELOX_CHECK_NOT_NULL(planNode_); + return fields(planNode_->outputType(), names); +} + +std::vector> +PlanBuilder::fields(const std::vector& indices) { + VELOX_CHECK_NOT_NULL(planNode_); + return fields(planNode_->outputType(), indices); +} + +std::vector PlanBuilder::exprs( + const std::vector& expressions, + const RowTypePtr& inputType) { + std::vector typedExpressions; + for (auto& expr : expressions) { + auto typedExpression = core::Expressions::inferTypes( + parse::parseExpr(expr, options_), inputType, pool_); + + if (dynamic_cast( + typedExpression.get())) { + typedExpressions.push_back(typedExpression); + } else if (dynamic_cast( + typedExpression.get())) { + typedExpressions.push_back(typedExpression); + } else { + VELOX_FAIL("Expected field name or constant: {}", expr); + } + } + + return typedExpressions; +} + +core::TypedExprPtr PlanBuilder::inferTypes( + const std::shared_ptr& untypedExpr) { + VELOX_CHECK_NOT_NULL(planNode_); + return core::Expressions::inferTypes( + untypedExpr, planNode_->outputType(), pool_); +} + +core::PlanNodePtr PlanBuilder::IndexLookupJoinBuilder::build( + const core::PlanNodeId& id) { + VELOX_CHECK_NOT_NULL( + planBuilder_.planNode_, "IndexLookupJoin cannot be the source node"); + auto inputType = + 
concat(planBuilder_.planNode_->outputType(), indexSource_->outputType()); + if (hasMarker_) { + auto names = inputType->names(); + names.push_back(outputLayout_.back()); + auto types = inputType->children(); + types.push_back(BOOLEAN()); + inputType = ROW(std::move(names), std::move(types)); + } + auto outputType = extract(inputType, outputLayout_); + auto leftKeyFields = + PlanBuilder::fields(planBuilder_.planNode_->outputType(), leftKeys_); + auto rightKeyFields = + PlanBuilder::fields(indexSource_->outputType(), rightKeys_); + + std::vector joinConditionPtrs{}; + joinConditionPtrs.reserve(joinConditions_.size()); + for (const auto& joinCondition : joinConditions_) { + joinConditionPtrs.push_back(PlanBuilder::parseIndexJoinCondition( + joinCondition, inputType, planBuilder_.pool_)); + } + + // Parse filter expression if provided + core::TypedExprPtr filterExpr; + if (!filter_.empty()) { + filterExpr = parseExpr( + filter_, inputType, planBuilder_.options_, planBuilder_.pool_); + } + + return std::make_shared( + id, + joinType_, + std::move(leftKeyFields), + std::move(rightKeyFields), + std::move(joinConditionPtrs), + filterExpr, + hasMarker_, + std::move(planBuilder_.planNode_), + indexSource_, + std::move(outputType)); +} +} // namespace facebook::velox::exec::test \ No newline at end of file diff --git a/velox/connectors/lakehouse/iceberg/tests/PlanBuilder.h b/velox/connectors/lakehouse/iceberg/tests/PlanBuilder.h new file mode 100644 index 000000000000..203e5e58fcfb --- /dev/null +++ b/velox/connectors/lakehouse/iceberg/tests/PlanBuilder.h @@ -0,0 +1,1338 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include "velox/connectors/lakehouse/iceberg/TableHandleBase.h" +#include "velox/parse/ExpressionsParser.h" +#include "velox/parse/IExpr.h" +#include "velox/parse/PlanNodeIdGenerator.h" + +namespace facebook::velox::tpch { +enum class Table : uint8_t; +} + +namespace facebook::velox::tpcds { +enum class Table : uint8_t; +} + +namespace facebook::velox::exec::test { + +struct PushdownConfig { + common::SubfieldFilters subfieldFiltersMap; + std::string remainingFilter; +}; + +/// A builder class with fluent API for building query plans. Plans are built +/// bottom up starting with the source node (table scan or similar). Expressions +/// and orders can be specified using SQL. See filter, project and orderBy +/// methods for details. +/// +/// For example, to build a query plan for a leaf fragment of a simple query +/// SELECT a, sum(b) FROM t GROUP BY 1 +/// +/// auto plan = PlanBuilder() +/// .tableScan(ROW({"a", "b"}, {INTEGER(), DOUBLE()})) +/// .partialAggregation({"a"}, {"sum(b)"}) +/// .planNode(); +/// +/// Here, we use default PlanNodeIdGenerator that starts from zero, hence, table +/// scan node ID will be "0". You'll need to use this ID when adding splits. 
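+///
+/// If the scan node ID is needed explicitly, it can be captured while the
+/// plan is being built (an illustrative sketch using capturePlanNodeId
+/// declared below; 'scanId' is a placeholder variable):
+///
+///   core::PlanNodeId scanId;
+///   auto plan = PlanBuilder()
+///                   .tableScan(ROW({"a", "b"}, {INTEGER(), DOUBLE()}))
+///                   .capturePlanNodeId(scanId)
+///                   .partialAggregation({"a"}, {"sum(b)"})
+///                   .planNode();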
+/// +/// A join query plan would be a bit more complex: +/// SELECT t.a, u.b FROM t, u WHERE t.key = u.key +/// +/// auto planNodeIdGenerator = std::make_shared(); +/// core::PlanNodeId tScanId; // ID of the table scan node for 't'. +/// core::PlanNodeId uScanId; // ID of the table scan node for 'u'. +/// auto plan = PlanBuilder(planNodeIdGenerator) +/// .tableScan(ROW({"key", "a"}, {INTEGER(), BIGINT()})) +/// .capturePlanNodeId(tScanId) +/// .hashJoin( +/// {"key"}, +/// {"key"}, +/// PlanBuilder(planNodeIdGenerator) +/// .tableScan(ROW({"key", "b"}, {INTEGER(), DOUBLE()}))) +/// .capturePlanNodeId(uScanId) +/// .planNode(), +/// "", // no extra join filter +/// {"a", "b"}) +/// .planNode(); +/// +/// We use two builders, one for the right-side and another for the left-side +/// of the join. To ensure plan node IDs are unique in the final plan, we use +/// the same instance of PlanNodeIdGenerator with both builders. We also use +/// capturePlanNodeId method to capture the IDs of the table scan nodes for +/// 't' and 'u'. We need these to add splits. +class PlanBuilder { + public: + /// Constructor taking an instance of PlanNodeIdGenerator and a memory pool. + /// + /// The memory pool is used when parsing expressions containing complex-type + /// literals, e.g. arrays, maps or structs. The memory pool can be empty if + /// such expressions are not used in the plan. + /// + /// When creating tree-shaped plans, e.g. join queries, use the same instance + /// of PlanNodeIdGenerator for all builders to ensure unique plan node IDs + /// across the plan. + explicit PlanBuilder( + std::shared_ptr planNodeIdGenerator, + memory::MemoryPool* pool = nullptr) + : planNodeIdGenerator_{std::move(planNodeIdGenerator)}, pool_{pool} {} + + /// Constructor with no required parameters suitable for creating + /// straight-line (e.g. no joins) query plans. + explicit PlanBuilder(memory::MemoryPool* pool = nullptr) + : PlanBuilder(std::make_shared(), pool) {} + + /// Constructor that allows an initial plane node to be specified for testing + /// this is useful when testing additional connectors that do not rely on the + /// table scan node supported below. + PlanBuilder( + core::PlanNodePtr initialPlanNode, + std::shared_ptr planNodeIdGenerator, + memory::MemoryPool* pool = nullptr) + : planNode_(std::move(initialPlanNode)), + planNodeIdGenerator_{std::move(planNodeIdGenerator)}, + pool_{pool} {} + + virtual ~PlanBuilder() = default; + + static constexpr const std::string_view kHiveDefaultConnectorId{"test-hive"}; + static constexpr const std::string_view kTpchDefaultConnectorId{"test-tpch"}; + static constexpr const std::string_view kTpcdsDefaultConnectorId{ + "test-tpcds"}; + + /// + /// TableScan + /// + + /// Add a TableScanNode to scan a Hive table. + /// + /// @param outputType List of column names and types to read from the table. + /// @param subfieldFilters A list of SQL expressions for the range filters to + /// apply to individual columns. Supported filters are: column <= value, + /// column < value, column >= value, column > value, column = value, column IN + /// (v1, v2,.. vN), column < v1 OR column >= v2. + /// @param remainingFilter SQL expression for the additional conjunct. May + /// include multiple columns and SQL functions. The remainingFilter is AND'ed + /// with all the subfieldFilters. + /// @param dataColumns can be different from 'outputType' for the purposes + /// of testing queries using missing columns. 
It is used, if specified, for + /// parseExpr call and as 'dataColumns' for the TableHandle. You supply more + /// types (for all columns) in this argument as opposed to 'outputType', where + /// you define the output types only. See 'missingColumns' test in + /// 'TableScanTest'. + /// @param assignments Optional ColumnHandles. + PlanBuilder& tableScan( + const RowTypePtr& outputType, + const std::vector& subfieldFilters = {}, + const std::string& remainingFilter = "", + const RowTypePtr& dataColumns = nullptr, + const connector::ColumnHandleMap& assignments = {}); + + /// Add a TableScanNode to scan a Hive table. + /// + /// @param tableName The name of the table to scan. + /// @param outputType List of column names and types to read from the table. + /// @param columnAliases Optional aliases for the column names. The key is the + /// alias (name in 'outputType'), value is the name in the files. + /// @param subfieldFilters A list of SQL expressions for the range filters to + /// apply to individual columns. Should use column name aliases, not column + /// names in the files. Supported filters are: column <= value, column < + /// value, column >= value, column > value, column = value, column IN (v1, + /// v2,.. vN), column < v1 OR column >= v2. + /// @param remainingFilter SQL expression for the additional conjunct. May + /// include multiple columns and SQL functions. Should use column name + /// aliases, not column names in the files. The remainingFilter is AND'ed + /// with all the subfieldFilters. + /// @param dataColumns can be different from 'outputType' for the purposes + /// of testing queries using missing columns. It is used, if specified, for + /// parseExpr call and as 'dataColumns' for the TableHandle. You supply more + /// types (for all columns) in this argument as opposed to 'outputType', where + /// you define the output types only. See 'missingColumns' test in + /// 'TableScanTest'. + PlanBuilder& tableScan( + const std::string& tableName, + const RowTypePtr& outputType, + const std::unordered_map& columnAliases = {}, + const std::vector& subfieldFilters = {}, + const std::string& remainingFilter = "", + const RowTypePtr& dataColumns = nullptr, + const connector::ColumnHandleMap& assignments = {}); + + /// Add a TableScanNode to scan a Hive table with direct SubfieldFilters. + /// + /// @param outputType List of column names and types to read from the table. + /// @param PushdownConfig Contains pushdown configs for the table scan. + /// @param dataColumns Optional data columns that may differ from outputType. + /// @param assignments Optional ColumnHandles. + + PlanBuilder& tableScanWithPushDown( + const RowTypePtr& outputType, + const PushdownConfig& pushdownConfig, + const RowTypePtr& dataColumns = nullptr, + const connector::ColumnHandleMap& assignments = {}); + + /// Add a TableScanNode to scan a TPC-H table. + /// + /// @param tpchTableHandle The handle that specifies the target TPC-H table + /// and scale factor. + /// @param columnNames The columns to be returned from that table. + /// @param scaleFactor The TPC-H scale factor. + /// @param connectorId The TPC-H connector id. + /// @param filter Optional SQL expression to filter the data at the connector + /// level. + PlanBuilder& tpchTableScan( + tpch::Table table, + std::vector columnNames, + double scaleFactor = 1, + std::string_view connectorId = kTpchDefaultConnectorId, + const std::string& filter = ""); + + /// Add a TableScanNode to scan a TPC-DS table. 
+ /// + /// @param tpcdsTableHandle The handle that specifies the target TPC-DS table + /// and scale factor. + /// @param columnNames The columns to be returned from that table. + /// @param scaleFactor The TPC-DS scale factor. + /// @param connectorId The TPC-DS connector id. + PlanBuilder& tpcdsTableScan( + tpcds::Table table, + std::vector columnNames, + double scaleFactor = 0.01, + std::string_view connectorId = kTpcdsDefaultConnectorId); + + /// Helper class to build a custom TableScanNode. + /// Uses a planBuilder instance to get the next plan id, memory pool, and + /// parse options. + /// + /// Uses the iceberg connector by default. Specify outputType, tableHandle, and + /// assignments for other connectors. If these three are specified, all other + /// builder arguments will be ignored. + class TableScanBuilder { + public: + TableScanBuilder(PlanBuilder& builder) : planBuilder_(builder) {} + + /// @param tableName The name of the table to scan. + TableScanBuilder& tableName(std::string tableName) { + tableName_ = std::move(tableName); + return *this; + } + + /// if 'idGenerator' is non-nullptr, produces filters that would be pushed + /// down into the scan as a separate FilterNode instead. 'idGenerator' + /// produces the id for the filterNode. + TableScanBuilder& filtersAsNode( + std::shared_ptr idGenerator) { + filtersAsNode_ = idGenerator != nullptr; + planNodeIdGenerator_ = idGenerator; + return *this; + } + + /// @param connectorId The id of the connector to scan. + TableScanBuilder& connectorId(std::string connectorId) { + connectorId_ = std::move(connectorId); + return *this; + } + + /// @param outputType List of column names and types to read from the table. + /// This property is required. + TableScanBuilder& outputType(RowTypePtr outputType) { + outputType_ = std::move(outputType); + return *this; + } + + /// @param subfieldFilters A list of SQL expressions to apply to individual + /// columns. These are range filters that can be efficiently applied as data + /// is read/decoded. Supported filters are: + /// + /// > column <= value + /// > column < value + /// > column >= value + /// > column > value + /// > column = value + /// > column IN (v1, v2,.. vN) + /// > column < v1 + /// > column >= v2 + TableScanBuilder& subfieldFilters(std::vector subfieldFilters); + + // @param subfieldFiltersMap A map of Subfield to Filters. + TableScanBuilder& subfieldFiltersMap( + const common::SubfieldFilters& filtersMap); + + /// @param subfieldFilter A single SQL expression to be applied to an + /// individual column. + TableScanBuilder& subfieldFilter(std::string subfieldFilter) { + return subfieldFilters({std::move(subfieldFilter)}); + } + + /// @param remainingFilter SQL expression for the additional conjunct. May + /// include multiple columns and SQL functions. The remainingFilter is + /// AND'ed with all the subfieldFilters. + TableScanBuilder& remainingFilter(std::string remainingFilter); + + /// @param dataColumns can be different from 'outputType' for the purposes + /// of testing queries using missing columns. It is used, if specified, for + /// parseExpr call and as 'dataColumns' for the TableHandle. You supply more + /// types (for all columns) in this argument as opposed to 'outputType', + /// where you define the output types only. See 'missingColumns' test in + /// 'TableScanTest'. + TableScanBuilder& dataColumns(RowTypePtr dataColumns) { + dataColumns_ = std::move(dataColumns); + return *this; + } + + /// @param columnAliases Optional aliases for the column names. 
The key is + /// the alias (name in 'outputType'), value is the name in the files. + TableScanBuilder& columnAliases( + std::unordered_map columnAliases) { + columnAliases_ = std::move(columnAliases); + return *this; + } + + /// @param tableHandle Optional tableHandle. Other builder arguments such as + /// the `subfieldFilters` and `remainingFilter` will be ignored. + TableScanBuilder& tableHandle( + std::shared_ptr tableHandle) { + tableHandle_ = std::move(tableHandle); + return *this; + } + + /// @param assignments Optional ColumnHandles. + /// outputType names should match the keys in the 'assignments' map. The + /// 'assignments' map may contain more columns than 'outputType' if some + /// columns are only used by pushed-down filters. + TableScanBuilder& assignments(connector::ColumnHandleMap assignments) { + assignments_ = std::move(assignments); + return *this; + } + + /// Stop the TableScanBuilder. + PlanBuilder& endTableScan() { + planBuilder_.planNode_ = build(planBuilder_.nextPlanNodeId()); + return planBuilder_; + } + + private: + /// Build the plan node TableScanNode. + core::PlanNodePtr build(core::PlanNodeId id); + + PlanBuilder& planBuilder_; + std::string tableName_{"iceberg_table"}; + std::string connectorId_{kHiveDefaultConnectorId}; + RowTypePtr outputType_; + core::ExprPtr remainingFilter_; + RowTypePtr dataColumns_; + std::unordered_map columnAliases_; + std::shared_ptr tableHandle_; + connector::ColumnHandleMap assignments_; + + // produce filters as a FilterNode instead of pushdown. + bool filtersAsNode_{false}; + + // Generates the id of a FilterNode if 'filtersAsNode_'. + std::shared_ptr planNodeIdGenerator_; + + // SubfieldFilters object containing filters to apply. + common::SubfieldFilters subfieldFiltersMap_; + }; + + /// Start a TableScanBuilder. + TableScanBuilder& startTableScan() { + tableScanBuilder_.reset(new TableScanBuilder(*this)); + return *tableScanBuilder_; + } + + /// Helper class to build a custom IndexLookupJoinNode. + class IndexLookupJoinBuilder { + public: + explicit IndexLookupJoinBuilder(PlanBuilder& builder) + : planBuilder_(builder) {} + + /// @param leftKeys Join keys from the table scan side, the preceding plan + /// node. Cannot be empty. + IndexLookupJoinBuilder& leftKeys(std::vector leftKeys) { + leftKeys_ = std::move(leftKeys); + return *this; + } + + /// @param rightKeys Join keys from the index lookup side, the plan node + /// specified in 'right' parameter. The number and types of left and right + /// keys must be the same. + IndexLookupJoinBuilder& rightKeys(std::vector rightKeys) { + rightKeys_ = std::move(rightKeys); + return *this; + } + + /// @param indexSource The right input source with index lookup support. + IndexLookupJoinBuilder& indexSource( + const core::TableScanNodePtr& indexSource) { + indexSource_ = indexSource; + return *this; + } + + IndexLookupJoinBuilder& joinConditions( + std::vector joinConditions) { + joinConditions_ = std::move(joinConditions); + return *this; + } + + IndexLookupJoinBuilder& hasMarker(bool hasMarker) { + hasMarker_ = hasMarker; + return *this; + } + + IndexLookupJoinBuilder& outputLayout( + std::vector outputLayout) { + outputLayout_ = std::move(outputLayout); + return *this; + } + + /// @param filter SQL expression for the additional join filter. Can + /// use columns from both probe and build sides of the join. + IndexLookupJoinBuilder& filter(std::string filter) { + filter_ = std::move(filter); + return *this; + } + + /// @param joinType Type of the join supported: inner, left. 
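+    ///
+    /// A complete invocation of this builder might look like the following
+    /// sketch ('indexScan' stands for a TableScanNodePtr with index lookup
+    /// support and 'probeData' for probe-side input; both are placeholders):
+    ///
+    ///   auto plan = PlanBuilder()
+    ///                   .values({probeData})
+    ///                   .startIndexLookupJoin()
+    ///                   .leftKeys({"k"})
+    ///                   .rightKeys({"k"})
+    ///                   .indexSource(indexScan)
+    ///                   .joinType(core::JoinType::kLeft)
+    ///                   .outputLayout({"k", "v"})
+    ///                   .endIndexLookupJoin()
+    ///                   .planNode();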
+ IndexLookupJoinBuilder& joinType(core::JoinType joinType) { + joinType_ = joinType; + return *this; + } + + /// Stop the IndexLookupJoinBuilder. + PlanBuilder& endIndexLookupJoin() { + planBuilder_.planNode_ = build(planBuilder_.nextPlanNodeId()); + return planBuilder_; + } + + private: + /// Build the plan node IndexLookupJoinNode. + core::PlanNodePtr build(const core::PlanNodeId& id); + + PlanBuilder& planBuilder_; + std::vector leftKeys_; + std::vector rightKeys_; + core::TableScanNodePtr indexSource_; + std::vector joinConditions_; + std::string filter_; + bool hasMarker_{false}; + std::vector outputLayout_; + core::JoinType joinType_{core::JoinType::kInner}; + }; + + /// Start an IndexLookupJoinBuilder. + IndexLookupJoinBuilder& startIndexLookupJoin() { + indexLookupJoinBuilder_.reset(new IndexLookupJoinBuilder(*this)); + return *indexLookupJoinBuilder_; + } + + /// Add a ValuesNode using specified data. + /// + /// @param values The data to use. + /// @param parallelizable If true, ValuesNode can run multi-threaded, in which + /// case it will produce duplicate data from each thread, e.g. each thread + /// will return all the data in 'values'. Useful for testing. + /// @param repeatTimes The number of times data is produced as input. If + /// greater than one, each RowVector will produce data as input `repeatTimes`. + /// For example, in case `values` has 3 vectors {v1, v2, v3} and repeatTimes + /// is 2, the input produced will be {v1, v2, v3, v1, v2, v3}. Useful for + /// testing. + PlanBuilder& values( + const std::vector& values, + bool parallelizable = false, + size_t repeatTimes = 1); + + PlanBuilder& filtersAsNode(bool filtersAsNode) { + filtersAsNode_ = filtersAsNode; + return *this; + } + + /// Adds a QueryReplayNode for query tracing. + /// + /// @param traceNodeDir The trace directory for a given plan node. + /// @param pipelineId The pipeline id for the traced operator instantiated + /// from the given plan node. + /// @param driverIds The target driver ID list for replay. The replaying + /// operator uses its driver instance id as the list index to get the traced + /// driver id for replay. + /// @param outputType The type of the tracing data. + PlanBuilder& traceScan( + const std::string& traceNodeDir, + uint32_t pipelineId, + std::vector driverIds, + const RowTypePtr& outputType); + + /// Add an ExchangeNode. + /// + /// Use capturePlanNodeId method to capture the node ID needed for adding + /// splits. + /// + /// @param outputType The type of the data coming in and out of the exchange. + /// @param serdekind The kind of seralized data format. + PlanBuilder& exchange( + const RowTypePtr& outputType, + VectorSerde::Kind serdekind); + + /// Add a MergeExchangeNode using specified ORDER BY clauses. + /// + /// For example, + /// + /// .mergeExchange(outputRowType, {"a", "b DESC", "c ASC NULLS FIRST"}) + /// + /// By default, uses ASC NULLS LAST sort order, e.g. column "a" above will use + /// ASC NULLS LAST and column "b" will use DESC NULLS LAST. + PlanBuilder& mergeExchange( + const RowTypePtr& outputType, + const std::vector& keys, + VectorSerde::Kind serdekind); + + /// Add a ProjectNode using specified SQL expressions. + /// + /// For example, + /// + /// .project({"a + b", "c * 3"}) + /// + /// The names of the projections can be specified using SQL statement AS: + /// + /// .project({"a + b AS sum_ab", "c * 3 AS triple_c"}) + /// + /// If AS statement is not used, the names of the projections will be + /// generated as p0, p1, p2, etc. 
Names of columns projected as is will be + /// preserved. + /// + /// For example, + /// + /// project({"a + b AS sum_ab", "c", "d * 7") + /// + /// will produce projected columns named sum_ab, c and p2. + PlanBuilder& project(const std::vector& projections); + + /// Add a ParallelProjectNode using groups of independent SQL expressions. + /// + /// @param projectionGroups One or more groups of expressions that depend on + /// disjunct sets of inputs. + /// @param noLoadColumn Optional columns to pass through as is without + /// loading. These columns must be distinct from the set of columns used in + /// 'projectionGroups'. + PlanBuilder& parallelProject( + const std::vector>& projectionGroups, + const std::vector& noLoadColumns = {}); + + /// Add a LazyDereferenceNode to the plan. + /// @param projections Same format as in `project`, but can only contain + /// field/subfield accesses. + PlanBuilder& lazyDereference(const std::vector& projections); + + /// Add a ProjectNode to keep all existing columns and append more columns + /// using specified expressions. + /// @param newColumns A list of one or more expressions to use for computing + /// additional columns. + PlanBuilder& appendColumns(const std::vector& newColumns); + + /// Variation of project that takes untyped expressions. Used for access + /// deeply nested types, in which case Duck DB often fails to parse or infer + /// the type. + PlanBuilder& projectExpressions( + const std::vector& projections); + + PlanBuilder& projectExpressions( + const std::vector& projections); + + /// Similar to project() except 'optionalProjections' could be empty and the + /// function will skip creating a ProjectNode in that case. + PlanBuilder& optionalProject( + const std::vector& optionalProjections); + + /// Add a FilterNode using specified SQL expression. + /// + /// @param filter SQL expression of type boolean. + PlanBuilder& filter(const std::string& filter); + + /// Similar to filter() except 'optionalFilter' could be empty and the + /// function will skip creating a FilterNode in that case. + PlanBuilder& optionalFilter(const std::string& optionalFilter); + + /// Add an AggregationNode representing partial aggregation with the + /// specified grouping keys, aggregates and optional masks. + /// + /// Aggregates are specified as function calls over unmodified input + /// columns, e.g. sum(a), avg(b), min(c). SQL statement AS can be used to + /// specify names for the aggregation result columns. In the absence of AS + /// statement, result columns are named a0, a1, a2, etc. + /// + /// For example, + /// + /// partialAggregation({}, {"min(a) AS min_a", "max(b)"}) + /// + /// will produce output columns min_a and a1, while + /// + /// partialAggregation({"k1", "k2"}, {"min(a) AS min_a", "max(b)"}) + /// + /// will produce output columns k1, k2, min_a and a1, assuming the names + /// of the first two input columns are k1 and k2. + PlanBuilder& partialAggregation( + const std::vector& groupingKeys, + const std::vector& aggregates, + const std::vector& masks = {}) { + return aggregation( + groupingKeys, + {}, + aggregates, + masks, + core::AggregationNode::Step::kPartial, + false); + } + + /// Add final aggregation plan node to match the current partial aggregation + /// node. Should be called directly after partialAggregation() method or + /// directly after intermediateAggregation() that follows + /// partialAggregation(). Can be called also if there is a local exchange + /// after partial or intermediate aggregation. 
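+  ///
+  /// For example (an illustrative sketch; 'data' is a placeholder input
+  /// vector):
+  ///
+  ///   auto plan = PlanBuilder()
+  ///                   .values({data})
+  ///                   .partialAggregation({"k"}, {"sum(v)"})
+  ///                   .localPartition({"k"})
+  ///                   .finalAggregation()
+  ///                   .planNode();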
+ PlanBuilder& finalAggregation(); + + /// Add final aggregation plan node using specified grouping keys, aggregate + /// expressions and their types. + /// + /// @param rawInputTypes Raw input types for the aggregate functions. + PlanBuilder& finalAggregation( + const std::vector& groupingKeys, + const std::vector& aggregates, + const std::vector>& rawInputTypes) { + return aggregation( + groupingKeys, + {}, + aggregates, + {}, + core::AggregationNode::Step::kFinal, + false, + rawInputTypes); + } + + /// Add intermediate aggregation plan node to match the current partial + /// aggregation node. Should be called directly after partialAggregation() + /// method. Can be called also if there is a local exchange after partial + /// aggregation. + PlanBuilder& intermediateAggregation(); + + /// Add intermediate aggregation plan node using specified grouping keys, + /// aggregate expressions and their types. + PlanBuilder& intermediateAggregation( + const std::vector& groupingKeys, + const std::vector& aggregates) { + return aggregation( + groupingKeys, + {}, + aggregates, + {}, + core::AggregationNode::Step::kIntermediate, + false); + } + + /// Add a single aggregation plan node using specified grouping keys and + /// aggregate expressions. See 'partialAggregation' method for the supported + /// types of aggregate expressions. + PlanBuilder& singleAggregation( + const std::vector& groupingKeys, + const std::vector& aggregates, + const std::vector& masks = {}) { + return aggregation( + groupingKeys, + {}, + aggregates, + masks, + core::AggregationNode::Step::kSingle, + false); + } + + /// Add an AggregationNode using specified grouping keys, + /// aggregate expressions and masks. See 'partialAggregation' method for the + /// supported types of aggregate expressions. + /// + /// @param groupingKeys A list of grouping keys. Can be empty for global + /// aggregations. + /// @param aggregates A list of aggregate expressions. Must contain at least + /// one expression. + /// @param masks An optional list of boolean input columns to use as masks for + /// the aggregates. Can be empty or have fewer elements than 'aggregates' or + /// have some elements being empty strings. Non-empty elements must refer to a + /// boolean input column, which will be used to mask a corresponding + /// aggregate, e.g. aggregate will skip rows where 'mask' column is false. + /// @param step Aggregation step: partial, final, intermediate or single. + /// @param ignoreNullKeys Boolean indicating whether to skip input rows where + /// one of the grouping keys is null. + PlanBuilder& aggregation( + const std::vector& groupingKeys, + const std::vector& aggregates, + const std::vector& masks, + core::AggregationNode::Step step, + bool ignoreNullKeys) { + return aggregation( + groupingKeys, {}, aggregates, masks, step, ignoreNullKeys); + } + + /// Same as above, but also allows to specify a subset of grouping keys on + /// which the input is pre-grouped or clustered. Pre-grouped keys enable + /// streaming or partially streaming aggregation algorithms which use less + /// memory and CPU then hash aggregation. The caller is responsible + /// that input data is indeed clustered on the specified keys. If that's not + /// the case, the query may return incorrect results. 
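+  ///
+  /// For example (an illustrative sketch; assumes the placeholder input
+  /// 'data' is already clustered on "k1"):
+  ///
+  ///   auto plan = PlanBuilder()
+  ///                   .values({data})
+  ///                   .aggregation(
+  ///                       {"k1", "k2"},
+  ///                       {"k1"},
+  ///                       {"sum(v)"},
+  ///                       {},
+  ///                       core::AggregationNode::Step::kSingle,
+  ///                       false)
+  ///                   .planNode();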
+ PlanBuilder& aggregation( + const std::vector& groupingKeys, + const std::vector& preGroupedKeys, + const std::vector& aggregates, + const std::vector& masks, + core::AggregationNode::Step step, + bool ignoreNullKeys) { + return aggregation( + groupingKeys, + preGroupedKeys, + aggregates, + masks, + step, + ignoreNullKeys, + {}); + } + + /// A convenience method to create partial aggregation plan node for the case + /// where input is clustered on all grouping keys. + PlanBuilder& partialStreamingAggregation( + const std::vector& groupingKeys, + const std::vector& aggregates, + const std::vector& masks = {}) { + return streamingAggregation( + groupingKeys, + aggregates, + masks, + core::AggregationNode::Step::kPartial, + false); + } + + /// A convenience method to create final aggregation plan node for the case + /// where input is clustered on all grouping keys. + PlanBuilder& finalStreamingAggregation( + const std::vector& groupingKeys, + const std::vector& aggregates) { + return streamingAggregation( + groupingKeys, + aggregates, + {}, + core::AggregationNode::Step::kFinal, + false); + } + + /// Add an AggregationNode assuming input is clustered on all grouping keys. + PlanBuilder& streamingAggregation( + const std::vector& groupingKeys, + const std::vector& aggregates, + const std::vector& masks, + core::AggregationNode::Step step, + bool ignoreNullKeys); + + /// Add a GroupIdNode using the specified grouping keys, grouping sets, + /// aggregation inputs and a groupId column name. + /// The grouping keys can specify aliases if an input column is mapped + /// to an output column with a different name. + /// e.g. Grouping keys {"k1", "k1 as k2"} means there are 2 grouping keys: + /// the input column k1 and output column k2 which is an alias of column k1. + /// Grouping sets using above grouping keys use the output column aliases. + /// e.g. Grouping sets in the above case could be {{"k1"}, {"k2"}, {}} + /// The GroupIdNode output columns have grouping keys in the order specified + /// in groupingKeys variable. + PlanBuilder& groupId( + const std::vector& groupingKeys, + const std::vector>& groupingSets, + const std::vector& aggregationInputs, + std::string groupIdName = "group_id"); + + /// Add an ExpandNode using specified projections. See comments for + /// ExpandNode class for description of this plan node. + /// + /// @param projections A list of projection expressions. Each expression is + /// either a column name, null or non-null constant. + /// + /// For example, + /// + /// .expand( + /// {{"k1", "null:: bigint k2", "a", "b", "0 as gid"}, // + /// Column name will be extracted from the first projection. If the + /// column is null, it is also necessary to specify the column + /// type. + /// {"k1", "null", "a", "b", "1"}, + /// {"null", "null", "a", "b", "2"}}) + /// + /// + PlanBuilder& expand(const std::vector>& projections); + + /// Add a LocalMergeNode using specified ORDER BY clauses. + /// + /// For example, + /// + /// .localMerge({"a", "b DESC", "c ASC NULLS FIRST"}) + /// + /// By default, uses ASC NULLS LAST sort order, e.g. column "a" above will use + /// ASC NULLS LAST and column "b" will use DESC NULLS LAST. + PlanBuilder& localMerge( + const std::vector& keys, + std::vector sources); + + /// A convenience method to add a LocalMergeNode with a single source (the + /// current plan node). + PlanBuilder& localMerge(const std::vector& keys); + + /// Adds an OrderByNode using specified ORDER BY clauses. 
+ /// + /// For example, + /// + /// .orderBy({"a", "b DESC", "c ASC NULLS FIRST"}) + /// + /// By default, uses ASC NULLS LAST sort order, e.g. column "a" above will use + /// ASC NULLS LAST and column "b" will use DESC NULLS LAST. + PlanBuilder& orderBy(const std::vector& keys, bool isPartial); + + /// Add a TopNNode using specified N and ORDER BY clauses. + /// + /// For example, + /// + /// .topN({"a", "b DESC", "c ASC NULLS FIRST"}, 10, true) + /// + /// By default, uses ASC NULLS LAST sort order, e.g. column "a" above will use + /// ASC NULLS LAST and column "b" will use DESC NULLS LAST. + PlanBuilder& + topN(const std::vector& keys, int32_t count, bool isPartial); + + /// Add a LimitNode. + /// + /// @param offset Offset, i.e. number of rows of input to skip. + /// @param count Maximum number of rows to produce after skipping 'offset' + /// rows. + /// @param isPartial Boolean indicating whether the limit node is partial or + /// final. Partial limit can run multi-threaded. Final limit must run + /// single-threaded. + PlanBuilder& limit(int64_t offset, int64_t count, bool isPartial); + + /// Add an EnforceSingleRowNode to ensure input has at most one row at + /// runtime. + PlanBuilder& enforceSingleRow(); + + /// Add an AssignUniqueIdNode to add a column with query-scoped unique value + /// per row. + /// + /// @param idName The name of output column that contains the unique ID. + /// Column type is assumed as BIGINT. + /// @param taskUniqueId ID of the Task that will be used to run the query + /// plan. The ID must be unique across all the tasks of a single query. Tasks + /// may possibly run on different machines. + PlanBuilder& assignUniqueId( + const std::string& idName = "unique", + const int32_t taskUniqueId = 1); + + /// Add a PartitionedOutputNode to hash-partition the input on the specified + /// keys using exec::HashPartitionFunction. + /// + /// @param keys Partitioning keys. May be empty, in which case all input will + /// be places in a single partition. + /// @param numPartitions Number of partitions. Must be greater than or equal + /// to 1. Keys must not be empty if greater than 1. + /// @param replicateNullsAndAny Boolean indicating whether to replicate one + /// arbitrary entry and all entries with null keys to all partitions. Used to + /// implement proper ANTI join semantics in a distributed execution + /// environment. + /// @param outputLayout Optional output layout in case it is different then + /// the input. Output columns may appear in different order from the input, + /// some input columns may be missing in the output, some columns may be + /// duplicated in the output. + PlanBuilder& partitionedOutput( + const std::vector& keys, + int numPartitions, + bool replicateNullsAndAny, + const std::vector& outputLayout = {}, + VectorSerde::Kind serdeKind = VectorSerde::Kind::kPresto); + + /// Same as above, but assumes 'replicateNullsAndAny' is false. + PlanBuilder& partitionedOutput( + const std::vector& keys, + int numPartitions, + const std::vector& outputLayout = {}, + VectorSerde::Kind serdeKind = VectorSerde::Kind::kPresto); + + /// Same as above, but allows to provide custom partition function. + PlanBuilder& partitionedOutput( + const std::vector& keys, + int numPartitions, + bool replicateNullsAndAny, + core::PartitionFunctionSpecPtr partitionFunctionSpec, + const std::vector& outputLayout = {}, + VectorSerde::Kind serdeKind = VectorSerde::Kind::kPresto); + + /// Adds a PartitionedOutputNode to broadcast the input data. 
+ /// + /// @param outputLayout Optional output layout in case it is different then + /// the input. Output columns may appear in different order from the input, + /// some input columns may be missing in the output, some columns may be + /// duplicated in the output. + PlanBuilder& partitionedOutputBroadcast( + const std::vector& outputLayout = {}, + VectorSerde::Kind serdeKind = VectorSerde::Kind::kPresto); + + /// Adds a PartitionedOutputNode to put data into arbitrary buffer. + PlanBuilder& partitionedOutputArbitrary( + const std::vector& outputLayout = {}, + VectorSerde::Kind serdeKind = VectorSerde::Kind::kPresto); + + /// Adds a LocalPartitionNode to hash-partition the input on the specified + /// keys using exec::HashPartitionFunction. Number of partitions is determined + /// at runtime based on parallelism of the downstream pipeline. + /// + /// @param keys Partitioning keys. May be empty, in which case all input will + /// be places in a single partition. + /// @param sources One or more plan nodes that produce input data. + PlanBuilder& localPartition( + const std::vector& keys, + const std::vector& sources); + + /// A convenience method to add a LocalPartitionNode with a single source (the + /// current plan node). + PlanBuilder& localPartition(const std::vector& keys); + + /// A convenience method to add a LocalPartitionNode with iceberg partition + /// function. + PlanBuilder& localPartition( + int numBuckets, + const std::vector& channels, + const std::vector& constValues); + + /// A convenience method to add a LocalPartitionNode with a single source (the + /// current plan node) and iceberg bucket property. +// PlanBuilder& localPartitionByBucket( +// const std::shared_ptr& +// bucketProperty); + + /// Add a LocalPartitionNode to partition the input using batch-level + /// round-robin. Number of partitions is determined at runtime based on + /// parallelism of the downstream pipeline. + /// + /// @param sources One or more plan nodes that produce input data. + PlanBuilder& localPartitionRoundRobin( + const std::vector& sources); + + /// A convenience method to add a LocalPartitionNode with a single source (the + /// current plan node). + PlanBuilder& localPartitionRoundRobin(); + + /// A convenience method to add a LocalPartitionNode for scale writer with + /// hash partitioning. + PlanBuilder& scaleWriterlocalPartition(const std::vector& keys); + + /// A convenience method to add a LocalPartitionNode for scale writer with + /// round-robin partitioning. + PlanBuilder& scaleWriterlocalPartitionRoundRobin(); + + /// Add a LocalPartitionNode to partition the input using row-wise + /// round-robin. Number of partitions is determined at runtime based on + /// parallelism of the downstream pipeline. + PlanBuilder& localPartitionRoundRobinRow(); + + /// Add a HashJoinNode to join two inputs using one or more join keys and an + /// optional filter. + /// + /// @param leftKeys Join keys from the probe side, the preceding plan node. + /// Cannot be empty. + /// @param rightKeys Join keys from the build side, the plan node specified in + /// 'build' parameter. The number and types of left and right keys must be the + /// same. + /// @param build Plan node for the build side. Typically, to reduce memory + /// usage, the smaller input is placed on the build-side. + /// @param filter Optional SQL expression for the additional join filter. Can + /// use columns from both probe and build sides of the join. 
+ /// @param outputLayout Output layout consisting of columns from probe and + /// build sides. + /// @param joinType Type of the join: inner, left, right, full, semi, or anti. + /// @param nullAware Applies to semi and anti joins. Indicates whether the + /// join follows IN (null-aware) or EXISTS (regular) semantic. + PlanBuilder& hashJoin( + const std::vector& leftKeys, + const std::vector& rightKeys, + const core::PlanNodePtr& build, + const std::string& filter, + const std::vector& outputLayout, + core::JoinType joinType = core::JoinType::kInner, + bool nullAware = false); + + /// Add a MergeJoinNode to join two inputs using one or more join keys and an + /// optional filter. The caller is responsible to ensure that inputs are + /// sorted in ascending order on the join keys. If that's not the case, the + /// query may produce incorrect results. + /// + /// See hashJoin method for the description of the parameters. + PlanBuilder& mergeJoin( + const std::vector& leftKeys, + const std::vector& rightKeys, + const core::PlanNodePtr& build, + const std::string& filter, + const std::vector& outputLayout, + core::JoinType joinType = core::JoinType::kInner); + + /// Add a NestedLoopJoinNode to join two inputs using filter as join + /// condition to perform equal/non-equal join. Only supports inner/outer + /// joins. + /// + /// @param right Right-side input. Typically, to reduce memory usage, the + /// smaller input is placed on the right-side. + /// @param joinCondition SQL expression as the join condition. Can + /// use columns from both probe and build sides of the join. + /// @param outputLayout Output layout consisting of columns from probe and + /// build sides. + /// @param joinType Type of the join: inner, left, right, full. + PlanBuilder& nestedLoopJoin( + const core::PlanNodePtr& right, + const std::string& joinCondition, + const std::vector& outputLayout, + core::JoinType joinType = core::JoinType::kInner); + + /// Add a NestedLoopJoinNode to produce a cross product of the inputs. First + /// input comes from the preceding plan node. Second input is specified in + /// 'right' parameter. + /// + /// @param right Right-side input. Typically, to reduce memory usage, the + /// smaller input is placed on the right-side. + /// @param outputLayout Output layout consisting of columns from left and + /// right sides. + PlanBuilder& nestedLoopJoin( + const core::PlanNodePtr& right, + const std::vector& outputLayout, + core::JoinType joinType = core::JoinType::kInner); + + static core::IndexLookupConditionPtr parseIndexJoinCondition( + const std::string& joinCondition, + const RowTypePtr& rowType, + memory::MemoryPool* pool); + + /// Add an IndexLoopJoinNode to join two inputs using one or more join keys + /// plus optional join conditions. First input comes from the preceding plan + /// node. Second input is specified in 'right' parameter and must be a + /// table source with the connector table handle with index lookup support. + /// + /// @param leftKeys Join keys from the probe side, the preceding plan node. + /// Cannot be empty. + /// @param rightKeys Join keys from the index lookup side, the plan node + /// specified in 'right' parameter. The number and types of left and right + /// keys must be the same. + /// @param right The right input source with index lookup support. + /// @param joinConditions SQL expressions as the join conditions. Each join + /// condition must use columns from both sides. For the right side, it can + /// only use one index column. 
Currently we support "in" and "between" join + /// conditions: + /// "in" condition is written as SQL expression as "contains(a, b)" where "b" + /// is the index column from right side and "a" is the condition column from + /// left side. "b" has type T and "a" has type ARRAT(T). + /// "between" condition is written as SQL expression as "a between b and c" + /// where "a" is the index column from right side and "b", "c" are either + /// condition column from left side or a constant but at least one of them + /// must not be constant. They all have the same type. + /// @param filter SQL expression for the additional join filter to apply on + /// join results. This supports filters that can't be converted into join + /// conditions or lookup conditions. Can be an empty string if no additional + /// filter is needed. + /// @param hasMarker if true, 'outputLayout' should include a boolean + /// column at the end to indicate if a join output row has a match or not. + /// This only applies for left join. + /// @param outputLayout Output layout consisting of columns from probe and + /// build sides. + /// @param joinType Type of the join supported: inner, left. + PlanBuilder& indexLookupJoin( + const std::vector& leftKeys, + const std::vector& rightKeys, + const core::TableScanNodePtr& right, + const std::vector& joinConditions, + const std::string& filter, + bool hasMarker, + const std::vector& outputLayout, + core::JoinType joinType = core::JoinType::kInner); + + /// Add an UnnestNode to unnest one or more columns of type array or map. + /// + /// The output will contain 'replicatedColumns' followed by unnested columns, + /// followed by an optional ordinality column. + /// + /// Array columns are unnested into a single column whose name is generated by + /// appending '_e' suffix to the array column name. + /// + /// Map columns are unnested into two columns whoes names are generated by + /// appending '_k' and '_v' suffixes to the map column name. + /// + /// @param replicateColumns A subset of input columns to include in the output + /// unmodified. + /// @param unnestColumns A subset of input columns to unnest. These columns + /// must be of type array or map. + /// @param ordinalColumn An optional name for the 'ordinal' column to produce. + /// This column contains the index of the element of the unnested array or + /// map. If not specified, the output will not contain this column. + /// @param markerName An optional name for the marker column to produce. + /// This column contains a boolean indicating whether the output row has + /// non-empty unnested value. If not specified, the output will not contain + /// this column and the unnest operator also skips producing output rows + /// with empty unnest value. + PlanBuilder& unnest( + const std::vector& replicateColumns, + const std::vector& unnestColumns, + const std::optional& ordinalColumn = std::nullopt, + const std::optional& markerName = std::nullopt); + + /// Add a WindowNode to compute one or more windowFunctions. + /// @param windowFunctions A list of one or more window function SQL like + /// strings to be computed by this windowNode. + /// A window function SQL string looks like : + /// "name(parameters) OVER (PARTITION BY partition_keys ORDER BY + /// sorting_keys [ROWS|RANGE BETWEEN [UNBOUNDED PRECEDING | x PRECEDING | + /// CURRENT ROW] AND [UNBOUNDED FOLLOWING | x FOLLOWING | CURRENT ROW]] AS + /// columnName" + /// The PARTITION BY and ORDER BY clauses are optional. 
An empty PARTITION + /// list means all the table rows are in a single partition. + /// An empty ORDER BY list means the window functions will be computed over + /// all the rows in the partition in a random order. Also, the default frame + /// if unspecified is RANGE OVER UNBOUNDED PRECEDING AND CURRENT ROW. + /// Some examples of window function strings are as follows: + /// "first_value(c) over (partition by a order by b) as d" + /// "first_value(c) over (partition by a) as d" + /// "first_value(c) over ()" + /// "row_number() over (order by b) as a" + /// "row_number() over (partition by a order by b + /// rows between a + 10 preceding and 10 following)" + PlanBuilder& window(const std::vector& windowFunctions); + + /// Adds WindowNode to compute window functions over pre-sorted inputs. + /// All functions must use same partition by and sorting keys and input must + /// be already sorted on these. + PlanBuilder& streamingWindow(const std::vector& windowFunctions); + + /// Add a RowNumberNode to compute single row_number window function with an + /// optional limit and no sorting. + PlanBuilder& rowNumber( + const std::vector& partitionKeys, + std::optional limit = std::nullopt, + bool generateRowNumber = true); + + /// Add a TopNRowNumberNode to compute row_number + /// function with a limit applied to sorted partitions. + PlanBuilder& topNRowNumber( + const std::vector& partitionKeys, + const std::vector& sortingKeys, + int32_t limit, + bool generateRowNumber); + + /// Add a TopNRowNumberNode to compute row_number, rank or dense_rank window + /// function with a limit applied to sorted partitions. + PlanBuilder& topNRank( + std::string_view function, + const std::vector& partitionKeys, + const std::vector& sortingKeys, + int32_t limit, + bool generateRowNumber); + + /// Add a MarkDistinctNode to compute aggregate mask channel + /// @param markerKey Name of output mask channel + /// @param distinctKeys List of columns to be marked distinct. + PlanBuilder& markDistinct( + std::string markerKey, + const std::vector& distinctKeys); + + /// Stores the latest plan node ID into the specified variable. Useful for + /// capturing IDs of the leaf plan nodes (table scans, exchanges, etc.) to use + /// when adding splits at runtime. + PlanBuilder& capturePlanNodeId(core::PlanNodeId& id) { + VELOX_CHECK_NOT_NULL(planNode_); + id = planNode_->id(); + return *this; + } + + /// Captures the id for the latest TableScanNode. this is useful when using + /// filtersAsNode(), where a table scan can have a filter over it. + PlanBuilder& captureScanNodeId(core::PlanNodeId& id) { + auto node = planNode_; + for (;;) { + VELOX_CHECK_NOT_NULL(node); + if (dynamic_cast(node.get())) { + id = node->id(); + return *this; + } + node = node->sources()[0]; + } + } + + /// Stores the latest plan node into the specified variable. Useful for + /// capturing intermediate plan nodes without interrupting the build flow. + template + PlanBuilder& capturePlanNode(std::shared_ptr& planNode) { + VELOX_CHECK_NOT_NULL(planNode_); + planNode = std::dynamic_pointer_cast(planNode_); + VELOX_CHECK_NOT_NULL(planNode); + return *this; + } + + /// Return the latest plan node, e.g. the root node of the plan + /// tree. The DistributedPlanBuilder override additionally moves stage + /// information to a parent PlanBuilder. + const core::PlanNodePtr& planNode() const { + return planNode_; + } + + /// Return tha latest plan node wrapped in core::PlanFragment struct. 
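+  ///
+  /// A minimal usage sketch (illustrative only; assumes the values() and
+  /// singleAggregation() helpers declared earlier in this header and a
+  /// 'rowVector' built by the caller):
+  ///
+  ///   auto fragment = PlanBuilder()
+  ///                       .values({rowVector})
+  ///                       .singleAggregation({}, {"count(1)"})
+  ///                       .planFragment();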
+ core::PlanFragment planFragment() const { + return core::PlanFragment{planNode_}; + } + + /// Add a user-defined PlanNode as the root of the plan. 'func' takes + /// the current root of the plan and returns the new root. + PlanBuilder& addNode( + std::function + func) { + planNode_ = func(nextPlanNodeId(), planNode_); + return *this; + } + + /// Set parsing options + PlanBuilder& setParseOptions(const parse::ParseOptions& options) { + options_ = options; + return *this; + } + + /// In a DistributedPlanBuilder, introduces a shuffle boundary. The plan so + /// far is shuffled and subsequent nodes consume the shuffle. Arguments are as + /// in partitionedOutput(). + virtual PlanBuilder& shufflePartitioned( + const std::vector& keys, + int numPartitions, + bool replicateNullsAndAny, + const std::vector& outputLayout = {}) { + VELOX_UNSUPPORTED("Needs DistributedPlanBuilder"); + } + + /// In a DistributedPlanBuilder, returns an Exchange on top of the plan built + /// so far and couples it to the current stage in the enclosing builder. + /// Arguments are as in shuffle(). + virtual core::PlanNodePtr shufflePartitionedResult( + const std::vector& keys, + int numPartitions, + bool replicateNullsAndAny, + const std::vector& outputLayout = {}) { + VELOX_UNSUPPORTED("Needs DistributedPlanBuilder"); + } + + /// In a DistributedPlanBuilder, returns an Exchange on top of the plan built + /// so far that ends with a broadcast PartitionedOutput node, and couples the + /// Exchange to the current stage in the enclosing builder. + virtual core::PlanNodePtr shuffleBroadcastResult() { + VELOX_UNSUPPORTED("Needs DistributedPlanBuilder"); + } + + protected: + // Users who create custom operators might want to extend the PlanBuilder to + // customize extended plan builders. Those functions are needed in such + // extensions. 
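+  //
+  // A minimal sketch of such an extension (illustrative only; MyPlanBuilder
+  // and MyCustomNode are hypothetical types defined by the user):
+  //
+  //   class MyPlanBuilder : public PlanBuilder {
+  //    public:
+  //     MyPlanBuilder& myCustomNode() {
+  //       planNode_ =
+  //           std::make_shared<MyCustomNode>(nextPlanNodeId(), planNode_);
+  //       return *this;
+  //     }
+  //   };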
+ core::PlanNodeId nextPlanNodeId(); + + std::shared_ptr inferTypes( + const core::ExprPtr& untypedExpr); + + std::shared_ptr planNodeIdGenerator() const { + return planNodeIdGenerator_; + } + + memory::MemoryPool* pool() const { + return pool_; + } + + private: + std::shared_ptr field(column_index_t index); + + std::vector> fields( + const std::vector& indices); + + std::shared_ptr field( + const std::string& name); + + std::vector exprs( + const std::vector& expressions, + const RowTypePtr& inputType); + + std::vector> fields( + const std::vector& names); + + static std::vector> fields( + const RowTypePtr& inputType, + const std::vector& names); + + static std::vector> fields( + const RowTypePtr& inputType, + const std::vector& indices); + + static std::shared_ptr field( + const RowTypePtr& inputType, + column_index_t index); + + static std::shared_ptr field( + const RowTypePtr& inputType, + const std::string& name); + + core::PlanNodePtr createIntermediateOrFinalAggregation( + core::AggregationNode::Step step, + const core::AggregationNode* partialAggNode); + + struct AggregatesAndNames { + std::vector aggregates; + std::vector names; + }; + + AggregatesAndNames createAggregateExpressionsAndNames( + const std::vector& aggregates, + const std::vector& masks, + core::AggregationNode::Step step, + const std::vector>& rawInputTypes = {}); + + PlanBuilder& aggregation( + const std::vector& groupingKeys, + const std::vector& preGroupedKeys, + const std::vector& aggregates, + const std::vector& masks, + core::AggregationNode::Step step, + bool ignoreNullKeys, + const std::vector>& rawInputTypes); + + /// Create WindowNode based on whether input is sorted and then compute the + /// window functions. + PlanBuilder& window( + const std::vector& windowFunctions, + bool inputSorted); + + protected: + core::PlanNodePtr planNode_; + parse::ParseOptions options_; + std::shared_ptr tableScanBuilder_; + std::shared_ptr indexLookupJoinBuilder_; + + private: + std::shared_ptr planNodeIdGenerator_; + memory::MemoryPool* pool_; + bool filtersAsNode_{false}; +}; +} // namespace facebook::velox::exec::test diff --git a/velox/connectors/lakehouse/storage_adapters/CMakeLists.txt b/velox/connectors/lakehouse/storage_adapters/CMakeLists.txt new file mode 100644 index 000000000000..bd7c37f81640 --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/CMakeLists.txt @@ -0,0 +1,18 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +add_subdirectory(s3fs) +add_subdirectory(hdfs) +add_subdirectory(gcs) +add_subdirectory(abfs) diff --git a/velox/connectors/lakehouse/storage_adapters/abfs/AbfsConfig.cpp b/velox/connectors/lakehouse/storage_adapters/abfs/AbfsConfig.cpp new file mode 100644 index 000000000000..4aa75c1fc1d6 --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/abfs/AbfsConfig.cpp @@ -0,0 +1,201 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "AbfsConfig.h" + +#include "AbfsUtil.h" +#include "velox/common/config/Config.h" + +#include + +namespace facebook::velox::filesystems { + +std::function()> + AbfsConfig::testWriteClientFn_; + +class DataLakeFileClientWrapper final : public AzureDataLakeFileClient { + public: + DataLakeFileClientWrapper(std::unique_ptr client) + : client_(std::move(client)) {} + + void create() override { + client_->Create(); + } + + Azure::Storage::Files::DataLake::Models::PathProperties getProperties() + override { + return client_->GetProperties().Value; + } + + void append(const uint8_t* buffer, size_t size, uint64_t offset) override { + auto bodyStream = Azure::Core::IO::MemoryBodyStream(buffer, size); + client_->Append(bodyStream, offset); + } + + void flush(uint64_t position) override { + client_->Flush(position); + } + + void close() override { + // do nothing. + } + + std::string getUrl() const override { + return client_->GetUrl(); + } + + private: + const std::unique_ptr client_; +}; + +AbfsConfig::AbfsConfig( + std::string_view path, + const config::ConfigBase& config) { + std::string_view file; + isHttps_ = true; + if (path.find(kAbfssScheme) == 0) { + file = path.substr(kAbfssScheme.size()); + } else if (path.find(kAbfsScheme) == 0) { + file = path.substr(kAbfsScheme.size()); + isHttps_ = false; + } else { + VELOX_FAIL("Invalid ABFS Path {}", path); + } + + auto firstAt = file.find_first_of("@"); + fileSystem_ = file.substr(0, firstAt); + auto firstSep = file.find_first_of("/"); + filePath_ = file.substr(firstSep + 1); + accountNameWithSuffix_ = file.substr(firstAt + 1, firstSep - firstAt - 1); + + auto authTypeKey = + fmt::format("{}.{}", kAzureAccountAuthType, accountNameWithSuffix_); + authType_ = kAzureSharedKeyAuthType; + if (config.valueExists(authTypeKey)) { + authType_ = config.get(authTypeKey).value(); + } + if (authType_ == kAzureSharedKeyAuthType) { + auto credKey = + fmt::format("{}.{}", kAzureAccountKey, accountNameWithSuffix_); + VELOX_USER_CHECK( + config.valueExists(credKey), "Config {} not found", credKey); + auto firstDot = accountNameWithSuffix_.find_first_of("."); + auto accountName = accountNameWithSuffix_.substr(0, firstDot); + auto endpointSuffix = accountNameWithSuffix_.substr(firstDot + 5); + std::stringstream ss; + ss << "DefaultEndpointsProtocol=" << (isHttps_ ? 
"https" : "http"); + ss << ";AccountName=" << accountName; + ss << ";AccountKey=" << config.get(credKey).value(); + ss << ";EndpointSuffix=" << endpointSuffix; + + if (config.valueExists(kAzureBlobEndpoint)) { + ss << ";BlobEndpoint=" + << config.get(kAzureBlobEndpoint).value(); + } + ss << ";"; + connectionString_ = ss.str(); + } else if (authType_ == kAzureOAuthAuthType) { + auto clientIdKey = fmt::format( + "{}.{}", kAzureAccountOAuth2ClientId, accountNameWithSuffix_); + auto clientSecretKey = fmt::format( + "{}.{}", kAzureAccountOAuth2ClientSecret, accountNameWithSuffix_); + auto clientEndpointKey = fmt::format( + "{}.{}", kAzureAccountOAuth2ClientEndpoint, accountNameWithSuffix_); + VELOX_USER_CHECK( + config.valueExists(clientIdKey), "Config {} not found", clientIdKey); + VELOX_USER_CHECK( + config.valueExists(clientSecretKey), + "Config {} not found", + clientSecretKey); + VELOX_USER_CHECK( + config.valueExists(clientEndpointKey), + "Config {} not found", + clientEndpointKey); + auto clientEndpoint = config.get(clientEndpointKey).value(); + auto firstSep = clientEndpoint.find_first_of("/", /* https:// */ 8); + authorityHost_ = clientEndpoint.substr(0, firstSep + 1); + auto sedondSep = clientEndpoint.find_first_of("/", firstSep + 1); + tenentId_ = clientEndpoint.substr(firstSep + 1, sedondSep - firstSep - 1); + Azure::Identity::ClientSecretCredentialOptions options; + options.AuthorityHost = authorityHost_; + tokenCredential_ = + std::make_shared( + tenentId_, + config.get(clientIdKey).value(), + config.get(clientSecretKey).value(), + options); + } else if (authType_ == kAzureSASAuthType) { + auto sasKey = fmt::format("{}.{}", kAzureSASKey, accountNameWithSuffix_); + VELOX_USER_CHECK(config.valueExists(sasKey), "Config {} not found", sasKey); + sas_ = config.get(sasKey).value(); + } else { + VELOX_USER_FAIL( + "Unsupported auth type {}, supported auth types are SharedKey, OAuth and SAS.", + authType_); + } +} + +std::unique_ptr AbfsConfig::getReadFileClient() { + if (authType_ == kAzureSASAuthType) { + auto url = getUrl(true); + return std::make_unique(fmt::format("{}?{}", url, sas_)); + } else if (authType_ == kAzureOAuthAuthType) { + auto url = getUrl(true); + return std::make_unique(url, tokenCredential_); + } else { + return std::make_unique(BlobClient::CreateFromConnectionString( + connectionString_, fileSystem_, filePath_)); + } +} + +std::unique_ptr AbfsConfig::getWriteFileClient() { + if (testWriteClientFn_) { + return testWriteClientFn_(); + } + std::unique_ptr client; + if (authType_ == kAzureSASAuthType) { + auto url = getUrl(false); + client = + std::make_unique(fmt::format("{}?{}", url, sas_)); + } else if (authType_ == kAzureOAuthAuthType) { + auto url = getUrl(false); + client = std::make_unique(url, tokenCredential_); + } else { + client = std::make_unique( + DataLakeFileClient::CreateFromConnectionString( + connectionString_, fileSystem_, filePath_)); + } + return std::make_unique(std::move(client)); +} + +std::string AbfsConfig::getUrl(bool withblobSuffix) { + std::string accountNameWithSuffixForUrl(accountNameWithSuffix_); + if (withblobSuffix) { + // We should use correct suffix for blob client. + size_t start_pos = accountNameWithSuffixForUrl.find("dfs"); + if (start_pos != std::string::npos) { + accountNameWithSuffixForUrl.replace(start_pos, 3, "blob"); + } + } + return fmt::format( + "{}{}/{}/{}", + isHttps_ ? 
"https://" : "http://", + accountNameWithSuffixForUrl, + fileSystem_, + filePath_); +} + +} // namespace facebook::velox::filesystems diff --git a/velox/connectors/lakehouse/storage_adapters/abfs/AbfsConfig.h b/velox/connectors/lakehouse/storage_adapters/abfs/AbfsConfig.h new file mode 100644 index 000000000000..7e43f489b266 --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/abfs/AbfsConfig.h @@ -0,0 +1,130 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include "AzureDataLakeFileClient.h" + +using namespace Azure::Storage::Blobs; +using namespace Azure::Storage::Files::DataLake; + +namespace facebook::velox::config { +class ConfigBase; +} + +namespace facebook::velox::filesystems { + +// This is used to specify the Azurite endpoint in testing. +static constexpr const char* kAzureBlobEndpoint{"fs.azure.blob-endpoint"}; + +// The authentication mechanism is set in `fs.azure.account.auth.type` (or the +// account specific variant). The supported values are SharedKey, OAuth and SAS. +static constexpr const char* kAzureAccountAuthType = + "fs.azure.account.auth.type"; + +static constexpr const char* kAzureAccountKey = "fs.azure.account.key"; + +static constexpr const char* kAzureSASKey = "fs.azure.sas.fixed.token"; + +static constexpr const char* kAzureAccountOAuth2ClientId = + "fs.azure.account.oauth2.client.id"; + +static constexpr const char* kAzureAccountOAuth2ClientSecret = + "fs.azure.account.oauth2.client.secret"; + +// Token end point, this can be found through Azure portal. For example: +// https://login.microsoftonline.com/{TENANTID}/oauth2/token +static constexpr const char* kAzureAccountOAuth2ClientEndpoint = + "fs.azure.account.oauth2.client.endpoint"; + +static constexpr const char* kAzureSharedKeyAuthType = "SharedKey"; + +static constexpr const char* kAzureOAuthAuthType = "OAuth"; + +static constexpr const char* kAzureSASAuthType = "SAS"; + +class AbfsConfig { + public: + explicit AbfsConfig(std::string_view path, const config::ConfigBase& config); + + std::unique_ptr getReadFileClient(); + + std::unique_ptr getWriteFileClient(); + + std::string filePath() const { + return filePath_; + } + + /// Test only. + std::string fileSystem() const { + return fileSystem_; + } + + /// Test only. + std::string connectionString() const { + return connectionString_; + } + + /// Test only. + std::string tenentId() const { + return tenentId_; + } + + /// Test only. + std::string authorityHost() const { + return authorityHost_; + } + + /// Test only. + static void setUpTestWriteClient( + std::function()> testClientFn) { + testWriteClientFn_ = testClientFn; + } + + /// Test only. + static void tearDownTestWriteClient() { + testWriteClientFn_ = nullptr; + } + + private: + std::string getUrl(bool withblobSuffix); + + std::string authType_; + + // Container name is called FileSystem in some Azure API. 
+ std::string fileSystem_; + std::string filePath_; + std::string connectionString_; + + bool isHttps_; + std::string accountNameWithSuffix_; + + std::string sas_; + + std::string tenentId_; + std::string authorityHost_; + std::shared_ptr tokenCredential_; + + static std::function()> + testWriteClientFn_; +}; + +} // namespace facebook::velox::filesystems diff --git a/velox/connectors/lakehouse/storage_adapters/abfs/AbfsFileSystem.cpp b/velox/connectors/lakehouse/storage_adapters/abfs/AbfsFileSystem.cpp new file mode 100644 index 000000000000..70afebb82284 --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/abfs/AbfsFileSystem.cpp @@ -0,0 +1,237 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "AbfsFileSystem.h" + +#include +#include +#include + +#include "AbfsConfig.h" +#include "AbfsReadFile.h" +#include "AbfsUtil.h" +#include "AbfsWriteFile.h" + +namespace facebook::velox::filesystems { + +class AbfsReadFile::Impl { + constexpr static uint64_t kNaturalReadSize = 4 << 20; // 4M + constexpr static uint64_t kReadConcurrency = 8; + + public: + explicit Impl(std::string_view path, const config::ConfigBase& config) { + auto abfsConfig = AbfsConfig(path, config); + filePath_ = abfsConfig.filePath(); + fileClient_ = abfsConfig.getReadFileClient(); + } + + void initialize(const FileOptions& options) { + if (options.fileSize.has_value()) { + VELOX_CHECK_GE( + options.fileSize.value(), 0, "File size must be non-negative"); + length_ = options.fileSize.value(); + } + + if (length_ != -1) { + return; + } + + try { + auto properties = fileClient_->GetProperties(); + length_ = properties.Value.BlobSize; + } catch (Azure::Storage::StorageException& e) { + throwStorageExceptionWithOperationDetails("GetProperties", filePath_, e); + } + VELOX_CHECK_GE(length_, 0); + } + + std::string_view pread( + uint64_t offset, + uint64_t length, + void* buffer, + File::IoStats* stats) const { + preadInternal(offset, length, static_cast(buffer)); + return {static_cast(buffer), length}; + } + + std::string pread(uint64_t offset, uint64_t length, File::IoStats* stats) + const { + std::string result(length, 0); + preadInternal(offset, length, result.data()); + return result; + } + + uint64_t preadv( + uint64_t offset, + const std::vector>& buffers, + File::IoStats* stats) const { + size_t length = 0; + auto size = buffers.size(); + for (auto& range : buffers) { + length += range.size(); + } + std::string result(length, 0); + preadInternal(offset, length, static_cast(result.data())); + size_t resultOffset = 0; + for (auto range : buffers) { + if (range.data()) { + memcpy(range.data(), &(result.data()[resultOffset]), range.size()); + } + resultOffset += range.size(); + } + + return length; + } + + uint64_t preadv( + folly::Range regions, + folly::Range iobufs, + File::IoStats* stats) const { + size_t length = 0; + VELOX_CHECK_EQ(regions.size(), iobufs.size()); + for (size_t i = 0; i < regions.size(); ++i) { + const auto& region = 
regions[i]; + auto& output = iobufs[i]; + output = folly::IOBuf(folly::IOBuf::CREATE, region.length); + pread(region.offset, region.length, output.writableData(), stats); + output.append(region.length); + length += region.length; + } + + return length; + } + + uint64_t size() const { + return length_; + } + + uint64_t memoryUsage() const { + return 3 * sizeof(std::string) + sizeof(int64_t); + } + + bool shouldCoalesce() const { + return false; + } + + std::string getName() const { + return filePath_; + } + + uint64_t getNaturalReadSize() const { + return kNaturalReadSize; + } + + private: + void preadInternal(uint64_t offset, uint64_t length, char* position) const { + // Read the desired range of bytes. + Azure::Core::Http::HttpRange range; + range.Offset = offset; + range.Length = length; + + Azure::Storage::Blobs::DownloadBlobOptions blob; + blob.Range = range; + auto response = fileClient_->Download(blob); + response.Value.BodyStream->ReadToCount( + reinterpret_cast(position), length); + } + + std::string filePath_; + std::unique_ptr fileClient_; + int64_t length_ = -1; +}; + +AbfsReadFile::AbfsReadFile( + std::string_view path, + const config::ConfigBase& config) { + impl_ = std::make_shared(path, config); +} + +void AbfsReadFile::initialize(const FileOptions& options) { + return impl_->initialize(options); +} + +std::string_view AbfsReadFile::pread( + uint64_t offset, + uint64_t length, + void* buffer, + File::IoStats* stats) const { + return impl_->pread(offset, length, buffer, stats); +} + +std::string AbfsReadFile::pread( + uint64_t offset, + uint64_t length, + File::IoStats* stats) const { + return impl_->pread(offset, length, stats); +} + +uint64_t AbfsReadFile::preadv( + uint64_t offset, + const std::vector>& buffers, + File::IoStats* stats) const { + return impl_->preadv(offset, buffers, stats); +} + +uint64_t AbfsReadFile::preadv( + folly::Range regions, + folly::Range iobufs, + File::IoStats* stats) const { + return impl_->preadv(regions, iobufs, stats); +} + +uint64_t AbfsReadFile::size() const { + return impl_->size(); +} + +uint64_t AbfsReadFile::memoryUsage() const { + return impl_->memoryUsage(); +} + +bool AbfsReadFile::shouldCoalesce() const { + return false; +} + +std::string AbfsReadFile::getName() const { + return impl_->getName(); +} + +uint64_t AbfsReadFile::getNaturalReadSize() const { + return impl_->getNaturalReadSize(); +} + +AbfsFileSystem::AbfsFileSystem(std::shared_ptr config) + : FileSystem(config) { + VELOX_CHECK_NOT_NULL(config.get()); +} + +std::string AbfsFileSystem::name() const { + return "ABFS"; +} + +std::unique_ptr AbfsFileSystem::openFileForRead( + std::string_view path, + const FileOptions& options) { + auto abfsfile = std::make_unique(path, *config_); + abfsfile->initialize(options); + return abfsfile; +} + +std::unique_ptr AbfsFileSystem::openFileForWrite( + std::string_view path, + const FileOptions& /*unused*/) { + return std::make_unique(path, *config_); +} +} // namespace facebook::velox::filesystems diff --git a/velox/connectors/lakehouse/storage_adapters/abfs/AbfsFileSystem.h b/velox/connectors/lakehouse/storage_adapters/abfs/AbfsFileSystem.h new file mode 100644 index 000000000000..c0d3d60ccdee --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/abfs/AbfsFileSystem.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include "velox/common/file/FileSystems.h" + +namespace facebook::velox::filesystems { + +/// Implementation of the ABS (Azure Blob Storage) filesystem and file +/// interface. We provide a registration method for reading and writing files so +/// that the appropriate type of file can be constructed based on a filename. +/// The supported schema is `abfs(s)://` to align with the valid scheme +/// identifiers used in the Hadoop Filesystem ABFS driver when integrating with +/// Azure Blob Storage. One key difference here is that the ABFS Hadoop client +/// driver always uses Transport Layer Security (TLS) regardless of the +/// authentication method chosen when using the `abfss` schema, but not mandated +/// when using the `abfs` schema. In our implementation, we always use the HTTPS +/// protocol, regardless of whether the schema is `abfs://` or `abfss://`. The +/// legacy wabs(s):// schema is not supported as it has been deprecated already +/// by Azure Storage team. Reference document - +/// https://learn.microsoft.com/en-us/azure/databricks/storage/azure-storage. +class AbfsFileSystem : public FileSystem { + public: + explicit AbfsFileSystem(std::shared_ptr config); + + std::string name() const override; + + std::unique_ptr openFileForRead( + std::string_view path, + const FileOptions& options = {}) override; + + std::unique_ptr openFileForWrite( + std::string_view path, + const FileOptions& options = {}) override; + + void rename( + std::string_view path, + std::string_view newPath, + bool overWrite = false) override { + VELOX_UNSUPPORTED("rename for abfs not implemented"); + } + + void remove(std::string_view path) override { + VELOX_UNSUPPORTED("remove for abfs not implemented"); + } + + bool exists(std::string_view path) override { + VELOX_UNSUPPORTED("exists for abfs not implemented"); + } + + std::vector list(std::string_view path) override { + VELOX_UNSUPPORTED("list for abfs not implemented"); + } + + void mkdir( + std::string_view path, + const filesystems::DirectoryOptions& options = {}) override { + VELOX_UNSUPPORTED("mkdir for abfs not implemented"); + } + + void rmdir(std::string_view path) override { + VELOX_UNSUPPORTED("rmdir for abfs not implemented"); + } +}; + +void registerAbfsFileSystem(); +} // namespace facebook::velox::filesystems diff --git a/velox/connectors/lakehouse/storage_adapters/abfs/AbfsReadFile.h b/velox/connectors/lakehouse/storage_adapters/abfs/AbfsReadFile.h new file mode 100644 index 000000000000..942439c06c1e --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/abfs/AbfsReadFile.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "velox/common/file/File.h" + +namespace facebook::velox::config { +class ConfigBase; +} + +namespace facebook::velox::filesystems { +class AbfsReadFile final : public ReadFile { + public: + explicit AbfsReadFile( + std::string_view path, + const config::ConfigBase& config); + + void initialize(const FileOptions& options); + + std::string_view pread( + uint64_t offset, + uint64_t length, + void* buf, + File::IoStats* stats = nullptr) const final; + + std::string pread( + uint64_t offset, + uint64_t length, + File::IoStats* stats = nullptr) const final; + + uint64_t preadv( + uint64_t offset, + const std::vector>& buffers, + File::IoStats* stats = nullptr) const final; + + uint64_t preadv( + folly::Range regions, + folly::Range iobufs, + File::IoStats* stats = nullptr) const final; + + uint64_t size() const final; + + uint64_t memoryUsage() const final; + + bool shouldCoalesce() const final; + + std::string getName() const final; + + uint64_t getNaturalReadSize() const final; + + protected: + class Impl; + std::shared_ptr impl_; +}; +} // namespace facebook::velox::filesystems diff --git a/velox/connectors/lakehouse/storage_adapters/abfs/AbfsUtil.h b/velox/connectors/lakehouse/storage_adapters/abfs/AbfsUtil.h new file mode 100644 index 000000000000..925c6f91ece9 --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/abfs/AbfsUtil.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include "velox/common/file/File.h" + +namespace facebook::velox::filesystems { +namespace { +constexpr std::string_view kAbfsScheme{"abfs://"}; +constexpr std::string_view kAbfssScheme{"abfss://"}; +} // namespace + +inline bool isAbfsFile(const std::string_view filename) { + return filename.find(kAbfsScheme) == 0 || filename.find(kAbfssScheme) == 0; +} + +inline std::string throwStorageExceptionWithOperationDetails( + std::string operation, + std::string path, + Azure::Storage::StorageException& error) { + const auto errMsg = fmt::format( + "Operation '{}' to path '{}' encountered azure storage exception, Details: '{}'.", + operation, + path, + error.what()); + if (error.StatusCode == Azure::Core::Http::HttpStatusCode::NotFound) { + VELOX_FILE_NOT_FOUND_ERROR(errMsg); + } + VELOX_FAIL(errMsg); +} + +} // namespace facebook::velox::filesystems diff --git a/velox/connectors/lakehouse/storage_adapters/abfs/AbfsWriteFile.cpp b/velox/connectors/lakehouse/storage_adapters/abfs/AbfsWriteFile.cpp new file mode 100644 index 000000000000..995bb4dffa99 --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/abfs/AbfsWriteFile.cpp @@ -0,0 +1,120 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "AbfsWriteFile.h" +#include "AbfsConfig.h" +#include "AbfsUtil.h" + +namespace facebook::velox::filesystems { + +class AbfsWriteFile::Impl { + public: + explicit Impl( + std::string_view path, + std::unique_ptr& client) + : path_(path), client_(std::move(client)) { + // Make it a no-op if invoked twice. 
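+    // position_ is default-initialized to -1 (i.e. the maximum uint64_t
+    // value), so the guard below only takes effect if this initialization
+    // logic is ever run more than once for the same client.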
+ if (position_ != -1) { + return; + } + position_ = 0; + VELOX_CHECK(!checkIfFileExists(), "File already exists"); + client_->create(); + } + + void close() { + if (!closed_) { + flush(); + closed_ = true; + } + } + + void flush() { + if (!closed_) { + client_->flush(position_); + } + } + + void append(std::string_view data) { + VELOX_CHECK(!closed_, "File is not open"); + if (data.size() == 0) { + return; + } + append(data.data(), data.size()); + } + + uint64_t size() const { + return client_->getProperties().FileSize; + } + + void append(const char* buffer, size_t size) { + client_->append(reinterpret_cast(buffer), size, position_); + position_ += size; + } + + private: + bool checkIfFileExists() { + try { + client_->getProperties(); + return true; + } catch (Azure::Storage::StorageException& e) { + if (e.StatusCode != Azure::Core::Http::HttpStatusCode::NotFound) { + throwStorageExceptionWithOperationDetails("GetProperties", path_, e); + } + return false; + } + } + + const std::string path_; + const std::unique_ptr client_; + + uint64_t position_ = -1; + bool closed_ = false; +}; + +AbfsWriteFile::AbfsWriteFile( + std::string_view path, + const config::ConfigBase& config) { + auto abfsConfig = AbfsConfig(path, config); + auto client = abfsConfig.getWriteFileClient(); + impl_ = std::make_unique(path, client); +} + +AbfsWriteFile::AbfsWriteFile( + std::string_view path, + std::unique_ptr& client) { + impl_ = std::make_unique(path, client); +} + +AbfsWriteFile::~AbfsWriteFile() {} + +void AbfsWriteFile::close() { + impl_->close(); +} + +void AbfsWriteFile::flush() { + impl_->flush(); +} + +void AbfsWriteFile::append(std::string_view data) { + impl_->append(data); +} + +uint64_t AbfsWriteFile::size() const { + return impl_->size(); +} + +} // namespace facebook::velox::filesystems diff --git a/velox/connectors/lakehouse/storage_adapters/abfs/AbfsWriteFile.h b/velox/connectors/lakehouse/storage_adapters/abfs/AbfsWriteFile.h new file mode 100644 index 000000000000..cb795970ceb1 --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/abfs/AbfsWriteFile.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include "AzureDataLakeFileClient.h" +#include "velox/common/file/File.h" + +namespace facebook::velox::config { +class ConfigBase; +} + +namespace facebook::velox::filesystems { + +/// We are using the DFS (Data Lake Storage) endpoint for Azure Blob File write +/// operations because the DFS endpoint is designed to be compatible with file +/// operation semantics, such as `Append` to a file and file `Flush` operations. +/// The legacy Blob endpoint can only be used for blob level append and flush +/// operations. When using the Blob endpoint, we would need to manually manage +/// the creation, appending, and committing of file-related blocks. + +/// Implementation of abfs write file. Nothing written to the file should be +/// read back until it is closed. 
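+///
+/// A minimal usage sketch (the path and 'config' below are illustrative):
+///
+///   AbfsWriteFile file(
+///       "abfss://container@account.dfs.core.windows.net/out.txt", config);
+///   file.append("some data");
+///   file.flush();
+///   file.close();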
+class AbfsWriteFile : public WriteFile { + public: + constexpr static uint64_t kNaturalWriteSize = 8 << 20; // 8M + + /// @param path The file path to write. + /// @param connectStr The connection string used to auth the storage account. + AbfsWriteFile(std::string_view path, const config::ConfigBase& config); + + /// @param path The file path to write. + /// @param client The AdlsFileClient. + AbfsWriteFile( + std::string_view path, + std::unique_ptr& client); + + ~AbfsWriteFile(); + + /// Get the file size. + uint64_t size() const override; + + /// Flush the data. + void flush() override; + + /// Write the data by append mode. + void append(std::string_view data) override; + + /// Close the file. + void close() override; + + protected: + class Impl; + std::unique_ptr impl_; +}; +} // namespace facebook::velox::filesystems diff --git a/velox/connectors/lakehouse/storage_adapters/abfs/AzureDataLakeFileClient.h b/velox/connectors/lakehouse/storage_adapters/abfs/AzureDataLakeFileClient.h new file mode 100644 index 000000000000..abd607c0d1b2 --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/abfs/AzureDataLakeFileClient.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +namespace Azure::Storage::Files::DataLake::Models { +class PathProperties; +} + +namespace facebook::velox::filesystems { + +// Azurite Simulator does not yet support the DFS endpoint. +// (For more information, see https://github.com/Azure/Azurite/issues/553 and +// https://github.com/Azure/Azurite/issues/409). +// You can find a comparison between DFS and Blob endpoints here: +// https://github.com/Azure/Azurite/wiki/ADLS-Gen2-Implementation-Guidance +// To facilitate unit testing of file write scenarios, we define the +// AzureDatalakeFileClient which can be mocked during testing. + +class AzureDataLakeFileClient { + public: + virtual ~AzureDataLakeFileClient() {} + + virtual void create() = 0; + virtual Azure::Storage::Files::DataLake::Models::PathProperties + getProperties() = 0; + virtual void append(const uint8_t* buffer, size_t size, uint64_t offset) = 0; + virtual void flush(uint64_t position) = 0; + virtual void close() = 0; + virtual std::string getUrl() const = 0; +}; +} // namespace facebook::velox::filesystems diff --git a/velox/connectors/lakehouse/storage_adapters/abfs/CMakeLists.txt b/velox/connectors/lakehouse/storage_adapters/abfs/CMakeLists.txt new file mode 100644 index 000000000000..2358e06ff7b4 --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/abfs/CMakeLists.txt @@ -0,0 +1,43 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# for generated headers + +velox_add_library(velox_lakehouse_abfs RegisterAbfsFileSystem.cpp) + +if(VELOX_ENABLE_ABFS) + velox_sources( + velox_lakehouse_abfs + PRIVATE + AbfsFileSystem.cpp + AbfsConfig.cpp + AbfsWriteFile.cpp) + + velox_link_libraries( + velox_lakehouse_abfs + PUBLIC velox_file + velox_core + velox_hiveV2_config + velox_dwio_common_exception + Azure::azure-identity + Azure::azure-storage-blobs + Azure::azure-storage-files-datalake + Folly::folly + glog::glog + fmt::fmt) + + if(${VELOX_BUILD_TESTING}) + add_subdirectory(tests) + endif() +endif() diff --git a/velox/connectors/lakehouse/storage_adapters/abfs/RegisterAbfsFileSystem.cpp b/velox/connectors/lakehouse/storage_adapters/abfs/RegisterAbfsFileSystem.cpp new file mode 100644 index 000000000000..e5b39b7074be --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/abfs/RegisterAbfsFileSystem.cpp @@ -0,0 +1,63 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifdef VELOX_ENABLE_ABFS +#include "velox/common/config/Config.h" +#include "velox/connectors/lakehouse/storage_adapters/abfs/AbfsFileSystem.h" // @manual +#include "velox/connectors/lakehouse/storage_adapters/abfs/AbfsUtil.h" // @manual +#include "velox/dwio/common/FileSink.h" +#endif + +namespace facebook::velox::filesystems { + +#ifdef VELOX_ENABLE_ABFS +folly::once_flag abfsInitiationFlag; + +std::shared_ptr abfsFileSystemGenerator( + std::shared_ptr properties, + std::string_view filePath) { + static std::shared_ptr filesystem; + folly::call_once(abfsInitiationFlag, [&properties]() { + filesystem = std::make_shared(properties); + }); + return filesystem; +} + +std::unique_ptr abfsWriteFileSinkGenerator( + const std::string& fileURI, + const velox::dwio::common::FileSink::Options& options) { + if (isAbfsFile(fileURI)) { + auto fileSystem = + filesystems::getFileSystem(fileURI, options.connectorProperties); + return std::make_unique( + fileSystem->openFileForWrite(fileURI), + fileURI, + options.metricLogger, + options.stats); + } + return nullptr; +} +#endif + +void registerAbfsFileSystem() { +#ifdef VELOX_ENABLE_ABFS + registerFileSystem(isAbfsFile, std::function(abfsFileSystemGenerator)); + dwio::common::FileSink::registerFactory( + std::function(abfsWriteFileSinkGenerator)); +#endif +} + +} // namespace facebook::velox::filesystems diff --git a/velox/connectors/lakehouse/storage_adapters/abfs/RegisterAbfsFileSystem.h b/velox/connectors/lakehouse/storage_adapters/abfs/RegisterAbfsFileSystem.h new file mode 100644 index 000000000000..cbe6758808a0 --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/abfs/RegisterAbfsFileSystem.h @@ -0,0 +1,24 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +namespace facebook::velox::filesystems { + +// Register the ABFS filesystem. +void registerAbfsFileSystem(); + +} // namespace facebook::velox::filesystems diff --git a/velox/connectors/lakehouse/storage_adapters/abfs/tests/AbfsCommonTest.cpp b/velox/connectors/lakehouse/storage_adapters/abfs/tests/AbfsCommonTest.cpp new file mode 100644 index 000000000000..1c9da810ef87 --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/abfs/tests/AbfsCommonTest.cpp @@ -0,0 +1,154 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "velox/common/base/tests/GTestUtils.h" +#include "velox/common/config/Config.h" +#include "velox/connectors/lakehouse/storage_adapters/abfs/AbfsConfig.h" +#include "velox/connectors/lakehouse/storage_adapters/abfs/AbfsUtil.h" + +#include "gtest/gtest.h" + +using namespace facebook::velox::filesystems; +using namespace facebook::velox; + +TEST(AbfsUtilsTest, isAbfsFile) { + EXPECT_FALSE(isAbfsFile("abfs:")); + EXPECT_FALSE(isAbfsFile("abfss:")); + EXPECT_FALSE(isAbfsFile("abfs:/")); + EXPECT_FALSE(isAbfsFile("abfss:/")); + EXPECT_TRUE(isAbfsFile("abfs://test@test.dfs.core.windows.net/test")); + EXPECT_TRUE(isAbfsFile("abfss://test@test.dfs.core.windows.net/test")); +} + +TEST(AbfsConfigTest, authType) { + const config::ConfigBase config( + {{"fs.azure.account.auth.type.efg.dfs.core.windows.net", "Custom"}, + {"fs.azure.account.key.efg.dfs.core.windows.net", "456"}}, + false); + VELOX_ASSERT_USER_THROW( + std::make_unique( + "abfss://foo@efg.dfs.core.windows.net/test.txt", config), + "Unsupported auth type Custom, supported auth types are SharedKey, OAuth and SAS."); +} + +TEST(AbfsConfigTest, clientSecretOAuth) { + const config::ConfigBase config( + {{"fs.azure.account.auth.type.efg.dfs.core.windows.net", "OAuth"}, + {"fs.azure.account.auth.type.bar1.dfs.core.windows.net", "OAuth"}, + {"fs.azure.account.auth.type.bar2.dfs.core.windows.net", "OAuth"}, + {"fs.azure.account.auth.type.bar3.dfs.core.windows.net", "OAuth"}, + {"fs.azure.account.oauth2.client.id.efg.dfs.core.windows.net", "test"}, + {"fs.azure.account.oauth2.client.secret.efg.dfs.core.windows.net", + "test"}, + {"fs.azure.account.oauth2.client.endpoint.efg.dfs.core.windows.net", + "https://login.microsoftonline.com/{TENANTID}/oauth2/token"}, + {"fs.azure.account.oauth2.client.id.bar2.dfs.core.windows.net", "test"}, + {"fs.azure.account.oauth2.client.id.bar3.dfs.core.windows.net", "test"}, + {"fs.azure.account.oauth2.client.secret.bar3.dfs.core.windows.net", + "test"}}, + false); + VELOX_ASSERT_USER_THROW( + std::make_unique( + "abfss://foo@bar1.dfs.core.windows.net/test.txt", config), + "Config fs.azure.account.oauth2.client.id.bar1.dfs.core.windows.net not found"); + VELOX_ASSERT_USER_THROW( + std::make_unique( + "abfss://foo@bar2.dfs.core.windows.net/test.txt", config), + "Config fs.azure.account.oauth2.client.secret.bar2.dfs.core.windows.net not found"); + VELOX_ASSERT_USER_THROW( + std::make_unique( + "abfss://foo@bar3.dfs.core.windows.net/test.txt", config), + "Config fs.azure.account.oauth2.client.endpoint.bar3.dfs.core.windows.net not found"); + auto abfsConfig = + AbfsConfig("abfss://abc@efg.dfs.core.windows.net/file/test.txt", config); + EXPECT_EQ(abfsConfig.tenentId(), "{TENANTID}"); + EXPECT_EQ(abfsConfig.authorityHost(), "https://login.microsoftonline.com/"); + auto readClient = abfsConfig.getReadFileClient(); + EXPECT_EQ( + readClient->GetUrl(), + "https://efg.blob.core.windows.net/abc/file/test.txt"); + auto writeClient = abfsConfig.getWriteFileClient(); + // GetUrl retrieves the value from the internal blob client, which represents + // the blob's path as well. 
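+  // For the read client above, AbfsConfig::getUrl() itself swaps the "dfs"
+  // endpoint suffix for "blob", which is why both URLs use the
+  // blob.core.windows.net host.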
+ EXPECT_EQ( + writeClient->getUrl(), + "https://efg.blob.core.windows.net/abc/file/test.txt"); +} + +TEST(AbfsConfigTest, sasToken) { + const config::ConfigBase config( + {{"fs.azure.account.auth.type.efg.dfs.core.windows.net", "SAS"}, + {"fs.azure.account.auth.type.bar.dfs.core.windows.net", "SAS"}, + {"fs.azure.sas.fixed.token.bar.dfs.core.windows.net", "sas=test"}}, + false); + VELOX_ASSERT_USER_THROW( + std::make_unique( + "abfss://foo@efg.dfs.core.windows.net/test.txt", config), + "Config fs.azure.sas.fixed.token.efg.dfs.core.windows.net not found"); + auto abfsConfig = + AbfsConfig("abfs://abc@bar.dfs.core.windows.net/file", config); + auto readClient = abfsConfig.getReadFileClient(); + EXPECT_EQ( + readClient->GetUrl(), + "http://bar.blob.core.windows.net/abc/file?sas=test"); + auto writeClient = abfsConfig.getWriteFileClient(); + // GetUrl retrieves the value from the internal blob client, which represents + // the blob's path as well. + EXPECT_EQ( + writeClient->getUrl(), + "http://bar.blob.core.windows.net/abc/file?sas=test"); +} + +TEST(AbfsConfigTest, sharedKey) { + const config::ConfigBase config( + {{"fs.azure.account.key.efg.dfs.core.windows.net", "123"}, + {"fs.azure.account.auth.type.efg.dfs.core.windows.net", "SharedKey"}, + {"fs.azure.account.key.foobar.dfs.core.windows.net", "456"}, + {"fs.azure.account.key.bar.dfs.core.windows.net", "789"}}, + false); + + auto abfsConfig = + AbfsConfig("abfs://abc@efg.dfs.core.windows.net/file", config); + EXPECT_EQ(abfsConfig.fileSystem(), "abc"); + EXPECT_EQ(abfsConfig.filePath(), "file"); + EXPECT_EQ( + abfsConfig.connectionString(), + "DefaultEndpointsProtocol=http;AccountName=efg;AccountKey=123;EndpointSuffix=core.windows.net;"); + + auto abfssConfig = AbfsConfig( + "abfss://abc@foobar.dfs.core.windows.net/sf_1/store_sales/ss_sold_date_sk=2450816/part-00002-a29c25f1-4638-494e-8428-a84f51dcea41.c000.snappy.parquet", + config); + EXPECT_EQ(abfssConfig.fileSystem(), "abc"); + EXPECT_EQ( + abfssConfig.filePath(), + "sf_1/store_sales/ss_sold_date_sk=2450816/part-00002-a29c25f1-4638-494e-8428-a84f51dcea41.c000.snappy.parquet"); + EXPECT_EQ( + abfssConfig.connectionString(), + "DefaultEndpointsProtocol=https;AccountName=foobar;AccountKey=456;EndpointSuffix=core.windows.net;"); + + // Test with special character space. + auto abfssConfigWithSpecialCharacters = AbfsConfig( + "abfss://foo@bar.dfs.core.windows.net/main@dir/sub dir/test.txt", config); + + EXPECT_EQ(abfssConfigWithSpecialCharacters.fileSystem(), "foo"); + EXPECT_EQ( + abfssConfigWithSpecialCharacters.filePath(), "main@dir/sub dir/test.txt"); + + VELOX_ASSERT_USER_THROW( + std::make_unique( + "abfss://foo@otheraccount.dfs.core.windows.net/test.txt", config), + "Config fs.azure.account.key.otheraccount.dfs.core.windows.net not found"); +} diff --git a/velox/connectors/lakehouse/storage_adapters/abfs/tests/AbfsFileSystemTest.cpp b/velox/connectors/lakehouse/storage_adapters/abfs/tests/AbfsFileSystemTest.cpp new file mode 100644 index 000000000000..761ffa87809f --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/abfs/tests/AbfsFileSystemTest.cpp @@ -0,0 +1,292 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include "AzuriteServer.h" +#include "MockDataLakeFileClient.h" +#include "velox/common/base/tests/GTestUtils.h" +#include "velox/common/config/Config.h" +#include "velox/common/file/File.h" +#include "velox/common/file/FileSystems.h" +#include "velox/connectors/hive/FileHandle.h" +#include "velox/connectors/lakehouse/storage_adapters/abfs/AbfsConfig.h" +#include "velox/connectors/lakehouse/storage_adapters/abfs/AbfsFileSystem.h" +#include "velox/connectors/lakehouse/storage_adapters/abfs/AbfsReadFile.h" +#include "velox/connectors/lakehouse/storage_adapters/abfs/AbfsWriteFile.h" +#include "velox/dwio/common/FileSink.h" +#include "velox/exec/tests/utils/PortUtil.h" +#include "velox/exec/tests/utils/TempFilePath.h" + +using namespace facebook::velox; +using namespace facebook::velox::filesystems; +using ::facebook::velox::common::Region; + +constexpr int kOneMB = 1 << 20; + +class AbfsFileSystemTest : public testing::Test { + public: + std::shared_ptr azuriteServer_; + std::unique_ptr abfs_; + + static void SetUpTestCase() { + registerAbfsFileSystem(); + AbfsConfig::setUpTestWriteClient( + []() { return std::make_unique(); }); + } + + static void TearDownTestSuite() { + AbfsConfig::tearDownTestWriteClient(); + } + + void SetUp() override { + auto port = facebook::velox::exec::test::getFreePort(); + azuriteServer_ = std::make_shared(port); + azuriteServer_->start(); + auto tempFile = createFile(); + azuriteServer_->addFile(tempFile->getPath()); + abfs_ = std::make_unique(azuriteServer_->hiveConfig()); + } + + void TearDown() override { + azuriteServer_->stop(); + } + + static std::string generateRandomData(int size) { + static const char charset[] = + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; + + std::string data(size, ' '); + + for (int i = 0; i < size; ++i) { + int index = rand() % (sizeof(charset) - 1); + data[i] = charset[index]; + } + + return data; + } + + private: + static std::shared_ptr<::exec::test::TempFilePath> createFile() { + auto tempFile = exec::test::TempFilePath::create(); + tempFile->append("aaaaa"); + tempFile->append("bbbbb"); + tempFile->append(std::string(kOneMB, 'c')); + tempFile->append("ddddd"); + return tempFile; + } +}; + +void readData(ReadFile* readFile) { + ASSERT_EQ(readFile->size(), 15 + kOneMB); + char buffer1[5]; + ASSERT_EQ(readFile->pread(10 + kOneMB, 5, &buffer1), "ddddd"); + char buffer2[10]; + ASSERT_EQ(readFile->pread(0, 10, &buffer2), "aaaaabbbbb"); + char buffer3[kOneMB]; + ASSERT_EQ(readFile->pread(10, kOneMB, buffer3), std::string(kOneMB, 'c')); + ASSERT_EQ(readFile->size(), 15 + kOneMB); + char buffer4[10]; + const std::string_view arf = readFile->pread(5, 10, &buffer4); + const std::string zarf = readFile->pread(kOneMB, 15); + auto buf = std::make_unique(8); + const std::string_view warf = readFile->pread(4, 8, buf.get()); + const std::string_view warfFromBuf(buf.get(), 8); + ASSERT_EQ(arf, "bbbbbccccc"); + ASSERT_EQ(zarf, "ccccccccccddddd"); + ASSERT_EQ(warf, "abbbbbcc"); + ASSERT_EQ(warfFromBuf, "abbbbbcc"); + + char buff1[10]; + char buff2[10]; + std::vector> 
buffers = { + folly::Range(buff1, 10), + folly::Range(nullptr, kOneMB - 5), + folly::Range(buff2, 10)}; + ASSERT_EQ(10 + kOneMB - 5 + 10, readFile->preadv(0, buffers)); + ASSERT_EQ(std::string_view(buff1, sizeof(buff1)), "aaaaabbbbb"); + ASSERT_EQ(std::string_view(buff2, sizeof(buff2)), "cccccddddd"); + + std::vector iobufs(2); + std::vector regions = {{0, 10}, {10, 5}}; + ASSERT_EQ( + 10 + 5, + readFile->preadv( + {regions.data(), regions.size()}, {iobufs.data(), iobufs.size()})); + ASSERT_EQ( + std::string_view( + reinterpret_cast(iobufs[0].writableData()), + iobufs[0].length()), + "aaaaabbbbb"); + ASSERT_EQ( + std::string_view( + reinterpret_cast(iobufs[1].writableData()), + iobufs[1].length()), + "ccccc"); +} + +TEST_F(AbfsFileSystemTest, readFile) { + auto readFile = abfs_->openFileForRead(azuriteServer_->fileURI()); + readData(readFile.get()); +} + +TEST_F(AbfsFileSystemTest, openFileForReadWithOptions) { + FileOptions options; + options.fileSize = 15 + kOneMB; + auto readFile = abfs_->openFileForRead(azuriteServer_->fileURI(), options); + readData(readFile.get()); +} + +TEST_F(AbfsFileSystemTest, openFileForReadWithInvalidOptions) { + FileOptions options; + options.fileSize = -kOneMB; + VELOX_ASSERT_THROW( + abfs_->openFileForRead(azuriteServer_->fileURI(), options), + "File size must be non-negative"); +} + +TEST_F(AbfsFileSystemTest, fileHandleWithProperties) { + FileHandleFactory factory( + std::make_unique>(1), + std::make_unique(azuriteServer_->hiveConfig())); + FileProperties properties = {15 + kOneMB, 1}; + auto fileHandleProperties = + factory.generate(azuriteServer_->fileURI(), &properties); + readData(fileHandleProperties->file.get()); + + auto fileHandleWithoutProperties = + factory.generate(azuriteServer_->fileURI()); + readData(fileHandleWithoutProperties->file.get()); +} + +TEST_F(AbfsFileSystemTest, multipleThreadsWithReadFile) { + std::atomic startThreads = false; + std::vector threads; + std::mt19937 generator(std::random_device{}()); + std::vector sleepTimesInMicroseconds = {0, 500, 5000}; + std::uniform_int_distribution distribution( + 0, sleepTimesInMicroseconds.size() - 1); + for (int i = 0; i < 10; i++) { + auto thread = std::thread([&] { + int index = distribution(generator); + while (!startThreads) { + std::this_thread::yield(); + } + std::this_thread::sleep_for( + std::chrono::microseconds(sleepTimesInMicroseconds[index])); + auto readFile = abfs_->openFileForRead(azuriteServer_->fileURI()); + readData(readFile.get()); + }); + threads.emplace_back(std::move(thread)); + } + startThreads = true; + for (auto& thread : threads) { + thread.join(); + } +} + +TEST_F(AbfsFileSystemTest, missingFile) { + const std::string abfsFile = azuriteServer_->URI() + "test.txt"; + VELOX_ASSERT_RUNTIME_THROW_CODE( + abfs_->openFileForRead(abfsFile), error_code::kFileNotFound, "404"); +} + +TEST(AbfsWriteFileTest, openFileForWriteTest) { + std::string_view kAbfsFile = + "abfs://test@test.dfs.core.windows.net/test/writetest.txt"; + std::unique_ptr mockClient = + std::make_unique(); + auto mockClientPath = + reinterpret_cast(mockClient.get())->path(); + AbfsWriteFile abfsWriteFile(kAbfsFile, mockClient); + EXPECT_EQ(abfsWriteFile.size(), 0); + std::string dataContent = ""; + uint64_t totalSize = 0; + std::string randomData = + AbfsFileSystemTest::generateRandomData(1 * 1024 * 1024); + for (int i = 0; i < 8; ++i) { + abfsWriteFile.append(randomData); + dataContent += randomData; + } + totalSize = randomData.size() * 8; + abfsWriteFile.flush(); + EXPECT_EQ(abfsWriteFile.size(), 
totalSize); + + randomData = AbfsFileSystemTest::generateRandomData(9 * 1024 * 1024); + dataContent += randomData; + abfsWriteFile.append(randomData); + totalSize += randomData.size(); + randomData = AbfsFileSystemTest::generateRandomData(2 * 1024 * 1024); + dataContent += randomData; + totalSize += randomData.size(); + abfsWriteFile.append(randomData); + abfsWriteFile.flush(); + EXPECT_EQ(abfsWriteFile.size(), totalSize); + abfsWriteFile.flush(); + abfsWriteFile.close(); + VELOX_ASSERT_THROW(abfsWriteFile.append("abc"), "File is not open"); + + std::unique_ptr mockClientCopy = + std::make_unique(mockClientPath); + VELOX_ASSERT_THROW( + AbfsWriteFile(kAbfsFile, mockClientCopy), "File already exists"); + MockDataLakeFileClient readClient(mockClientPath); + auto fileContent = readClient.readContent(); + ASSERT_EQ(fileContent.size(), dataContent.size()); + ASSERT_EQ(fileContent, dataContent); +} + +TEST_F(AbfsFileSystemTest, renameNotImplemented) { + VELOX_ASSERT_THROW( + abfs_->rename("text", "text2"), "rename for abfs not implemented"); +} + +TEST_F(AbfsFileSystemTest, notImplemented) { + VELOX_ASSERT_THROW(abfs_->remove("text"), "remove for abfs not implemented"); + VELOX_ASSERT_THROW(abfs_->exists("text"), "exists for abfs not implemented"); + VELOX_ASSERT_THROW(abfs_->list("dir"), "list for abfs not implemented"); + VELOX_ASSERT_THROW(abfs_->mkdir("dir"), "mkdir for abfs not implemented"); + VELOX_ASSERT_THROW(abfs_->rmdir("dir"), "rmdir for abfs not implemented"); +} + +TEST_F(AbfsFileSystemTest, credNotFOund) { + const std::string abfsFile = + std::string("abfs://test@test1.dfs.core.windows.net/test"); + VELOX_ASSERT_THROW( + abfs_->openFileForRead(abfsFile), + "Config fs.azure.account.key.test1.dfs.core.windows.net not found"); +} + +TEST_F(AbfsFileSystemTest, registerAbfsFileSink) { + static const std::vector paths = { + "abfs://test@test.dfs.core.windows.net/test", + "abfss://test@test.dfs.core.windows.net/test"}; + std::unordered_map config( + {{"fs.azure.account.key.test.dfs.core.windows.net", "NDU2"}}); + auto hiveConfig = + std::make_shared(std::move(config)); + for (const auto& path : paths) { + auto sink = dwio::common::FileSink::create( + path, {.connectorProperties = hiveConfig}); + auto writeFileSink = dynamic_cast(sink.get()); + auto writeFile = writeFileSink->toWriteFile(); + auto abfsWriteFile = dynamic_cast(writeFile.get()); + ASSERT_TRUE(abfsWriteFile != nullptr); + } +} diff --git a/velox/connectors/lakehouse/storage_adapters/abfs/tests/AzuriteServer.cpp b/velox/connectors/lakehouse/storage_adapters/abfs/tests/AzuriteServer.cpp new file mode 100644 index 000000000000..c7cd03ccd4c2 --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/abfs/tests/AzuriteServer.cpp @@ -0,0 +1,122 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "AzuriteServer.h"
+#include "velox/connectors/lakehouse/storage_adapters/abfs/AbfsConfig.h"
+
+namespace facebook::velox::filesystems {
+
+std::string AzuriteServer::URI() const {
+  return fmt::format(
+      "abfs://{}@{}.dfs.core.windows.net/", container_, account_);
+}
+
+std::string AzuriteServer::fileURI() const {
+  return fmt::format(
+      "abfs://{}@{}.dfs.core.windows.net/{}", container_, account_, file_);
+}
+
+// Returns the hiveConfig for the Azurite instance.
+// Entries in configOverride update the default config map.
+std::shared_ptr<config::ConfigBase> AzuriteServer::hiveConfig(
+    const std::unordered_map<std::string, std::string> configOverride) const {
+  auto endpoint = fmt::format("http://127.0.0.1:{}/{}", port_, account_);
+  std::unordered_map<std::string, std::string> config(
+      {{"fs.azure.account.key.test.dfs.core.windows.net", key_},
+       {kAzureBlobEndpoint, endpoint}});
+
+  for (const auto& [key, value] : configOverride) {
+    config[key] = value;
+  }
+
+  return std::make_shared<config::ConfigBase>(std::move(config));
+}
+
+void AzuriteServer::start() {
+  try {
+    serverProcess_ = std::make_unique<boost::process::child>(
+        env_, exePath_, commandOptions_);
+    serverProcess_->wait_for(std::chrono::milliseconds(5000));
+    VELOX_CHECK_EQ(
+        serverProcess_->exit_code(),
+        383,
+        "AzuriteServer process exited, code: ",
+        serverProcess_->exit_code());
+  } catch (const std::exception& e) {
+    VELOX_FAIL("Failed to launch Azurite server: {}", e.what());
+  }
+}
+
+void AzuriteServer::stop() {
+  if (serverProcess_ && serverProcess_->valid()) {
+    serverProcess_->terminate();
+    serverProcess_->wait();
+    serverProcess_.reset();
+  }
+}
+
+bool AzuriteServer::isRunning() {
+  if (serverProcess_) {
+    return true;
+  }
+  return false;
+}
+
+// Requires the azurite executable to be on the PATH.
+AzuriteServer::AzuriteServer(int64_t port) : port_(port) {
+  std::string dataLocation = fmt::format("/tmp/azurite_{}", port);
+  std::string logFilePath = fmt::format("/tmp/azurite/azurite_{}.log", port);
+  std::printf(
+      "Launch azurite instance with port - %s, data location - %s, log file path - %s\n",
+      std::to_string(port).c_str(),
+      dataLocation.c_str(),
+      logFilePath.c_str());
+  commandOptions_ = {
+      "--silent",
+      "--blobPort",
+      std::to_string(port),
+      "--location",
+      dataLocation,
+      "--debug",
+      logFilePath,
+  };
+  env_ = (boost::process::environment)boost::this_process::environment();
+  env_["PATH"] = env_["PATH"].to_string() + std::string(kAzuriteSearchPath);
+  env_["AZURITE_ACCOUNTS"] = fmt::format("{}:{}", account_, key_);
+  auto path = env_["PATH"].to_vector();
+  exePath_ = boost::process::search_path(
+      kAzuriteServerExecutableName,
+      std::vector<boost::filesystem::path>(path.begin(), path.end()));
+  std::printf("AzuriteServer executable path: %s\n", exePath_.c_str());
+  if (exePath_.empty()) {
+    VELOX_FAIL(
+        "Failed to find azurite executable '{}'", kAzuriteServerExecutableName);
+  }
+}
+
+void AzuriteServer::addFile(std::string source) {
+  AbfsConfig conf(fileURI(), *hiveConfig());
+  auto containerClient = BlobContainerClient::CreateFromConnectionString(
+      conf.connectionString(), container_);
+  containerClient.CreateIfNotExists();
+  auto blobClient = containerClient.GetBlockBlobClient(file_);
+  blobClient.UploadFrom(source);
+}
+
+AzuriteServer::~AzuriteServer() {
+  // stop();
+}
+} // namespace facebook::velox::filesystems
diff --git a/velox/connectors/lakehouse/storage_adapters/abfs/tests/AzuriteServer.h b/velox/connectors/lakehouse/storage_adapters/abfs/tests/AzuriteServer.h
new file mode 100644
index 000000000000..037c4d9f38d4
--- /dev/null
+++ b/velox/connectors/lakehouse/storage_adapters/abfs/tests/AzuriteServer.h
@@ -0,0 +1,78 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "velox/common/config/Config.h" +#include "velox/exec/tests/utils/TempDirectoryPath.h" + +#include +#include +#include +#include +#include +#include +#include "boost/process.hpp" + +namespace facebook::velox::filesystems { + +using namespace Azure::Storage::Blobs; +static std::string_view kAzuriteServerExecutableName{"azurite-blob"}; +static std::string_view kAzuriteSearchPath{":/usr/bin/azurite"}; + +class AzuriteServer { + public: + AzuriteServer(int64_t port); + + const std::string connectionStr() const; + + void start(); + + std::string URI() const; + + std::string fileURI() const; + + std::string container() const { + return container_; + } + + std::string file() const { + return file_; + } + + std::shared_ptr hiveConfig( + const std::unordered_map configOverride = {}) + const; + + void stop(); + + bool isRunning(); + + void addFile(std::string source); + + virtual ~AzuriteServer(); + + private: + int64_t port_; + const std::string account_{"test"}; + const std::string container_{"test"}; + const std::string file_{"test_file.txt"}; + const std::string key_{ + "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw=="}; + std::vector commandOptions_; + std::unique_ptr<::boost::process::child> serverProcess_; + boost::filesystem::path exePath_; + boost::process::environment env_; +}; +} // namespace facebook::velox::filesystems diff --git a/velox/connectors/lakehouse/storage_adapters/abfs/tests/CMakeLists.txt b/velox/connectors/lakehouse/storage_adapters/abfs/tests/CMakeLists.txt new file mode 100644 index 000000000000..2a2402bcc8fe --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/abfs/tests/CMakeLists.txt @@ -0,0 +1,23 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
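+# These tests launch a local Azurite instance; the azurite-blob executable
+# must be discoverable on PATH (see AzuriteServer.h/.cpp).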
+ +add_executable(velox_lakehouse_abfs_test AbfsFileSystemTest.cpp AbfsCommonTest.cpp + AzuriteServer.cpp MockDataLakeFileClient.cpp) + +add_test(velox_lakehouse_abfs_test velox_lakehouse_abfs_test) +target_link_libraries( + velox_lakehouse_abfs_test + PRIVATE velox_lakehouse_abfs velox_exec_test_lib GTest::gtest GTest::gtest_main) + +target_compile_options(velox_lakehouse_abfs_test PRIVATE -Wno-deprecated-declarations) diff --git a/velox/connectors/lakehouse/storage_adapters/abfs/tests/MockDataLakeFileClient.cpp b/velox/connectors/lakehouse/storage_adapters/abfs/tests/MockDataLakeFileClient.cpp new file mode 100644 index 000000000000..f392a691f081 --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/abfs/tests/MockDataLakeFileClient.cpp @@ -0,0 +1,60 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "MockDataLakeFileClient.h" + +#include + +#include + +namespace facebook::velox::filesystems { + +void MockDataLakeFileClient::create() { + fileStream_ = std::ofstream( + filePath_, + std::ios_base::out | std::ios_base::binary | std::ios_base::app); +} + +PathProperties MockDataLakeFileClient::getProperties() { + if (!std::filesystem::exists(filePath_)) { + Azure::Storage::StorageException exp(filePath_ + "doesn't exists"); + exp.StatusCode = Azure::Core::Http::HttpStatusCode::NotFound; + throw exp; + } + std::ifstream file(filePath_, std::ios::binary | std::ios::ate); + uint64_t size = static_cast(file.tellg()); + PathProperties ret; + ret.FileSize = size; + return ret; +} + +void MockDataLakeFileClient::append( + const uint8_t* buffer, + size_t size, + uint64_t offset) { + fileStream_.seekp(offset); + fileStream_.write(reinterpret_cast(buffer), size); +} + +void MockDataLakeFileClient::flush(uint64_t position) { + fileStream_.flush(); +} + +void MockDataLakeFileClient::close() { + fileStream_.flush(); + fileStream_.close(); +} +} // namespace facebook::velox::filesystems diff --git a/velox/connectors/lakehouse/storage_adapters/abfs/tests/MockDataLakeFileClient.h b/velox/connectors/lakehouse/storage_adapters/abfs/tests/MockDataLakeFileClient.h new file mode 100644 index 000000000000..4625597ac8bb --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/abfs/tests/MockDataLakeFileClient.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "velox/exec/tests/utils/TempFilePath.h" + +#include "velox/connectors/lakehouse/storage_adapters/abfs/AzureDataLakeFileClient.h" + +using namespace Azure::Storage::Files::DataLake::Models; + +namespace facebook::velox::filesystems { + +// A mock AzureDataLakeFileClient backend with local file store. +class MockDataLakeFileClient : public AzureDataLakeFileClient { + public: + MockDataLakeFileClient() { + auto tempFile = velox::exec::test::TempFilePath::create(); + filePath_ = tempFile->getPath(); + } + + MockDataLakeFileClient(std::string_view filePath) : filePath_(filePath) {} + + std::string_view path() const { + return filePath_; + } + + void create() override; + + PathProperties getProperties() override; + + void append(const uint8_t* buffer, size_t size, uint64_t offset) override; + + void flush(uint64_t position) override; + + void close() override; + + std::string getUrl() const override { + return "testUrl"; + } + + // for testing purpose to verify the written content if correct. + std::string readContent() { + std::ifstream inputFile(filePath_); + std::string content; + inputFile.seekg(0, std::ios::end); + std::streamsize fileSize = inputFile.tellg(); + inputFile.seekg(0, std::ios::beg); + content.resize(fileSize); + inputFile.read(&content[0], fileSize); + inputFile.close(); + return content; + } + + private: + std::string filePath_; + std::ofstream fileStream_; +}; +} // namespace facebook::velox::filesystems diff --git a/velox/connectors/lakehouse/storage_adapters/gcs/CMakeLists.txt b/velox/connectors/lakehouse/storage_adapters/gcs/CMakeLists.txt new file mode 100644 index 000000000000..fc98baa8af65 --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/gcs/CMakeLists.txt @@ -0,0 +1,31 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# for generated headers + +velox_add_library(velox_lakehouse_gcs RegisterGcsFileSystem.cpp) + +if(VELOX_ENABLE_GCS) + velox_sources(velox_lakehouse_gcs PRIVATE GcsFileSystem.cpp GcsUtil.cpp) + velox_link_libraries(velox_lakehouse_gcs velox_dwio_common Folly::folly + google-cloud-cpp::storage) + + if(${VELOX_BUILD_TESTING}) + add_subdirectory(tests) + endif() + + if(${VELOX_ENABLE_EXAMPLES}) + add_subdirectory(examples) + endif() +endif() diff --git a/velox/connectors/lakehouse/storage_adapters/gcs/GcsFileSystem.cpp b/velox/connectors/lakehouse/storage_adapters/gcs/GcsFileSystem.cpp new file mode 100644 index 000000000000..a592e1571630 --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/gcs/GcsFileSystem.cpp @@ -0,0 +1,457 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "velox/connectors/lakehouse/storage_adapters/gcs/GcsFileSystem.h" + +#include "velox/common/base/Exceptions.h" +#include "velox/common/config/Config.h" +#include "velox/common/file/File.h" +//#include "velox/connectors/lakehouse/common/ConnectorConfigBase.h" +#include "velox/connectors/hive/HiveConfig.h" +#include "velox/connectors/lakehouse/storage_adapters/gcs/GcsUtil.h" +#include "velox/core/QueryConfig.h" + +#include +#include +#include +#include + +#include + +namespace facebook::velox { +namespace { +namespace gcs = ::google::cloud::storage; +namespace gc = ::google::cloud; +// Reference: https://github.com/apache/arrow/issues/29916 +// Change the default upload buffer size. In general, sending larger buffers is +// more efficient with GCS, as each buffer requires a roundtrip to the service. +// With formatted output (when using `operator<<`), keeping a larger buffer in +// memory before uploading makes sense. With unformatted output (the only +// choice given gcs::io::OutputStream's API) it is better to let the caller +// provide as large a buffer as they want. The GCS C++ client library will +// upload this buffer with zero copies if possible. +auto constexpr kUploadBufferSize = 256 * 1024; + +inline void checkGcsStatus( + const gc::Status outcome, + const std::string_view& errorMsgPrefix, + const std::string& bucket, + const std::string& key) { + if (!outcome.ok()) { + const auto errMsg = fmt::format( + "{} due to: Path:'{}', SDK Error Type:{}, GCS Status Code:{}, Message:'{}'", + errorMsgPrefix, + gcsURI(bucket, key), + outcome.error_info().domain(), + getErrorStringFromGcsError(outcome.code()), + outcome.message()); + if (outcome.code() == gc::StatusCode::kNotFound) { + VELOX_FILE_NOT_FOUND_ERROR(errMsg); + } + VELOX_FAIL(errMsg); + } +} + +class GcsReadFile final : public ReadFile { + public: + GcsReadFile(const std::string& path, std::shared_ptr client) + : client_(std::move(client)) { + // assumption it's a proper path + setBucketAndKeyFromGcsPath(path, bucket_, key_); + } + + // Gets the length of the file. + // Checks if there are any issues reading the file. + void initialize(const filesystems::FileOptions& options) { + if (options.fileSize.has_value()) { + VELOX_CHECK_GE( + options.fileSize.value(), 0, "File size must be non-negative"); + length_ = options.fileSize.value(); + } + + // Make it a no-op if invoked twice. 
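+    // length_ keeps its -1 sentinel until the size is known, either from the
+    // FileOptions above or from the object metadata fetched below.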
+ if (length_ != -1) { + return; + } + // get metadata and initialize length + auto metadata = client_->GetObjectMetadata(bucket_, key_); + if (!metadata.ok()) { + checkGcsStatus( + metadata.status(), + "Failed to get metadata for GCS object", + bucket_, + key_); + } + length_ = (*metadata).size(); + VELOX_CHECK_GE(length_, 0); + } + + std::string_view pread( + uint64_t offset, + uint64_t length, + void* buffer, + filesystems::File::IoStats* stats = nullptr) const override { + preadInternal(offset, length, static_cast(buffer)); + return {static_cast(buffer), length}; + } + + std::string pread( + uint64_t offset, + uint64_t length, + filesystems::File::IoStats* stats = nullptr) const override { + std::string result(length, 0); + char* position = result.data(); + preadInternal(offset, length, position); + return result; + } + + uint64_t preadv( + uint64_t offset, + const std::vector>& buffers, + filesystems::File::IoStats* stats = nullptr) const override { + // 'buffers' contains Ranges(data, size) with some gaps (data = nullptr) in + // between. This call must populate the ranges (except gap ranges) + // sequentially starting from 'offset'. If a range pointer is nullptr, the + // data from stream of size range.size() will be skipped. + size_t length = 0; + for (const auto range : buffers) { + length += range.size(); + } + std::string result(length, 0); + preadInternal(offset, length, static_cast(result.data())); + size_t resultOffset = 0; + for (auto range : buffers) { + if (range.data()) { + memcpy(range.data(), &(result.data()[resultOffset]), range.size()); + } + resultOffset += range.size(); + } + return length; + } + + uint64_t size() const override { + return length_; + } + + uint64_t memoryUsage() const override { + return sizeof(GcsReadFile) // this class + + sizeof(gcs::Client) // pointee + + kUploadBufferSize; // buffer size + } + + bool shouldCoalesce() const final { + return false; + } + + std::string getName() const override { + return key_; + } + + uint64_t getNaturalReadSize() const override { + return kUploadBufferSize; + } + + private: + // The assumption here is that "position" has space for at least "length" + // bytes. + void preadInternal(uint64_t offset, uint64_t length, char* position) const { + gcs::ObjectReadStream stream = client_->ReadObject( + bucket_, key_, gcs::ReadRange(offset, offset + length)); + if (!stream) { + checkGcsStatus( + stream.status(), "Failed to get GCS object", bucket_, key_); + } + + stream.read(position, length); + if (!stream) { + checkGcsStatus( + stream.status(), "Failed to get read object", bucket_, key_); + } + bytesRead_ += length; + } + + std::shared_ptr client_; + std::string bucket_; + std::string key_; + std::atomic length_ = -1; +}; + +class GcsWriteFile final : public WriteFile { + public: + explicit GcsWriteFile( + const std::string& path, + std::shared_ptr client) + : client_(client) { + setBucketAndKeyFromGcsPath(path, bucket_, key_); + } + + ~GcsWriteFile() { + close(); + } + + void initialize() { + // Make it a no-op if invoked twice. 
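+    // size_ == -1 marks an uninitialized writer; it is reset to 0 once the
+    // upload stream has been opened below.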
+ if (size_ != -1) { + return; + } + + // Check that it doesn't exist, if it does throw an error + auto object_metadata = client_->GetObjectMetadata(bucket_, key_); + VELOX_CHECK(!object_metadata.ok(), "File already exists"); + + auto stream = client_->WriteObject(bucket_, key_); + checkGcsStatus( + stream.last_status(), + "Failed to open GCS object for writing", + bucket_, + key_); + stream_ = std::move(stream); + size_ = 0; + } + + void append(const std::string_view data) override { + VELOX_CHECK(isFileOpen(), "File is not open"); + stream_ << data; + size_ += data.size(); + } + + void flush() override { + if (isFileOpen()) { + stream_.flush(); + } + } + + void close() override { + if (isFileOpen()) { + stream_.flush(); + stream_.Close(); + closed_ = true; + } + } + + uint64_t size() const override { + return size_; + } + + private: + inline bool isFileOpen() { + return (!closed_ && stream_.IsOpen()); + } + + gcs::ObjectWriteStream stream_; + std::shared_ptr client_; + std::string bucket_; + std::string key_; + std::atomic size_{-1}; + std::atomic closed_{false}; +}; +} // namespace + +namespace filesystems { +using namespace connector::hive; + +auto constexpr kGcsInvalidPath = "File {} is not a valid gcs file"; + +class GcsFileSystem::Impl { + public: + Impl(const config::ConfigBase* config) + : connectorConfig_( + std::make_shared(std::make_shared( + config->rawConfigsCopy()))) {} + + ~Impl() = default; + + // Use the input Config parameters and initialize the GcsClient. + void initializeClient() { + constexpr std::string_view kHttpsScheme{"https://"}; + auto options = gc::Options{}; + auto endpointOverride = connectorConfig_->gcsEndpoint(); + // Use secure credentials by default. + if (!endpointOverride.empty()) { + options.set(endpointOverride); + // Use Google default credentials if endpoint has https scheme. 
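+      // A non-https endpoint override (e.g. a local emulator) falls back to
+      // insecure credentials in the else branch.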
+ if (endpointOverride.find(kHttpsScheme) == 0) { + options.set( + gc::MakeGoogleDefaultCredentials()); + } else { + options.set( + gc::MakeInsecureCredentials()); + } + } else { + options.set( + gc::MakeGoogleDefaultCredentials()); + } + options.set(kUploadBufferSize); + + auto max_retry_count = connectorConfig_->gcsMaxRetryCount(); + if (max_retry_count) { + options.set( + gcs::LimitedErrorCountRetryPolicy(max_retry_count.value()).clone()); + } + + auto max_retry_time = connectorConfig_->gcsMaxRetryTime(); + if (max_retry_time) { + auto retry_time = std::chrono::duration_cast( + facebook::velox::config::toDuration(max_retry_time.value())); + options.set( + gcs::LimitedTimeRetryPolicy(retry_time).clone()); + } + + auto credFile = connectorConfig_->gcsCredentialsPath(); + if (!credFile.empty() && std::filesystem::exists(credFile)) { + std::ifstream jsonFile(credFile, std::ios::in); + if (!jsonFile.is_open()) { + LOG(WARNING) << "Error opening file " << credFile; + } else { + std::stringstream credsBuffer; + credsBuffer << jsonFile.rdbuf(); + auto creds = credsBuffer.str(); + auto credentials = gc::MakeServiceAccountCredentials(std::move(creds)); + options.set(credentials); + } + } else { + LOG(WARNING) + << "Config hive.gcs.json-key-file-path is empty or key file path not found"; + } + + client_ = std::make_shared(options); + } + + std::shared_ptr getClient() const { + return client_; + } + + private: + const std::shared_ptr connectorConfig_; + std::shared_ptr client_; +}; + +GcsFileSystem::GcsFileSystem(std::shared_ptr config) + : FileSystem(config) { + impl_ = std::make_shared(config.get()); +} + +void GcsFileSystem::initializeClient() { + impl_->initializeClient(); +} + +std::unique_ptr GcsFileSystem::openFileForRead( + std::string_view path, + const FileOptions& options) { + const auto gcspath = gcsPath(path); + auto gcsfile = std::make_unique(gcspath, impl_->getClient()); + gcsfile->initialize(options); + return gcsfile; +} + +std::unique_ptr GcsFileSystem::openFileForWrite( + std::string_view path, + const FileOptions& /*unused*/) { + const auto gcspath = gcsPath(path); + auto gcsfile = std::make_unique(gcspath, impl_->getClient()); + gcsfile->initialize(); + return gcsfile; +} + +void GcsFileSystem::remove(std::string_view path) { + if (!isGcsFile(path)) { + VELOX_FAIL(kGcsInvalidPath, path); + } + + // We assume 'path' is well-formed here. + std::string bucket; + std::string object; + const auto file = gcsPath(path); + setBucketAndKeyFromGcsPath(file, bucket, object); + + if (!object.empty()) { + auto stat = impl_->getClient()->GetObjectMetadata(bucket, object); + if (!stat.ok()) { + checkGcsStatus( + stat.status(), + "Failed to get metadata for GCS object", + bucket, + object); + } + } + auto ret = impl_->getClient()->DeleteObject(bucket, object); + if (!ret.ok()) { + checkGcsStatus( + ret, "Failed to get metadata for GCS object", bucket, object); + } +} + +bool GcsFileSystem::exists(std::string_view path) { + std::vector result; + if (!isGcsFile(path)) + VELOX_FAIL(kGcsInvalidPath, path); + + // We assume 'path' is well-formed here. + const auto file = gcsPath(path); + std::string bucket; + std::string object; + setBucketAndKeyFromGcsPath(file, bucket, object); + using ::google::cloud::StatusOr; + StatusOr metadata = + impl_->getClient()->GetBucketMetadata(bucket); + + return metadata.ok(); +} + +std::vector GcsFileSystem::list(std::string_view path) { + std::vector result; + if (!isGcsFile(path)) + VELOX_FAIL(kGcsInvalidPath, path); + + // We assume 'path' is well-formed here. 
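+  // gcsPath() strips the gs:// scheme; the remainder is split into bucket
+  // and object key below.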
+ const auto file = gcsPath(path); + std::string bucket; + std::string object; + setBucketAndKeyFromGcsPath(file, bucket, object); + for (auto&& metadata : impl_->getClient()->ListObjects(bucket)) { + if (!metadata.ok()) { + checkGcsStatus( + metadata.status(), + "Failed to get metadata for GCS object", + bucket, + object); + } + result.push_back(metadata->name()); + } + + return result; +} + +std::string GcsFileSystem::name() const { + return "GCS"; +} + +void GcsFileSystem::rename(std::string_view, std::string_view, bool) { + VELOX_UNSUPPORTED("rename for GCS not implemented"); +} + +void GcsFileSystem::mkdir( + std::string_view path, + const DirectoryOptions& options) { + VELOX_UNSUPPORTED("mkdir for GCS not implemented"); +} + +void GcsFileSystem::rmdir(std::string_view path) { + VELOX_UNSUPPORTED("rmdir for GCS not implemented"); +} + +} // namespace filesystems +} // namespace facebook::velox diff --git a/velox/connectors/lakehouse/storage_adapters/gcs/GcsFileSystem.h b/velox/connectors/lakehouse/storage_adapters/gcs/GcsFileSystem.h new file mode 100644 index 000000000000..34daff8d6c64 --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/gcs/GcsFileSystem.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "velox/common/file/FileSystems.h" + +namespace facebook::velox::filesystems { + +/// Implementation of GCS filesystem and file interface. +/// We provide a registration method for read and write files so the appropriate +/// type of file can be constructed based on a filename. See the +/// (register|generate)ReadFile and (register|generate)WriteFile functions. +class GcsFileSystem : public FileSystem { + public: + explicit GcsFileSystem(std::shared_ptr config); + + /// Initialize the google::cloud::storage::Client from the input Config + /// parameters. + void initializeClient(); + + /// Initialize a ReadFile + /// First the method google::cloud::storage::Client::GetObjectMetadata + /// is used to validate + /// [[https://cloud.google.com/storage/docs/samples/storage-get-metadata]] + /// then the method google::cloud::storage::Client::ReadObject + /// is used to read sequentially + /// [[https://cloud.google.com/storage/docs/samples/storage-stream-file-download]]. + std::unique_ptr openFileForRead( + std::string_view path, + const FileOptions& options = {}) override; + + /// Initialize a WriteFile + /// First the method google::cloud::storage::Client::GetObjectMetadata + /// is used to validate + /// [[https://cloud.google.com/storage/docs/samples/storage-get-metadata]] + /// then the method google::cloud::storage::Client::WriteObject + /// is used to append sequentially + /// [[https://cloud.google.com/storage/docs/samples/storage-stream-file-upload]]. + /// The default buffer size is currently 8 MiB + /// but this default value can change. + /// [[https://cloud.google.com/storage/docs/resumable-uploads]]. 
+  /// The in-memory buffer is kept until the instance is closed or there is an
+  /// excess of data. If any previously buffered data plus the data to append
+  /// exceed the maximum size of the internal buffer, then the largest amount
+  /// of data that is a multiple of the upload quantum (256 KiB) is flushed.
+  /// Any data in excess of a multiple of the upload quantum is buffered for
+  /// the next upload.
+  std::unique_ptr<WriteFile> openFileForWrite(
+      std::string_view path,
+      const FileOptions& options = {}) override;
+
+  /// Returns the name of the adapter (GCS).
+  std::string name() const override;
+
+  /// Removes the object at 'path' using
+  /// google::cloud::storage::Client::DeleteObject.
+  void remove(std::string_view path) override;
+
+  /// Checks that the path exists using
+  /// google::cloud::storage::Client::GetObjectMetadata.
+  bool exists(std::string_view path) override;
+
+  /// Lists the objects associated with a path using
+  /// google::cloud::storage::Client::ListObjects.
+  std::vector<std::string> list(std::string_view path) override;
+
+  /// Unsupported.
+  void rename(std::string_view, std::string_view, bool) override;
+
+  /// Unsupported.
+  void mkdir(std::string_view path, const DirectoryOptions& options = {})
+      override;
+
+  /// Unsupported.
+  void rmdir(std::string_view path) override;
+
+ protected:
+  class Impl;
+  std::shared_ptr<Impl> impl_;
+};
+
+} // namespace facebook::velox::filesystems
diff --git a/velox/connectors/lakehouse/storage_adapters/gcs/GcsUtil.cpp b/velox/connectors/lakehouse/storage_adapters/gcs/GcsUtil.cpp
new file mode 100644
index 000000000000..6fb9319b1756
--- /dev/null
+++ b/velox/connectors/lakehouse/storage_adapters/gcs/GcsUtil.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "velox/connectors/lakehouse/storage_adapters/gcs/GcsUtil.h"
+
+namespace facebook::velox {
+
+std::string getErrorStringFromGcsError(const google::cloud::StatusCode& code) {
+  using ::google::cloud::StatusCode;
+
+  switch (code) {
+    case StatusCode::kNotFound:
+      return "Resource not found";
+    case StatusCode::kPermissionDenied:
+      return "Access denied";
+    case StatusCode::kUnavailable:
+      return "Service unavailable";
+
+    default:
+      return "Unknown error";
+  }
+}
+
+} // namespace facebook::velox
diff --git a/velox/connectors/lakehouse/storage_adapters/gcs/GcsUtil.h b/velox/connectors/lakehouse/storage_adapters/gcs/GcsUtil.h
new file mode 100644
index 000000000000..e49325ec85e3
--- /dev/null
+++ b/velox/connectors/lakehouse/storage_adapters/gcs/GcsUtil.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include "velox/common/base/Exceptions.h" + +namespace facebook::velox { + +namespace { +constexpr const char* kSep{"/"}; +constexpr std::string_view kGcsScheme{"gs://"}; + +} // namespace + +std::string getErrorStringFromGcsError(const google::cloud::StatusCode& error); + +inline bool isGcsFile(const std::string_view filename) { + return (filename.substr(0, kGcsScheme.size()) == kGcsScheme); +} + +inline void setBucketAndKeyFromGcsPath( + const std::string& path, + std::string& bucket, + std::string& key) { + auto firstSep = path.find_first_of(kSep); + bucket = path.substr(0, firstSep); + key = path.substr(firstSep + 1); +} + +inline std::string gcsURI(std::string_view bucket) { + std::stringstream ss; + ss << kGcsScheme << bucket; + return ss.str(); +} + +inline std::string gcsURI(std::string_view bucket, std::string_view key) { + std::stringstream ss; + ss << kGcsScheme << bucket << kSep << key; + return ss.str(); +} + +inline std::string gcsPath(const std::string_view& path) { + // Remove the prefix gcs:// from the given path + return std::string(path.substr(kGcsScheme.length())); +} + +} // namespace facebook::velox diff --git a/velox/connectors/lakehouse/storage_adapters/gcs/RegisterGcsFileSystem.cpp b/velox/connectors/lakehouse/storage_adapters/gcs/RegisterGcsFileSystem.cpp new file mode 100644 index 000000000000..94e54fbb7c91 --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/gcs/RegisterGcsFileSystem.cpp @@ -0,0 +1,81 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifdef VELOX_ENABLE_GCS +#include "velox/common/config/Config.h" +#include "velox/connectors/lakehouse/storage_adapters/gcs/GcsFileSystem.h" +#include "velox/connectors/lakehouse/storage_adapters/gcs/GcsUtil.h" +#include "velox/dwio/common/FileSink.h" +#endif + +namespace facebook::velox::filesystems { + +#ifdef VELOX_ENABLE_GCS +folly::once_flag GcsInstantiationFlag; + +std::function(std::shared_ptr, std::string_view)> +gcsFileSystemGenerator() { + static auto filesystemGenerator = + [](std::shared_ptr properties, + std::string_view filePath) { + // Only one instance of GCSFileSystem is supported for now (follow S3 + // for now). + // TODO: Support multiple GCSFileSystem instances using a cache + // Initialize on first access and reuse after that. 
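+        // folly::call_once guarantees the client is created and initialized
+        // exactly once, even if multiple threads request the filesystem
+        // concurrently.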
+ static std::shared_ptr gcsfs; + folly::call_once(GcsInstantiationFlag, [&properties]() { + std::shared_ptr fs; + if (properties != nullptr) { + fs = std::make_shared(properties); + } else { + fs = std::make_shared( + std::make_shared( + std::unordered_map())); + } + fs->initializeClient(); + gcsfs = fs; + }); + return gcsfs; + }; + return filesystemGenerator; +} + +std::unique_ptr gcsWriteFileSinkGenerator( + const std::string& fileURI, + const velox::dwio::common::FileSink::Options& options) { + if (isGcsFile(fileURI)) { + auto fileSystem = + filesystems::getFileSystem(fileURI, options.connectorProperties); + return std::make_unique( + fileSystem->openFileForWrite(fileURI, {{}, options.pool, std::nullopt}), + fileURI, + options.metricLogger, + options.stats); + } + return nullptr; +} +#endif + +void registerGcsFileSystem() { +#ifdef VELOX_ENABLE_GCS + registerFileSystem(isGcsFile, gcsFileSystemGenerator()); + dwio::common::FileSink::registerFactory( + std::function(gcsWriteFileSinkGenerator)); +#endif +} + +} // namespace facebook::velox::filesystems diff --git a/velox/connectors/lakehouse/storage_adapters/gcs/RegisterGcsFileSystem.h b/velox/connectors/lakehouse/storage_adapters/gcs/RegisterGcsFileSystem.h new file mode 100644 index 000000000000..b0f668d6f413 --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/gcs/RegisterGcsFileSystem.h @@ -0,0 +1,24 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +namespace facebook::velox::filesystems { + +// Register the GCS filesystem. +void registerGcsFileSystem(); + +} // namespace facebook::velox::filesystems diff --git a/velox/connectors/lakehouse/storage_adapters/gcs/examples/CMakeLists.txt b/velox/connectors/lakehouse/storage_adapters/gcs/examples/CMakeLists.txt new file mode 100644 index 000000000000..2dc1c68ecd0f --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/gcs/examples/CMakeLists.txt @@ -0,0 +1,24 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
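+# Standalone example that reads a single GCS object; GcsFileSystemExample.cpp
+# documents the --gcs_path and retry-related flags.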
+ +add_executable(velox_lakehouse_gcsfile_example GcsFileSystemExample.cpp) +target_link_libraries( + velox_lakehouse_gcsfile_example + Folly::folly + velox_file + velox_lakehouse_gcs + velox_core + velox_hiveV2_connector + velox_dwio_common_exception + velox_exec) diff --git a/velox/connectors/lakehouse/storage_adapters/gcs/examples/GcsFileSystemExample.cpp b/velox/connectors/lakehouse/storage_adapters/gcs/examples/GcsFileSystemExample.cpp new file mode 100644 index 000000000000..1c5c16e52d6a --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/gcs/examples/GcsFileSystemExample.cpp @@ -0,0 +1,57 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "velox/common/config/Config.h" +#include "velox/common/file/File.h" +#include "velox/connectors/lakehouse/storage_adapters/gcs/GcsFileSystem.h" + +#include +#include +#include + +DEFINE_string(gcs_path, "", "Path of GCS bucket"); +DEFINE_string(gcs_max_retry_count, "", "Max retry count"); +DEFINE_string(gcs_max_retry_time, "", "Max retry time"); + +auto newConfiguration() { + using namespace facebook::velox; + std::unordered_map configOverride = {}; + if (!FLAGS_gcs_max_retry_count.empty()) { + configOverride.emplace( + "hive.gcs.max-retry-count", FLAGS_gcs_max_retry_count); + } + if (!FLAGS_gcs_max_retry_time.empty()) { + configOverride.emplace("hive.gcs.max-retry-time", FLAGS_gcs_max_retry_time); + } + return std::make_shared(std::move(configOverride)); +} + +int main(int argc, char** argv) { + using namespace facebook::velox; + gflags::ParseCommandLineFlags(&argc, &argv, false); + if (FLAGS_gcs_path.empty()) { + gflags::ShowUsageWithFlags(argv[0]); + return 1; + } + filesystems::GcsFileSystem gcfs(newConfiguration()); + gcfs.initializeClient(); + std::cout << "Opening file for read " << FLAGS_gcs_path << std::endl; + std::unique_ptr file_read = gcfs.openFileForRead(FLAGS_gcs_path); + std::size_t file_size = file_read->size(); + std::cout << "File size = " << file_size << std::endl; + std::string buffer(file_size + 1, '\0'); + file_read->pread(0 /*offset*/, file_size /*lenght*/, buffer.data()); + std::cout << "File Content = " << buffer << std::endl; +} diff --git a/velox/connectors/lakehouse/storage_adapters/gcs/tests/CMakeLists.txt b/velox/connectors/lakehouse/storage_adapters/gcs/tests/CMakeLists.txt new file mode 100644 index 000000000000..eedc7ad15dec --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/gcs/tests/CMakeLists.txt @@ -0,0 +1,44 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +add_executable(velox_lakehouse_gcsfile_test GcsUtilTest.cpp GcsFileSystemTest.cpp) +add_test(velox_lakehouse_gcsfile_test velox_lakehouse_gcsfile_test) +target_link_libraries( + velox_lakehouse_gcsfile_test + velox_core + velox_dwio_common_exception + velox_exec + velox_exec_test_lib + velox_file + velox_lakehouse_gcs + velox_hiveV2_connector + velox_temp_path + GTest::gmock + GTest::gtest + GTest::gtest_main) + +add_executable(velox_lakehouse_gcs_insert_test GcsInsertTest.cpp) +add_test(velox_lakehouse_gcs_insert_test velox_lakehouse_gcs_insert_test) +target_link_libraries( + velox_lakehouse_gcs_insert_test + velox_file + velox_lakehouse_gcs + velox_hiveV2_config + velox_core + velox_exec_test_lib + velox_dwio_common_exception + velox_exec + GTest::gmock + GTest::gtest + GTest::gtest_main) diff --git a/velox/connectors/lakehouse/storage_adapters/gcs/tests/GcsEmulator.h b/velox/connectors/lakehouse/storage_adapters/gcs/tests/GcsEmulator.h new file mode 100644 index 000000000000..c18fa18dc383 --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/gcs/tests/GcsEmulator.h @@ -0,0 +1,144 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include +#include +#include "gtest/gtest.h" + +#include "velox/common/config/Config.h" +#include "velox/connectors/lakehouse/storage_adapters/gcs/GcsUtil.h" +#include "velox/exec/tests/utils/PortUtil.h" + +namespace bp = boost::process; +namespace gc = google::cloud; +namespace gcs = google::cloud::storage; + +namespace facebook::velox::filesystems { + +static std::string_view kLoremIpsum = + "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor" + "incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis " + "nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat." + "Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu" + "fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in" + "culpa qui officia deserunt mollit anim id est laborum."; + +class GcsEmulator : public testing::Environment { + public: + GcsEmulator() { + auto port = std::to_string(exec::test::getFreePort()); + endpoint_ = "http://localhost:" + port; + std::vector names{"python3", "python"}; + // If the build script or application developer provides a value in the + // PYTHON environment variable, then just use that. + if (const auto* env = std::getenv("PYTHON")) { + names = {env}; + } + std::stringstream error; + error << R"""({>>"Coud not start GCS emulator." 
+ " The following list of python interpreter names were used:"})"""; + for (const auto& interpreter : names) { + auto exe_path = bp::search_path(interpreter); + error << " " << interpreter; + if (exe_path.empty()) { + error << " (exe not found)"; + continue; + } + + serverProcess_ = bp::child( + boost::this_process::environment(), + exe_path, + "-m", + "testbench", + "--port", + port, + group_); + if (serverProcess_.valid()) { + return; + } + error << " (failed to start)"; + serverProcess_.terminate(); + serverProcess_.wait(); + } + VELOX_FAIL(error.str()); + } + + ~GcsEmulator() override { + // Brutal shutdown, kill the full process group because the GCS emulator + // may launch additional children. + group_.terminate(); + if (serverProcess_.valid()) { + serverProcess_.wait(); + } + } + + std::shared_ptr hiveConfig( + const std::unordered_map configOverride = {}) + const { + std::unordered_map config( + {{"hive.gcs.endpoint", endpoint_}}); + + // Update the default config map with the supplied configOverride map + for (const auto& [configName, configValue] : configOverride) { + config[configName] = configValue; + } + + return std::make_shared(std::move(config)); + } + + std::string_view preexistingBucketName() { + return bucketName_; + } + + std::string_view preexistingObjectName() { + return objectName_; + } + + void bootstrap() { + ASSERT_THAT(this, ::testing::NotNull()); + + // Create a bucket and a small file in the testbench. This makes it easier + // to bootstrap GcsFileSystem and its tests. + auto client = gcs::Client( + google::cloud::Options{} + .set(this->endpoint_) + .set(gc::MakeInsecureCredentials())); + + auto bucket = client.CreateBucketForProject( + bucketName_, "ignored-by-testbench", gcs::BucketMetadata{}); + ASSERT_TRUE(bucket.ok()) << "Failed to create bucket <" << bucketName_ + << ">, status=" << bucket.status(); + + auto object = client.InsertObject(bucketName_, objectName_, kLoremIpsum); + ASSERT_TRUE(object.ok()) << "Failed to create object <" << objectName_ + << ">, status=" << object.status(); + } + + private: + std::string endpoint_; + bp::child serverProcess_; + bp::group group_; + static std::string bucketName_; + static std::string objectName_; +}; + +std::string GcsEmulator::bucketName_{"test1-gcs"}; +std::string GcsEmulator::objectName_{"test-object-name"}; + +} // namespace facebook::velox::filesystems diff --git a/velox/connectors/lakehouse/storage_adapters/gcs/tests/GcsFileSystemTest.cpp b/velox/connectors/lakehouse/storage_adapters/gcs/tests/GcsFileSystemTest.cpp new file mode 100644 index 000000000000..a05c7ab12105 --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/gcs/tests/GcsFileSystemTest.cpp @@ -0,0 +1,219 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "velox/connectors/lakehouse/storage_adapters/gcs/GcsFileSystem.h" +#include "GcsEmulator.h" +#include "velox/common/base/tests/GTestUtils.h" +#include "velox/common/file/File.h" +#include "velox/connectors/lakehouse/storage_adapters/gcs/GcsUtil.h" +#include "velox/exec/tests/utils/TempFilePath.h" + +#include "gtest/gtest.h" + +namespace facebook::velox::filesystems { +namespace { + +class GcsFileSystemTest : public testing::Test { + public: + void SetUp() { + emulator_ = std::make_shared(); + emulator_->bootstrap(); + } + + std::shared_ptr emulator_; +}; + +TEST_F(GcsFileSystemTest, readFile) { + const auto gcsFile = gcsURI( + emulator_->preexistingBucketName(), emulator_->preexistingObjectName()); + + filesystems::GcsFileSystem gcfs(emulator_->hiveConfig()); + gcfs.initializeClient(); + auto readFile = gcfs.openFileForRead(gcsFile); + std::int64_t size = readFile->size(); + std::int64_t ref_size = kLoremIpsum.length(); + EXPECT_EQ(size, ref_size); + EXPECT_EQ(readFile->pread(0, size), kLoremIpsum); + + char buffer1[size]; + ASSERT_EQ(readFile->pread(0, size, &buffer1), kLoremIpsum); + ASSERT_EQ(readFile->size(), ref_size); + + char buffer2[50]; + ASSERT_EQ(readFile->pread(10, 50, &buffer2), kLoremIpsum.substr(10, 50)); + ASSERT_EQ(readFile->size(), ref_size); + + EXPECT_EQ(readFile->pread(10, size - 10), kLoremIpsum.substr(10)); + + char buff1[10]; + char buff2[20]; + char buff3[30]; + std::vector> buffers = { + folly::Range(buff1, 10), + folly::Range(nullptr, 20), + folly::Range(buff2, 20), + folly::Range(nullptr, 30), + folly::Range(buff3, 30)}; + ASSERT_EQ(10 + 20 + 20 + 30 + 30, readFile->preadv(0, buffers)); + ASSERT_EQ(std::string_view(buff1, sizeof(buff1)), kLoremIpsum.substr(0, 10)); + ASSERT_EQ(std::string_view(buff2, sizeof(buff2)), kLoremIpsum.substr(30, 20)); + ASSERT_EQ(std::string_view(buff3, sizeof(buff3)), kLoremIpsum.substr(80, 30)); +} + +TEST_F(GcsFileSystemTest, writeAndReadFile) { + const std::string_view newFile = "readWriteFile.txt"; + const auto gcsFile = gcsURI(emulator_->preexistingBucketName(), newFile); + + filesystems::GcsFileSystem gcfs(emulator_->hiveConfig()); + gcfs.initializeClient(); + auto writeFile = gcfs.openFileForWrite(gcsFile); + std::string_view kDataContent = + "Dance me to your beauty with a burning violin" + "Dance me through the panic till I'm gathered safely in" + "Lift me like an olive branch and be my homeward dove" + "Dance me to the end of love"; + + EXPECT_EQ(writeFile->size(), 0); + std::int64_t contentSize = kDataContent.length(); + writeFile->append(kDataContent.substr(0, 10)); + EXPECT_EQ(writeFile->size(), 10); + writeFile->append(kDataContent.substr(10, contentSize - 10)); + EXPECT_EQ(writeFile->size(), contentSize); + writeFile->flush(); + writeFile->close(); + VELOX_ASSERT_THROW( + writeFile->append(kDataContent.substr(0, 10)), "File is not open"); + + auto readFile = gcfs.openFileForRead(gcsFile); + std::int64_t size = readFile->size(); + EXPECT_EQ(readFile->size(), contentSize); + EXPECT_EQ(readFile->pread(0, size), kDataContent); + + // Opening an existing file for write must be an error. 
+ filesystems::GcsFileSystem newGcfs(emulator_->hiveConfig()); + newGcfs.initializeClient(); + VELOX_ASSERT_THROW(newGcfs.openFileForWrite(gcsFile), "File already exists"); +} + +TEST_F(GcsFileSystemTest, renameNotImplemented) { + const std::string_view file = "newTest.txt"; + const auto gcsExistingFile = gcsURI( + emulator_->preexistingBucketName(), emulator_->preexistingObjectName()); + const auto gcsNewFile = gcsURI(emulator_->preexistingBucketName(), file); + filesystems::GcsFileSystem gcfs(emulator_->hiveConfig()); + gcfs.initializeClient(); + gcfs.openFileForRead(gcsExistingFile); + VELOX_ASSERT_THROW( + gcfs.rename(gcsExistingFile, gcsNewFile, true), + "rename for GCS not implemented"); +} + +TEST_F(GcsFileSystemTest, mkdirNotImplemented) { + const std::string_view dir = "newDirectory"; + const auto gcsNewDirectory = gcsURI(emulator_->preexistingBucketName(), dir); + filesystems::GcsFileSystem gcfs(emulator_->hiveConfig()); + gcfs.initializeClient(); + VELOX_ASSERT_THROW( + gcfs.mkdir(gcsNewDirectory), "mkdir for GCS not implemented"); +} + +TEST_F(GcsFileSystemTest, rmdirNotImplemented) { + const std::string_view dir = "Directory"; + const auto gcsDirectory = gcsURI(emulator_->preexistingBucketName(), dir); + filesystems::GcsFileSystem gcfs(emulator_->hiveConfig()); + gcfs.initializeClient(); + VELOX_ASSERT_THROW(gcfs.rmdir(gcsDirectory), "rmdir for GCS not implemented"); +} + +TEST_F(GcsFileSystemTest, missingFile) { + const std::string_view file = "newTest.txt"; + const auto gcsFile = gcsURI(emulator_->preexistingBucketName(), file); + filesystems::GcsFileSystem gcfs(emulator_->hiveConfig()); + gcfs.initializeClient(); + VELOX_ASSERT_RUNTIME_THROW_CODE( + gcfs.openFileForRead(gcsFile), + error_code::kFileNotFound, + "\\\"message\\\": \\\"Live version of object test1-gcs/newTest.txt does not exist.\\\""); +} + +TEST_F(GcsFileSystemTest, missingBucket) { + filesystems::GcsFileSystem gcfs(emulator_->hiveConfig()); + gcfs.initializeClient(); + const std::string_view gcsFile = "gs://dummy/foo.txt"; + VELOX_ASSERT_RUNTIME_THROW_CODE( + gcfs.openFileForRead(gcsFile), + error_code::kFileNotFound, + "\\\"message\\\": \\\"Bucket dummy does not exist.\\\""); +} + +TEST_F(GcsFileSystemTest, credentialsConfig) { + // credentials from arrow gcsfs test case + // While this service account key has the correct format, it cannot be used + // for authentication because the key has been deactivated on the server-side, + // *and* the account(s) involved are deleted *and* they are not the accounts + // or projects do not match its contents. 
+ const std::string_view kCreds = R"""({ + "type": "service_account", + "project_id": "foo-project", + "private_key_id": "a1a111aa1111a11a11a11aa111a111a1a1111111", + "private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvQIBADANBgkqhkiG9w0BAQEFA" + "ASCBKcwggSjAgEAAoIBAQCltiF2oP3KJJ+S\ntTc1McylY+TuAi3AdohX7mmqIjd8a3eBYDHs7" + "FlnUrFC4CRijCr0rUqYfg2pmk4a\n6TaKbQRAhWDJ7XD931g7EBvCtd8+JQBNWVKnP9ByJUaO0h" + "WVniM50KTsWtyX3up/\nfS0W2R8Cyx4yvasE8QHH8gnNGtr94iiORDC7De2BwHi/iU8FxMVJAIyD" + "LNfyk0hN\neheYKfIDBgJV2v6VaCOGWaZyEuD0FJ6wFeLybFBwibrLIBE5Y/StCrZoVZ5LocFP\n" + "T4o8kT7bU6yonudSCyNMedYmqHj/iF8B2UN1WrYx8zvoDqZk0nxIglmEYKn/6U7U\ngyETGcW9Ag" + "MBAAECggEAC231vmkpwA7JG9UYbviVmSW79UecsLzsOAZnbtbn1VLT\nPg7sup7tprD/LXHoyIxK7S" + "/jqINvPU65iuUhgCg3Rhz8+UiBhd0pCH/arlIdiPuD\n2xHpX8RIxAq6pGCsoPJ0kwkHSw8UTnxPV8Z" + "CPSRyHV71oQHQgSl/WjNhRi6PQroB\nSqc/pS1m09cTwyKQIopBBVayRzmI2BtBxyhQp9I8t5b7PYkE" + "ZDQlbdq0j5Xipoov\n9EW0+Zvkh1FGNig8IJ9Wp+SZi3rd7KLpkyKPY7BK/g0nXBkDxn019cET0SdJOH" + "QG\nDiHiv4yTRsDCHZhtEbAMKZEpku4WxtQ+JjR31l8ueQKBgQDkO2oC8gi6vQDcx/CX\nZ23x2ZUyar" + "6i0BQ8eJFAEN+IiUapEeCVazuxJSt4RjYfwSa/p117jdZGEWD0GxMC\n+iAXlc5LlrrWs4MWUc0AHTgX" + "na28/vii3ltcsI0AjWMqaybhBTTNbMFa2/fV2OX2\nUimuFyBWbzVc3Zb9KAG4Y7OmJQKBgQC5324IjX" + "Pq5oH8UWZTdJPuO2cgRsvKmR/r\n9zl4loRjkS7FiOMfzAgUiXfH9XCnvwXMqJpuMw2PEUjUT+OyWjJO" + "NEK4qGFJkbN5\n3ykc7p5V7iPPc7Zxj4mFvJ1xjkcj+i5LY8Me+gL5mGIrJ2j8hbuv7f+PWIauyjnp\n" + "Nx/0GVFRuQKBgGNT4D1L7LSokPmFIpYh811wHliE0Fa3TDdNGZnSPhaD9/aYyy78\nLkxYKuT7WY7UVv" + "LN+gdNoVV5NsLGDa4cAV+CWPfYr5PFKGXMT/Wewcy1WOmJ5des\nAgMC6zq0TdYmMBN6WpKUpEnQtbmh" + "3eMnuvADLJWxbH3wCkg+4xDGg2bpAoGAYRNk\nMGtQQzqoYNNSkfus1xuHPMA8508Z8O9pwKU795R3zQ" + "s1NAInpjI1sOVrNPD7Ymwc\nW7mmNzZbxycCUL/yzg1VW4P1a6sBBYGbw1SMtWxun4ZbnuvMc2CTCh+43" + "/1l+FHe\nMmt46kq/2rH2jwx5feTbOE6P6PINVNRJh/9BDWECgYEAsCWcH9D3cI/QDeLG1ao7\nrE2Nckn" + "P8N783edM07Z/zxWsIsXhBPY3gjHVz2LDl+QHgPWhGML62M0ja/6SsJW3\nYvLLIc82V7eqcVJTZtaFkuh" + "t68qu/Jn1ezbzJMJ4YXDYo1+KFi+2CAGR06QILb+I\nlUtj+/nH3HDQjM4ltYfTPUg=\n" + "-----END PRIVATE KEY-----\n", + "client_email": "foo-email@foo-project.iam.gserviceaccount.com", + "client_id": "100000000000000000001", + "auth_uri": "https://accounts.google.com/o/oauth2/auth", + "token_uri": "https://oauth2.googleapis.com/token", + "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs", + "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/foo-email%40foo-project.iam.gserviceaccount.com" + })"""; + auto jsonFile = exec::test::TempFilePath::create(); + std::ofstream credsOut(jsonFile->getPath()); + credsOut << kCreds; + credsOut.close(); + + std::unordered_map configOverride = { + {"hive.gcs.json-key-file-path", jsonFile->getPath()}}; + auto hiveConfig = emulator_->hiveConfig(configOverride); + + filesystems::GcsFileSystem gcfs(hiveConfig); + gcfs.initializeClient(); + const auto gcsFile = gcsURI( + emulator_->preexistingBucketName(), emulator_->preexistingObjectName()); + VELOX_ASSERT_THROW( + gcfs.openFileForRead(gcsFile), "Invalid ServiceAccountCredentials"); +} +} // namespace +} // namespace facebook::velox::filesystems diff --git a/velox/connectors/lakehouse/storage_adapters/gcs/tests/GcsInsertTest.cpp b/velox/connectors/lakehouse/storage_adapters/gcs/tests/GcsInsertTest.cpp new file mode 100644 index 000000000000..53eb6b85efd7 --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/gcs/tests/GcsInsertTest.cpp @@ -0,0 +1,78 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include "GcsEmulator.h" +#include "velox/connectors/lakehouse/storage_adapters/gcs/RegisterGcsFileSystem.h" +#include "velox/connectors/lakehouse/storage_adapters/test_common/InsertTest.h" + +using namespace facebook::velox::exec::test; + +namespace facebook::velox::filesystems { +namespace { + +class GcsInsertTest : public testing::Test, public test::InsertTest { + protected: + static void SetUpTestSuite() { + registerGcsFileSystem(); + memory::MemoryManager::testingSetInstance({}); + } + + void SetUp() override { + connector::registerConnectorFactory( + std::make_shared()); + emulator_ = std::make_shared(); + emulator_->bootstrap(); + auto hiveConnector = + connector::getConnectorFactory( + connector::hive::HiveConnectorFactory::kHiveConnectorName) + ->newConnector( + connector::hive::test::kHiveConnectorId, + emulator_->hiveConfig(), + ioExecutor_.get()); + connector::registerConnector(hiveConnector); + parquet::registerParquetReaderFactory(); + parquet::registerParquetWriterFactory(); + ioExecutor_ = std::make_unique(3); + } + + void TearDown() override { + parquet::unregisterParquetReaderFactory(); + parquet::unregisterParquetWriterFactory(); + connector::unregisterConnectorFactory( + connector::hive::HiveConnectorFactory::kHiveConnectorName); + connector::unregisterConnector(connector::hive::test::kHiveConnectorId); + } + + std::shared_ptr emulator_; + std::unique_ptr ioExecutor_; +}; +} // namespace + +TEST_F(GcsInsertTest, gcsInsertTest) { + const int64_t kExpectedRows = 1'000; + const auto gcsBucket = gcsURI(emulator_->preexistingBucketName(), ""); + runInsertTest(gcsBucket, kExpectedRows, pool()); +} +} // namespace facebook::velox::filesystems + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + folly::Init init{&argc, &argv, false}; + return RUN_ALL_TESTS(); +} diff --git a/velox/connectors/lakehouse/storage_adapters/gcs/tests/GcsUtilTest.cpp b/velox/connectors/lakehouse/storage_adapters/gcs/tests/GcsUtilTest.cpp new file mode 100644 index 000000000000..3347231e2299 --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/gcs/tests/GcsUtilTest.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "velox/connectors/lakehouse/storage_adapters/gcs/GcsUtil.h" + +#include "gtest/gtest.h" + +using namespace facebook::velox; + +TEST(GcsUtilTest, isGcsFile) { + EXPECT_FALSE(isGcsFile("gs:")); + EXPECT_FALSE(isGcsFile("gs::/bucket")); + EXPECT_FALSE(isGcsFile("gs:/bucket")); + EXPECT_TRUE(isGcsFile("gs://bucket/file.txt")); +} + +TEST(GcsUtilTest, setBucketAndKeyFromGcsPath) { + std::string bucket, key; + auto path = "bucket/file.txt"; + setBucketAndKeyFromGcsPath(path, bucket, key); + EXPECT_EQ(bucket, "bucket"); + EXPECT_EQ(key, "file.txt"); +} diff --git a/velox/connectors/lakehouse/storage_adapters/hdfs/CMakeLists.txt b/velox/connectors/lakehouse/storage_adapters/hdfs/CMakeLists.txt new file mode 100644 index 000000000000..4837ccf21912 --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/hdfs/CMakeLists.txt @@ -0,0 +1,36 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# for generated headers + +velox_add_library(velox_lakehouse_hdfs RegisterHdfsFileSystem.cpp) + +if(VELOX_ENABLE_HDFS) + velox_sources( + velox_lakehouse_hdfs + PRIVATE + HdfsFileSystem.cpp + HdfsReadFile.cpp + HdfsWriteFile.cpp) + velox_link_libraries( + velox_lakehouse_hdfs + velox_external_hdfs + velox_dwio_common + Folly::folly + xsimd) + + if(${VELOX_BUILD_TESTING}) + add_subdirectory(tests) + endif() +endif() diff --git a/velox/connectors/lakehouse/storage_adapters/hdfs/HdfsFileSystem.cpp b/velox/connectors/lakehouse/storage_adapters/hdfs/HdfsFileSystem.cpp new file mode 100644 index 000000000000..c2d566b15795 --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/hdfs/HdfsFileSystem.cpp @@ -0,0 +1,158 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "HdfsFileSystem.h" +#include "HdfsReadFile.h" +#include "HdfsWriteFile.h" +#include "velox/common/config/Config.h" +#include "velox/external/hdfs/ArrowHdfsInternal.h" + +namespace facebook::velox::filesystems { +std::string_view HdfsFileSystem::kScheme("hdfs://"); + +std::string_view HdfsFileSystem::kViewfsScheme("viewfs://"); + +class HdfsFileSystem::Impl { + public: + // Keep config here for possible use in the future. 
+  explicit Impl(
+      const config::ConfigBase* config,
+      const HdfsServiceEndpoint& endpoint) {
+    auto status = filesystems::arrow::io::internal::ConnectLibHdfs(&driver_);
+    if (!status.ok()) {
+      LOG(ERROR) << "ConnectLibHdfs failed due to: " << status.ToString();
+    }
+
+    // Connect to HDFS with the builder object.
+    hdfsBuilder* builder = driver_->NewBuilder();
+    if (endpoint.isViewfs) {
+      // The default NameNode configuration will be used (from the XML
+      // configuration files). See:
+      // https://github.com/facebookincubator/velox/blob/main/velox/external/hdfs/hdfs.h#L289
+      driver_->BuilderSetNameNode(builder, "default");
+    } else {
+      driver_->BuilderSetNameNode(builder, endpoint.host.c_str());
+      driver_->BuilderSetNameNodePort(builder, atoi(endpoint.port.data()));
+    }
+    driver_->BuilderSetForceNewInstance(builder);
+    hdfsClient_ = driver_->BuilderConnect(builder);
+    VELOX_CHECK_NOT_NULL(
+        hdfsClient_,
+        "Unable to connect to HDFS: {}, got error: {}.",
+        endpoint.identity(),
+        driver_->GetLastExceptionRootCause());
+  }
+
+  ~Impl() {
+    LOG(INFO) << "Disconnecting HDFS file system";
+    int disconnectResult = driver_->Disconnect(hdfsClient_);
+    if (disconnectResult != 0) {
+      LOG(WARNING) << "hdfs disconnect failure in HdfsReadFile close: "
+                   << errno;
+    }
+  }
+
+  hdfsFS hdfsClient() {
+    return hdfsClient_;
+  }
+
+  filesystems::arrow::io::internal::LibHdfsShim* hdfsShim() {
+    return driver_;
+  }
+
+ private:
+  hdfsFS hdfsClient_;
+  filesystems::arrow::io::internal::LibHdfsShim* driver_;
+};
+
+HdfsFileSystem::HdfsFileSystem(
+    const std::shared_ptr<const config::ConfigBase>& config,
+    const HdfsServiceEndpoint& endpoint)
+    : FileSystem(config) {
+  impl_ = std::make_shared<Impl>(config.get(), endpoint);
+}
+
+std::string HdfsFileSystem::name() const {
+  return "HDFS";
+}
+
+std::unique_ptr<ReadFile> HdfsFileSystem::openFileForRead(
+    std::string_view path,
+    const FileOptions& /*unused*/) {
+  // Only remove the scheme for hdfs paths.
+  if (path.find(kScheme) == 0) {
+    path.remove_prefix(kScheme.length());
+    if (auto index = path.find('/')) {
+      path.remove_prefix(index);
+    }
+  }
+  return std::make_unique<HdfsReadFile>(
+      impl_->hdfsShim(), impl_->hdfsClient(), path);
+}
+
+std::unique_ptr<WriteFile> HdfsFileSystem::openFileForWrite(
+    std::string_view path,
+    const FileOptions& /*unused*/) {
+  return std::make_unique<HdfsWriteFile>(
+      impl_->hdfsShim(), impl_->hdfsClient(), path);
+}
+
+bool HdfsFileSystem::isHdfsFile(const std::string_view filePath) {
+  return (filePath.find(kScheme) == 0) || (filePath.find(kViewfsScheme) == 0);
+}
+
+/// Gets the HDFS endpoint from a given file path. If not found, falls back to
+/// the fixed endpoint from the configuration.
+HdfsServiceEndpoint HdfsFileSystem::getServiceEndpoint(
+    const std::string_view filePath,
+    const config::ConfigBase* config) {
+  if (filePath.find(kViewfsScheme) == 0) {
+    return HdfsServiceEndpoint{"viewfs", "", true};
+  }
+
+  auto endOfIdentityInfo = filePath.find('/', kScheme.size());
+  std::string hdfsIdentity{
+      filePath.data(), kScheme.size(), endOfIdentityInfo - kScheme.size()};
+  if (hdfsIdentity.empty()) {
+    // Fall back to the fixed endpoint from the configuration.
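+    // For example, "hdfs:///user/hive/a.txt" carries no host:port identity,
+    // so the endpoint must come from "hive.hdfs.host" and "hive.hdfs.port".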
+ auto hdfsHost = config->get("hive.hdfs.host"); + VELOX_CHECK( + hdfsHost.hasValue(), + "hdfsHost is empty, configuration missing for hdfs host"); + auto hdfsPort = config->get("hive.hdfs.port"); + VELOX_CHECK( + hdfsPort.hasValue(), + "hdfsPort is empty, configuration missing for hdfs port"); + return HdfsServiceEndpoint{*hdfsHost, *hdfsPort}; + } + + auto hostAndPortSeparator = hdfsIdentity.find(':', 0); + // In HDFS HA mode, the hdfsIdentity is a nameservice ID with no port. + if (hostAndPortSeparator == std::string::npos) { + return HdfsServiceEndpoint{hdfsIdentity, ""}; + } + std::string host{hdfsIdentity.data(), 0, hostAndPortSeparator}; + std::string port{ + hdfsIdentity.data(), + hostAndPortSeparator + 1, + hdfsIdentity.size() - hostAndPortSeparator - 1}; + return HdfsServiceEndpoint{host, port}; +} + +void HdfsFileSystem::remove(std::string_view path) { + VELOX_UNSUPPORTED("Does not support removing files from hdfs"); +} + +} // namespace facebook::velox::filesystems diff --git a/velox/connectors/lakehouse/storage_adapters/hdfs/HdfsFileSystem.h b/velox/connectors/lakehouse/storage_adapters/hdfs/HdfsFileSystem.h new file mode 100644 index 000000000000..b541ec629baf --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/hdfs/HdfsFileSystem.h @@ -0,0 +1,108 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "velox/common/file/FileSystems.h" + +namespace facebook::velox::filesystems { + +struct HdfsServiceEndpoint { + HdfsServiceEndpoint( + const std::string& hdfsHost, + const std::string& hdfsPort, + bool isViewfs = false) + : host(hdfsHost), port(hdfsPort), isViewfs(isViewfs) {} + + /// In HDFS HA mode, the identity is a nameservice ID with no port, e.g., + /// the identity is nameservice_id for + /// hdfs://nameservice_id/file/path/in/hdfs. Otherwise, a port must be + /// contained, e.g., the identity is hdfs_namenode:9000 for + /// hdfs://hdfs_namenode:9000/file/path/in/hdfs. + std::string identity() const { + return host + (port.empty() ? "" : ":" + port); + } + + const std::string host; + const std::string port; + bool isViewfs; +}; + +/** + * You can configure hdfs settings (timeouts etc) using configure file + * which is given by environment parameter LIBHDFS3_CONF + * or "hdfs-client.xml" in working directory. 
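+ *
+ * Path forms handled by getServiceEndpoint:
+ *   hdfs://hdfs_namenode:9000/file/path/in/hdfs - explicit host and port
+ *   hdfs://nameservice_id/file/path/in/hdfs     - HA nameservice, no port
+ *   hdfs:///file/path/in/hdfs                   - endpoint from hive.hdfs.host/port
+ *   viewfs:// paths                             - resolved via the client XML configs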
+ * + * Internally you can use hdfsBuilderConfSetStr to configure the client + */ +class HdfsFileSystem : public FileSystem { + public: + explicit HdfsFileSystem( + const std::shared_ptr& config, + const HdfsServiceEndpoint& endpoint); + + std::string name() const override; + + std::unique_ptr openFileForRead( + std::string_view path, + const FileOptions& options = {}) override; + + std::unique_ptr openFileForWrite( + std::string_view path, + const FileOptions& options = {}) override; + + void remove(std::string_view path) override; + + virtual void rename( + std::string_view path, + std::string_view newPath, + bool overWrite = false) override { + VELOX_UNSUPPORTED("rename for HDFs not implemented"); + } + + bool exists(std::string_view path) override { + VELOX_UNSUPPORTED("exists for HDFS not implemented"); + } + + virtual std::vector list(std::string_view path) override { + VELOX_UNSUPPORTED("list for HDFS not implemented"); + } + + void mkdir(std::string_view path, const DirectoryOptions& options = {}) + override { + VELOX_UNSUPPORTED("mkdir for HDFS not implemented"); + } + + void rmdir(std::string_view path) override { + VELOX_UNSUPPORTED("rmdir for HDFS not implemented"); + } + + static bool isHdfsFile(std::string_view filename); + + /// The given filePath is used to infer hdfs endpoint. If hdfs identity is + /// missing from filePath, the configured "hive.hdfs.host" & "hive.hdfs.port" + /// will be used. + static HdfsServiceEndpoint getServiceEndpoint( + const std::string_view filePath, + const config::ConfigBase* config); + + static std::string_view kScheme; + + static std::string_view kViewfsScheme; + + protected: + class Impl; + std::shared_ptr impl_; +}; + +} // namespace facebook::velox::filesystems diff --git a/velox/connectors/lakehouse/storage_adapters/hdfs/HdfsReadFile.cpp b/velox/connectors/lakehouse/storage_adapters/hdfs/HdfsReadFile.cpp new file mode 100644 index 000000000000..affc1dfd2ede --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/hdfs/HdfsReadFile.cpp @@ -0,0 +1,198 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "HdfsReadFile.h" +#include "velox/external/hdfs/ArrowHdfsInternal.h" + +namespace facebook::velox { + +struct HdfsFile { + filesystems::arrow::io::internal::LibHdfsShim* driver_; + hdfsFS client_; + hdfsFile handle_; + + HdfsFile() : driver_(nullptr), client_(nullptr), handle_(nullptr) {} + ~HdfsFile() { + if (handle_ && driver_->CloseFile(client_, handle_) == -1) { + LOG(ERROR) << "Unable to close file, errno: " << errno; + } + } + + void open( + filesystems::arrow::io::internal::LibHdfsShim* driver, + hdfsFS client, + const std::string& path) { + driver_ = driver; + client_ = client; + handle_ = driver->OpenFile(client, path.data(), O_RDONLY, 0, 0, 0); + VELOX_CHECK_NOT_NULL( + handle_, + "Unable to open file {}. 
got error: {}", + path, + driver_->GetLastExceptionRootCause()); + } + + void seek(uint64_t offset) const { + VELOX_CHECK_EQ( + driver_->Seek(client_, handle_, offset), + 0, + "Cannot seek through HDFS file, error is : {}", + driver_->GetLastExceptionRootCause()); + } + + int32_t read(char* pos, uint64_t length) const { + auto bytesRead = driver_->Read(client_, handle_, pos, length); + VELOX_CHECK(bytesRead >= 0, "Read failure in HDFSReadFile::preadInternal."); + return bytesRead; + } +}; + +class HdfsReadFile::Impl { + public: + Impl( + filesystems::arrow::io::internal::LibHdfsShim* driver, + hdfsFS hdfs, + const std::string_view path) + : driver_(driver), hdfsClient_(hdfs), filePath_(path) { + fileInfo_ = driver_->GetPathInfo(hdfsClient_, filePath_.data()); + if (fileInfo_ == nullptr) { + auto error = fmt::format( + "FileNotFoundException: Path {} does not exist.", filePath_); + auto errMsg = fmt::format( + "Unable to get file path info for file: {}. got error: {}", + filePath_, + error); + if (error.find("FileNotFoundException") != std::string::npos) { + VELOX_FILE_NOT_FOUND_ERROR(errMsg); + } + VELOX_FAIL(errMsg); + } + } + + ~Impl() { + // Should call hdfsFreeFileInfo to avoid memory leak + if (fileInfo_) { + driver_->FreeFileInfo(fileInfo_, 1); + } + } + + void preadInternal(uint64_t offset, uint64_t length, char* pos) const { + checkFileReadParameters(offset, length); + if (!file_->handle_) { + file_->open(driver_, hdfsClient_, filePath_); + } + file_->seek(offset); + uint64_t totalBytesRead = 0; + while (totalBytesRead < length) { + auto bytesRead = file_->read(pos, length - totalBytesRead); + totalBytesRead += bytesRead; + pos += bytesRead; + } + } + + std::string_view pread(uint64_t offset, uint64_t length, void* buf) const { + preadInternal(offset, length, static_cast(buf)); + return {static_cast(buf), length}; + } + + std::string pread(uint64_t offset, uint64_t length) const { + std::string result(length, 0); + char* pos = result.data(); + preadInternal(offset, length, pos); + return result; + } + + uint64_t size() const { + return fileInfo_->mSize; + } + + uint64_t memoryUsage() const { + return fileInfo_->mBlockSize; + } + + bool shouldCoalesce() const { + return false; + } + + std::string getName() const { + return filePath_; + } + + void checkFileReadParameters(uint64_t offset, uint64_t length) const { + auto fileSize = size(); + auto endPoint = offset + length; + VELOX_CHECK_GE( + fileSize, + endPoint, + "Cannot read HDFS file beyond its size: {}, offset: {}, end point: {}", + fileSize, + offset, + endPoint); + } + + private: + filesystems::arrow::io::internal::LibHdfsShim* driver_; + hdfsFS hdfsClient_; + std::string filePath_; + hdfsFileInfo* fileInfo_; + folly::ThreadLocal file_; +}; + +HdfsReadFile::HdfsReadFile( + filesystems::arrow::io::internal::LibHdfsShim* driver, + hdfsFS hdfs, + const std::string_view path) + : pImpl(std::make_unique(driver, hdfs, path)) {} + +HdfsReadFile::~HdfsReadFile() = default; + +std::string_view HdfsReadFile::pread( + uint64_t offset, + uint64_t length, + void* buf, + filesystems::File::IoStats* stats) const { + return pImpl->pread(offset, length, buf); +} + +std::string HdfsReadFile::pread( + uint64_t offset, + uint64_t length, + filesystems::File::IoStats* stats) const { + return pImpl->pread(offset, length); +} + +uint64_t HdfsReadFile::size() const { + return pImpl->size(); +} + +uint64_t HdfsReadFile::memoryUsage() const { + return pImpl->memoryUsage(); +} + +bool HdfsReadFile::shouldCoalesce() const { + return 
pImpl->shouldCoalesce(); +} + +std::string HdfsReadFile::getName() const { + return pImpl->getName(); +} + +void HdfsReadFile::checkFileReadParameters(uint64_t offset, uint64_t length) + const { + pImpl->checkFileReadParameters(offset, length); +} + +} // namespace facebook::velox diff --git a/velox/connectors/lakehouse/storage_adapters/hdfs/HdfsReadFile.h b/velox/connectors/lakehouse/storage_adapters/hdfs/HdfsReadFile.h new file mode 100644 index 000000000000..ddd35e511a71 --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/hdfs/HdfsReadFile.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "velox/common/file/File.h" +#include "velox/external/hdfs/hdfs.h" + +namespace facebook::velox { + +namespace filesystems::arrow::io::internal { +class LibHdfsShim; +} + +/** + * Implementation of hdfs read file. + */ +class HdfsReadFile final : public ReadFile { + public: + explicit HdfsReadFile( + filesystems::arrow::io::internal::LibHdfsShim* driver, + hdfsFS hdfs, + std::string_view path); + ~HdfsReadFile() override; + + std::string_view pread( + uint64_t offset, + uint64_t length, + void* buf, + filesystems::File::IoStats* stats = nullptr) const final; + + std::string pread( + uint64_t offset, + uint64_t length, + filesystems::File::IoStats* stats = nullptr) const final; + + uint64_t size() const final; + + uint64_t memoryUsage() const final; + + bool shouldCoalesce() const final; + + std::string getName() const final; + + uint64_t getNaturalReadSize() const final { + return 72 << 20; + } + + private: + void checkFileReadParameters(uint64_t offset, uint64_t length) const; + + class Impl; + std::unique_ptr pImpl; +}; + +} // namespace facebook::velox diff --git a/velox/connectors/lakehouse/storage_adapters/hdfs/HdfsUtil.h b/velox/connectors/lakehouse/storage_adapters/hdfs/HdfsUtil.h new file mode 100644 index 000000000000..ac07d929ae48 --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/hdfs/HdfsUtil.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +namespace facebook::velox::filesystems { +inline std::string getHdfsPath( + const std::string& filePath, + const std::string_view& kScheme) { + auto endOfAuthority = filePath.find('/', kScheme.size()); + std::string hdfsAuthority{ + filePath, kScheme.size(), endOfAuthority - kScheme.size()}; + if (hdfsAuthority.empty()) { + return std::string(filePath.substr(kScheme.size())); + } + + return std::string(filePath.substr(endOfAuthority)); +} +} // namespace facebook::velox::filesystems diff --git a/velox/connectors/lakehouse/storage_adapters/hdfs/HdfsWriteFile.cpp b/velox/connectors/lakehouse/storage_adapters/hdfs/HdfsWriteFile.cpp new file mode 100644 index 000000000000..25d475d1161b --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/hdfs/HdfsWriteFile.cpp @@ -0,0 +1,100 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "HdfsWriteFile.h" +#include "velox/external/hdfs/ArrowHdfsInternal.h" + +namespace facebook::velox { +HdfsWriteFile::HdfsWriteFile( + filesystems::arrow::io::internal::LibHdfsShim* driver, + hdfsFS hdfsClient, + std::string_view path, + int bufferSize, + short replication, + int blockSize) + : driver_(driver), hdfsClient_(hdfsClient), filePath_(path) { + auto pos = filePath_.rfind("/"); + auto parentDir = filePath_.substr(0, pos + 1); + if (driver_->Exists(hdfsClient_, parentDir.c_str()) == -1) { + driver_->MakeDirectory(hdfsClient_, parentDir.c_str()); + } + + hdfsFile_ = driver_->OpenFile( + hdfsClient_, + filePath_.c_str(), + O_WRONLY, + bufferSize, + replication, + blockSize); + VELOX_CHECK_NOT_NULL( + hdfsFile_, + "Failed to open hdfs file: {}, with error: {}", + filePath_, + driver_->GetLastExceptionRootCause()); +} + +HdfsWriteFile::~HdfsWriteFile() { + if (hdfsFile_) { + close(); + } +} + +void HdfsWriteFile::close() { + int success = driver_->CloseFile(hdfsClient_, hdfsFile_); + VELOX_CHECK_EQ( + success, + 0, + "Failed to close hdfs file: {}", + driver_->GetLastExceptionRootCause()); + hdfsFile_ = nullptr; +} + +void HdfsWriteFile::flush() { + VELOX_CHECK_NOT_NULL( + hdfsFile_, + "Cannot flush HDFS file because file handle is null, file path: {}", + filePath_); + int success = driver_->Flush(hdfsClient_, hdfsFile_); + VELOX_CHECK_EQ( + success, 0, "Hdfs flush error: {}", driver_->GetLastExceptionRootCause()); +} + +void HdfsWriteFile::append(std::string_view data) { + if (data.size() == 0) { + return; + } + VELOX_CHECK_NOT_NULL( + hdfsFile_, + "Cannot append to HDFS file because file handle is null, file path: {}", + filePath_); + int64_t totalWrittenBytes = driver_->Write( + hdfsClient_, hdfsFile_, std::string(data).c_str(), data.size()); + VELOX_CHECK_EQ( + totalWrittenBytes, + data.size(), + "Write failure in HDFSWriteFile::append {}", + driver_->GetLastExceptionRootCause()); +} + +uint64_t HdfsWriteFile::size() const { + auto fileInfo = driver_->GetPathInfo(hdfsClient_, filePath_.c_str()); + uint64_t size = fileInfo->mSize; + // 
should call hdfsFreeFileInfo to avoid memory leak + driver_->FreeFileInfo(fileInfo, 1); + return size; +} + +} // namespace facebook::velox diff --git a/velox/connectors/lakehouse/storage_adapters/hdfs/HdfsWriteFile.h b/velox/connectors/lakehouse/storage_adapters/hdfs/HdfsWriteFile.h new file mode 100644 index 000000000000..fb311b1a6c3d --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/hdfs/HdfsWriteFile.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include "velox/common/file/File.h" +#include "velox/external/hdfs/hdfs.h" + +namespace facebook::velox { + +namespace filesystems::arrow::io::internal { +class LibHdfsShim; +} + +/// Implementation of hdfs write file. Nothing written to the file should be +/// read back until it is closed. +class HdfsWriteFile : public WriteFile { + public: + /// The constructor. + /// @param hdfsClient The configured hdfs filesystem handle. + /// @param path The file path to write. + /// @param bufferSize Size of buffer for write - pass 0 if you want + /// to use the default configured values. + /// @param replication Block replication - pass 0 if you want to use + /// the default configured values. + /// @param blockSize Size of block - pass 0 if you want to use the + /// default configured values. + HdfsWriteFile( + filesystems::arrow::io::internal::LibHdfsShim* driver, + hdfsFS hdfsClient, + std::string_view path, + int bufferSize = 0, + short replication = 0, + int blockSize = 0); + + ~HdfsWriteFile() override; + + /// Get the file size. + uint64_t size() const override; + + /// Flush the data. + void flush() override; + + /// Write the data by append mode. + void append(std::string_view data) override; + + /// Close the file. + void close() override; + + private: + filesystems::arrow::io::internal::LibHdfsShim* driver_; + /// The configured hdfs filesystem handle. + hdfsFS hdfsClient_; + /// The hdfs file handle for write. + hdfsFile hdfsFile_; + /// The hdfs file path. + const std::string filePath_; +}; +} // namespace facebook::velox diff --git a/velox/connectors/lakehouse/storage_adapters/hdfs/RegisterHdfsFileSystem.cpp b/velox/connectors/lakehouse/storage_adapters/hdfs/RegisterHdfsFileSystem.cpp new file mode 100644 index 000000000000..4faed804533f --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/hdfs/RegisterHdfsFileSystem.cpp @@ -0,0 +1,105 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifdef VELOX_ENABLE_HDFS +#include "folly/concurrency/ConcurrentHashMap.h" + +#include "velox/common/config/Config.h" +#include "velox/connectors/lakehouse/storage_adapters/hdfs/HdfsFileSystem.h" // @manual +#include "velox/connectors/lakehouse/storage_adapters/hdfs/HdfsUtil.h" // @manual +#include "velox/dwio/common/FileSink.h" +#endif + +namespace facebook::velox::filesystems { + +#ifdef VELOX_ENABLE_HDFS +std::mutex mtx; + +std::function(std::shared_ptr, std::string_view)> +hdfsFileSystemGenerator() { + static auto filesystemGenerator = [](std::shared_ptr + properties, + std::string_view filePath) { + static folly::ConcurrentHashMap> + filesystems; + static folly:: + ConcurrentHashMap> + hdfsInitiationFlags; + HdfsServiceEndpoint endpoint = + HdfsFileSystem::getServiceEndpoint(filePath, properties.get()); + std::string hdfsIdentity = endpoint.identity(); + if (filesystems.find(hdfsIdentity) != filesystems.end()) { + return filesystems[hdfsIdentity]; + } + std::unique_lock lk(mtx, std::defer_lock); + /// If the init flag for a given hdfs identity is not found, + /// create one for init use. It's a singleton. + if (hdfsInitiationFlags.find(hdfsIdentity) == hdfsInitiationFlags.end()) { + lk.lock(); + if (hdfsInitiationFlags.find(hdfsIdentity) == hdfsInitiationFlags.end()) { + std::shared_ptr initiationFlagPtr = + std::make_shared(); + hdfsInitiationFlags.insert(hdfsIdentity, initiationFlagPtr); + } + lk.unlock(); + } + folly::call_once( + *hdfsInitiationFlags[hdfsIdentity].get(), + [&properties, endpoint, hdfsIdentity]() { + auto filesystem = + std::make_shared(properties, endpoint); + filesystems.insert(hdfsIdentity, filesystem); + }); + return filesystems[hdfsIdentity]; + }; + return filesystemGenerator; +} + +std::function( + const std::string&, + const velox::dwio::common::FileSink::Options& options)> +hdfsWriteFileSinkGenerator() { + static auto hdfsWriteFileSink = + [](const std::string& fileURI, + const velox::dwio::common::FileSink::Options& options) { + if (HdfsFileSystem::isHdfsFile(fileURI)) { + std::string pathSuffix = + getHdfsPath(fileURI, HdfsFileSystem::kScheme); + auto fileSystem = + filesystems::getFileSystem(fileURI, options.connectorProperties); + return std::make_unique( + fileSystem->openFileForWrite(pathSuffix), + fileURI, + options.metricLogger, + options.stats); + } + return static_cast>( + nullptr); + }; + + return hdfsWriteFileSink; +} +#endif + +void registerHdfsFileSystem() { +#ifdef VELOX_ENABLE_HDFS + registerFileSystem(HdfsFileSystem::isHdfsFile, hdfsFileSystemGenerator()); + dwio::common::FileSink::registerFactory(hdfsWriteFileSinkGenerator()); +#endif +} + +} // namespace facebook::velox::filesystems diff --git a/velox/connectors/lakehouse/storage_adapters/hdfs/RegisterHdfsFileSystem.h b/velox/connectors/lakehouse/storage_adapters/hdfs/RegisterHdfsFileSystem.h new file mode 100644 index 000000000000..6f6f0c032bd7 --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/hdfs/RegisterHdfsFileSystem.h @@ -0,0 +1,24 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +namespace facebook::velox::filesystems { + +// Register the HDFS. +void registerHdfsFileSystem(); + +} // namespace facebook::velox::filesystems diff --git a/velox/connectors/lakehouse/storage_adapters/hdfs/tests/CMakeLists.txt b/velox/connectors/lakehouse/storage_adapters/hdfs/tests/CMakeLists.txt new file mode 100644 index 000000000000..4c47309582b2 --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/hdfs/tests/CMakeLists.txt @@ -0,0 +1,58 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +add_executable(velox_lakehouse_hdfs_file_test HdfsFileSystemTest.cpp HdfsMiniCluster.cpp + HdfsUtilTest.cpp) + +add_test(velox_lakehouse_hdfs_file_test velox_lakehouse_hdfs_file_test) +target_link_libraries( + velox_lakehouse_hdfs_file_test + velox_file + velox_lakehouse_hdfs + velox_core + velox_exec_test_lib + velox_hiveV2_connector + velox_dwio_common_exception + velox_exec + GTest::gtest + GTest::gtest_main + GTest::gmock) + +target_compile_options(velox_lakehouse_hdfs_file_test + PRIVATE -Wno-deprecated-declarations) + +add_executable(velox_lakehouse_hdfs_insert_test InsertIntoHdfsTest.cpp + HdfsMiniCluster.cpp HdfsUtilTest.cpp) + +add_test(velox_lakehouse_hdfs_insert_test velox_lakehouse_hdfs_insert_test) + +target_link_libraries( + velox_lakehouse_hdfs_insert_test + velox_exec_test_lib + velox_exec + GTest::gtest + GTest::gtest_main + GTest::gmock) + +target_compile_options(velox_lakehouse_hdfs_insert_test + PRIVATE -Wno-deprecated-declarations) + +# velox_lakehouse_hdfs_insert_test and velox_lakehouse_hdfs_file_test two tests can't run in +# parallel due to the port conflict in Hadoop NameNode and DataNode. The +# namenode port conflict can be resolved using the -nnport configuration in +# hadoop-mapreduce-client-jobclient-3.3.0-tests.jar. However the data node port +# cannot be configured. Therefore, we need to make sure that +# velox_lakehouse_hdfs_file_test runs only after velox_lakehouse_hdfs_insert_test has finished. +set_tests_properties(velox_lakehouse_hdfs_insert_test PROPERTIES DEPENDS + velox_lakehouse_hdfs_file_test) diff --git a/velox/connectors/lakehouse/storage_adapters/hdfs/tests/HdfsFileSystemTest.cpp b/velox/connectors/lakehouse/storage_adapters/hdfs/tests/HdfsFileSystemTest.cpp new file mode 100644 index 000000000000..3031045e17a6 --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/hdfs/tests/HdfsFileSystemTest.cpp @@ -0,0 +1,467 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "velox/connectors/lakehouse/storage_adapters/hdfs/HdfsFileSystem.h" +#include +#include +#include +#include +#include "HdfsMiniCluster.h" +#include "gtest/gtest.h" +#include "velox/common/base/Exceptions.h" +#include "velox/common/base/tests/GTestUtils.h" +#include "velox/connectors/lakehouse/storage_adapters/hdfs/HdfsReadFile.h" +#include "velox/connectors/lakehouse/storage_adapters/hdfs/RegisterHdfsFileSystem.h" +#include "velox/core/QueryConfig.h" +#include "velox/exec/tests/utils/TempFilePath.h" +#include "velox/external/hdfs/ArrowHdfsInternal.h" + +#include + +using namespace facebook::velox; + +using filesystems::arrow::io::internal::LibHdfsShim; + +constexpr int kOneMB = 1 << 20; +static const std::string kDestinationPath = "/test_file.txt"; +static const std::string kSimpleDestinationPath = "hdfs://" + kDestinationPath; +static const std::string kViewfsDestinationPath = + "viewfs://" + kDestinationPath; +std::unordered_map configurationValues; + +class HdfsFileSystemTest : public testing::Test { + public: + static void SetUpTestSuite() { + filesystems::registerHdfsFileSystem(); + if (miniCluster == nullptr) { + miniCluster = std::make_shared(); + miniCluster->start(); + auto tempFile = createFile(); + miniCluster->addFile(tempFile->getPath(), kDestinationPath); + } + configurationValues.insert( + {"hive.hdfs.host", std::string(miniCluster->host())}); + configurationValues.insert( + {"hive.hdfs.port", std::string(miniCluster->nameNodePort())}); + fullDestinationPath_ = + fmt::format("{}{}", miniCluster->url(), kDestinationPath); + } + + void SetUp() override { + if (!miniCluster->isRunning()) { + miniCluster->start(); + } + filesystems::registerHdfsFileSystem(); + } + + static void TearDownTestSuite() { + miniCluster->stop(); + } + + static std::unique_ptr openFileForWrite(std::string_view path) { + auto config = std::make_shared( + std::unordered_map(configurationValues)); + auto hdfsFilePath = fmt::format("{}{}", miniCluster->url(), path); + auto hdfsFileSystem = filesystems::getFileSystem(hdfsFilePath, config); + return hdfsFileSystem->openFileForWrite(path); + } + + static std::atomic startThreads; + static std::shared_ptr miniCluster; + static std::string fullDestinationPath_; + + private: + static std::shared_ptr<::exec::test::TempFilePath> createFile() { + auto tempFile = exec::test::TempFilePath::create(); + tempFile->append("aaaaa"); + tempFile->append("bbbbb"); + tempFile->append(std::string(kOneMB, 'c')); + tempFile->append("ddddd"); + return tempFile; + } +}; + +std::shared_ptr + HdfsFileSystemTest::miniCluster = nullptr; +std::atomic HdfsFileSystemTest::startThreads = false; +std::string HdfsFileSystemTest::fullDestinationPath_; + +void readData(ReadFile* readFile) { + ASSERT_EQ(readFile->size(), 15 + kOneMB); + char buffer1[5]; + ASSERT_EQ(readFile->pread(10 + kOneMB, 5, &buffer1), "ddddd"); + char buffer2[10]; + ASSERT_EQ(readFile->pread(0, 10, &buffer2), "aaaaabbbbb"); + auto buffer3 = new 
char[kOneMB]; + ASSERT_EQ(readFile->pread(10, kOneMB, buffer3), std::string(kOneMB, 'c')); + delete[] buffer3; + ASSERT_EQ(readFile->size(), 15 + kOneMB); + char buffer4[10]; + auto arf = readFile->pread(5, 10, &buffer4); + auto zarf = readFile->pread(kOneMB, 15); + auto buf = std::make_unique(8); + auto warf = readFile->pread(4, 8, buf.get()); + const std::string_view warfFromBuf(buf.get(), 8); + ASSERT_EQ(arf, "bbbbbccccc"); + ASSERT_EQ(zarf, "ccccccccccddddd"); + ASSERT_EQ(warf, "abbbbbcc"); + ASSERT_EQ(warfFromBuf, "abbbbbcc"); +} + +void checkReadErrorMessages( + ReadFile* readFile, + std::string errorMessage, + int endpoint) { + VELOX_ASSERT_THROW(readFile->pread(10 + kOneMB, endpoint), errorMessage); + + auto buf = std::make_unique(8); + VELOX_ASSERT_THROW( + readFile->pread(10 + kOneMB, endpoint, buf.get()), errorMessage); +} + +bool checkMiniClusterStop(ReadFile* readFile, const std::string& errorMessage) { + try { + readFile->pread(0, 1); + return false; + } catch (const VeloxException& error) { + return error.message().find(errorMessage) != std::string::npos; + } +} + +void verifyFailures(LibHdfsShim* driver, hdfsFS hdfs) { + HdfsReadFile readFile(driver, hdfs, kDestinationPath); + HdfsReadFile readFile2(driver, hdfs, kDestinationPath); + auto startPoint = 10 + kOneMB; + auto size = 15 + kOneMB; + auto endpoint = 10 + 2 * kOneMB; + auto offsetErrorMessage = + (boost::format( + "(%d vs. %d) Cannot read HDFS file beyond its size: %d, offset: %d, end point: %d") % + size % endpoint % size % startPoint % endpoint) + .str(); + + auto readFailErrorMessage = + (boost::format( + "Unable to open file %s. got error: ConnectException: Connection refused") % + kDestinationPath) + .str(); + + checkReadErrorMessages(&readFile, offsetErrorMessage, kOneMB); + HdfsFileSystemTest::miniCluster->stop(); + + constexpr auto kMaxRetries = 10; + int retries = 0; + while (true) { + if (checkMiniClusterStop(&readFile2, readFailErrorMessage)) { + checkReadErrorMessages(&readFile2, readFailErrorMessage, 1); + break; + } else { + if (retries >= kMaxRetries) { + FAIL() << "MiniCluster doesn't stop after kMaxRetries try"; + } else { + sleep(1); + retries++; + } + } + } +} + +hdfsFS connectHdfsDriver( + filesystems::arrow::io::internal::LibHdfsShim** driver, + const std::string host, + const std::string port) { + filesystems::arrow::io::internal::LibHdfsShim* libhdfs_shim; + auto status = filesystems::arrow::io::internal::ConnectLibHdfs(&libhdfs_shim); + VELOX_CHECK(status.ok(), "ConnectLibHdfs failed."); + + // Connect to HDFS with the builder object + hdfsBuilder* builder = libhdfs_shim->NewBuilder(); + libhdfs_shim->BuilderSetNameNode(builder, host.c_str()); + libhdfs_shim->BuilderSetNameNodePort(builder, std::stoi(port)); + libhdfs_shim->BuilderSetForceNewInstance(builder); + + auto hdfs = libhdfs_shim->BuilderConnect(builder); + VELOX_CHECK_NOT_NULL( + hdfs, + "Unable to connect to HDFS at {}:{}, got error", + host.c_str(), + port); + *driver = libhdfs_shim; + return hdfs; +} + +TEST_F(HdfsFileSystemTest, read) { + filesystems::arrow::io::internal::LibHdfsShim* driver; + auto hdfs = connectHdfsDriver( + &driver, + std::string(miniCluster->host()), + std::string(miniCluster->nameNodePort())); + HdfsReadFile readFile(driver, hdfs, kDestinationPath); + readData(&readFile); +} + +TEST_F(HdfsFileSystemTest, viaFileSystem) { + auto config = std::make_shared( + std::unordered_map(configurationValues)); + auto hdfsFileSystem = + filesystems::getFileSystem(fullDestinationPath_, config); + auto readFile = 
hdfsFileSystem->openFileForRead(fullDestinationPath_); + readData(readFile.get()); +} + +TEST_F(HdfsFileSystemTest, initializeFsWithEndpointInfoInFilePath) { + // Without host/port configured. + auto config = std::make_shared( + std::unordered_map()); + auto hdfsFileSystem = + filesystems::getFileSystem(fullDestinationPath_, config); + auto readFile = hdfsFileSystem->openFileForRead(fullDestinationPath_); + readData(readFile.get()); + + // Wrong endpoint info specified in hdfs file path. + const std::string wrongFullDestinationPath = + "hdfs://not_exist_host:" + std::string(miniCluster->nameNodePort()) + + kDestinationPath; + VELOX_ASSERT_THROW( + filesystems::getFileSystem(wrongFullDestinationPath, config), + "Unable to connect to HDFS"); +} + +TEST_F(HdfsFileSystemTest, fallbackToUseConfig) { + auto config = std::make_shared( + std::unordered_map(configurationValues)); + auto hdfsFileSystem = + filesystems::getFileSystem(fullDestinationPath_, config); + auto readFile = hdfsFileSystem->openFileForRead(fullDestinationPath_); + readData(readFile.get()); +} + +TEST_F(HdfsFileSystemTest, oneFsInstanceForOneEndpoint) { + auto hdfsFileSystem1 = + filesystems::getFileSystem(fullDestinationPath_, nullptr); + auto hdfsFileSystem2 = + filesystems::getFileSystem(fullDestinationPath_, nullptr); + ASSERT_TRUE(hdfsFileSystem1 == hdfsFileSystem2); +} + +TEST_F(HdfsFileSystemTest, missingFileViaFileSystem) { + auto config = std::make_shared( + std::unordered_map(configurationValues)); + auto hdfsFileSystem = + filesystems::getFileSystem(fullDestinationPath_, config); + + VELOX_ASSERT_RUNTIME_THROW_CODE( + hdfsFileSystem->openFileForRead( + "hdfs://localhost:7777/path/that/does/not/exist"), + error_code::kFileNotFound, + "Unable to get file path info for file: /path/that/does/not/exist. got error: FileNotFoundException: Path /path/that/does/not/exist does not exist."); +} + +TEST_F(HdfsFileSystemTest, missingHost) { + std::unordered_map missingHostConfiguration( + {{"hive.hdfs.port", std::string(miniCluster->nameNodePort())}}); + auto config = std::make_shared( + std::move(missingHostConfiguration)); + + VELOX_ASSERT_THROW( + filesystems::HdfsFileSystem::getServiceEndpoint( + kSimpleDestinationPath, config.get()), + "hdfsHost is empty, configuration missing for hdfs host"); +} + +TEST_F(HdfsFileSystemTest, missingPort) { + std::unordered_map missingPortConfiguration( + {{"hive.hdfs.host", std::string(miniCluster->host())}}); + auto config = std::make_shared( + std::move(missingPortConfiguration)); + + VELOX_ASSERT_THROW( + filesystems::HdfsFileSystem::getServiceEndpoint( + kSimpleDestinationPath, config.get()), + "hdfsPort is empty, configuration missing for hdfs port"); +} + +TEST_F(HdfsFileSystemTest, missingFileViaReadFile) { + filesystems::arrow::io::internal::LibHdfsShim* driver; + auto hdfs = connectHdfsDriver( + &driver, + std::string(miniCluster->host()), + std::string(miniCluster->nameNodePort())); + VELOX_ASSERT_THROW( + std::make_shared( + driver, hdfs, "/path/that/does/not/exist"), + "Unable to get file path info for file: /path/that/does/not/exist. 
got error: FileNotFoundException: Path /path/that/does/not/exist does not exist."); +} + +TEST_F(HdfsFileSystemTest, schemeMatching) { + VELOX_ASSERT_THROW( + std::dynamic_pointer_cast( + filesystems::getFileSystem("file://", nullptr)), + "No registered file system matched with file path 'file://'") + + auto fs = std::dynamic_pointer_cast( + filesystems::getFileSystem(fullDestinationPath_, nullptr)); + ASSERT_TRUE(fs->isHdfsFile(fullDestinationPath_)); + + fs = std::dynamic_pointer_cast( + filesystems::getFileSystem(kViewfsDestinationPath, nullptr)); + ASSERT_TRUE(fs->isHdfsFile(kViewfsDestinationPath)); +} + +TEST_F(HdfsFileSystemTest, writeSupported) { + auto config = std::make_shared( + std::unordered_map(configurationValues)); + auto hdfsFileSystem = + filesystems::getFileSystem(fullDestinationPath_, config); + hdfsFileSystem->openFileForWrite("/path"); +} + +TEST_F(HdfsFileSystemTest, removeNotSupported) { + auto config = std::make_shared( + std::unordered_map(configurationValues)); + auto hdfsFileSystem = + filesystems::getFileSystem(fullDestinationPath_, config); + VELOX_ASSERT_THROW( + hdfsFileSystem->remove("/path"), + "Does not support removing files from hdfs"); +} + +TEST_F(HdfsFileSystemTest, multipleThreadsWithReadFile) { + startThreads = false; + + filesystems::arrow::io::internal::LibHdfsShim* driver; + auto hdfs = connectHdfsDriver( + &driver, + std::string(miniCluster->host()), + std::string(miniCluster->nameNodePort())); + std::vector threads; + std::mt19937 generator(std::random_device{}()); + std::vector sleepTimesInMicroseconds = {0, 500, 50000}; + std::uniform_int_distribution distribution( + 0, sleepTimesInMicroseconds.size() - 1); + for (int i = 0; i < 25; i++) { + auto thread = std::thread( + [&driver, &hdfs, &distribution, &generator, &sleepTimesInMicroseconds] { + int index = distribution(generator); + while (!HdfsFileSystemTest::startThreads) { + std::this_thread::yield(); + } + std::this_thread::sleep_for( + std::chrono::microseconds(sleepTimesInMicroseconds[index])); + HdfsReadFile readFile(driver, hdfs, kDestinationPath); + readData(&readFile); + }); + threads.emplace_back(std::move(thread)); + } + startThreads = true; + for (auto& thread : threads) { + thread.join(); + } +} + +TEST_F(HdfsFileSystemTest, multipleThreadsWithFileSystem) { + startThreads = false; + auto config = std::make_shared( + std::unordered_map(configurationValues)); + auto hdfsFileSystem = + filesystems::getFileSystem(fullDestinationPath_, config); + + std::vector threads; + std::mt19937 generator(std::random_device{}()); + std::vector sleepTimesInMicroseconds = {0, 500, 50000}; + std::uniform_int_distribution distribution( + 0, sleepTimesInMicroseconds.size() - 1); + for (int i = 0; i < 25; i++) { + auto thread = std::thread([&hdfsFileSystem, + &distribution, + &generator, + &sleepTimesInMicroseconds] { + int index = distribution(generator); + while (!HdfsFileSystemTest::startThreads) { + std::this_thread::yield(); + } + std::this_thread::sleep_for( + std::chrono::microseconds(sleepTimesInMicroseconds[index])); + auto readFile = hdfsFileSystem->openFileForRead(fullDestinationPath_); + readData(readFile.get()); + }); + threads.emplace_back(std::move(thread)); + } + startThreads = true; + for (auto& thread : threads) { + thread.join(); + } +} + +TEST_F(HdfsFileSystemTest, write) { + const std::string_view path = "/a.txt"; + auto writeFile = openFileForWrite(path); + const std::string_view data = "abcdefghijk"; + writeFile->append(data); + writeFile->flush(); + ASSERT_EQ(writeFile->size(), 
0); + writeFile->append(data); + writeFile->append(data); + writeFile->flush(); + writeFile->close(); + ASSERT_EQ(writeFile->size(), data.size() * 3); +} + +TEST_F(HdfsFileSystemTest, missingFileForWrite) { + const std::string_view filePath = + "hdfs://localhost:7777/path/that/does/not/exist"; + const std::string_view errorMsg = + "Failed to open hdfs file: hdfs://localhost:7777/path/that/does/not/exist"; + VELOX_ASSERT_THROW(openFileForWrite(filePath), errorMsg); +} + +TEST_F(HdfsFileSystemTest, writeDataFailures) { + auto writeFile = openFileForWrite("/a.txt"); + writeFile->close(); + VELOX_ASSERT_THROW( + writeFile->append("abcde"), + "Cannot append to HDFS file because file handle is null, file path: /a.txt"); +} + +TEST_F(HdfsFileSystemTest, writeFlushFailures) { + auto writeFile = openFileForWrite("/a.txt"); + writeFile->close(); + VELOX_ASSERT_THROW( + writeFile->flush(), + "Cannot flush HDFS file because file handle is null, file path: /a.txt"); +} + +TEST_F(HdfsFileSystemTest, writeWithParentDirNotExist) { + const std::string_view path = "/parent/directory/that/does/not/exist/a.txt"; + auto writeFile = openFileForWrite(path); + const std::string_view data = "abcdefghijk"; + writeFile->append(data); + writeFile->flush(); + ASSERT_EQ(writeFile->size(), 0); + writeFile->append(data); + writeFile->append(data); + writeFile->flush(); + writeFile->close(); + ASSERT_EQ(writeFile->size(), data.size() * 3); +} + +TEST_F(HdfsFileSystemTest, readFailures) { + filesystems::arrow::io::internal::LibHdfsShim* driver; + auto hdfs = connectHdfsDriver( + &driver, + std::string(miniCluster->host()), + std::string(miniCluster->nameNodePort())); + verifyFailures(driver, hdfs); +} diff --git a/velox/connectors/lakehouse/storage_adapters/hdfs/tests/HdfsMiniCluster.cpp b/velox/connectors/lakehouse/storage_adapters/hdfs/tests/HdfsMiniCluster.cpp new file mode 100644 index 000000000000..34c4032867fb --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/hdfs/tests/HdfsMiniCluster.cpp @@ -0,0 +1,123 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "HdfsMiniCluster.h" + +#include "velox/exec/tests/utils/PortUtil.h" + +namespace facebook::velox::filesystems::test { +void HdfsMiniCluster::start() { + try { + serverProcess_ = std::make_unique( + env_, + exePath_, + kJarCommand, + env_["HADOOP_HOME"].to_string() + kMiniclusterJar, + kMiniclusterCommand, + kNoMapReduceOption, + kFormatNameNodeOption, + kHttpPortOption, + httpPort_, + kNameNodePortOption, + nameNodePort_, + kConfigurationOption, + kTurnOffPermissions); + serverProcess_->wait_for(std::chrono::duration(60000)); + VELOX_CHECK_EQ( + serverProcess_->exit_code(), + 383, + "Minicluster process exited, code: {}", + serverProcess_->exit_code()); + } catch (const std::exception& e) { + VELOX_FAIL("Failed to launch Minicluster server: {}", e.what()); + } +} + +void HdfsMiniCluster::stop() { + if (serverProcess_ && serverProcess_->valid()) { + serverProcess_->terminate(); + serverProcess_->wait(); + serverProcess_.reset(); + } +} + +bool HdfsMiniCluster::isRunning() { + if (serverProcess_) { + return true; + } + return false; +} + +// requires hadoop executable to be on the PATH +HdfsMiniCluster::HdfsMiniCluster() { + env_ = (boost::process::environment)boost::this_process::environment(); + env_["PATH"] = env_["PATH"].to_string() + kHadoopSearchPath; + auto path = env_["PATH"].to_vector(); + exePath_ = boost::process::search_path( + kMiniClusterExecutableName, + std::vector(path.begin(), path.end())); + if (exePath_.empty()) { + VELOX_FAIL( + "Failed to find minicluster executable {}'", + kMiniClusterExecutableName); + } + constexpr auto kHostAddressTemplate = "hdfs://{}:{}"; + auto ports = facebook::velox::exec::test::getFreePorts(2); + nameNodePort_ = fmt::format("{}", ports[0]); + httpPort_ = fmt::format("{}", ports[1]); + filesystemUrl_ = fmt::format(kHostAddressTemplate, host(), nameNodePort_); + boost::filesystem::path hadoopHomeDirectory = exePath_; + hadoopHomeDirectory.remove_leaf().remove_leaf(); + setupEnvironment(hadoopHomeDirectory.string()); +} + +void HdfsMiniCluster::addFile(std::string source, std::string destination) { + auto filePutProcess = std::make_shared( + env_, + exePath_, + kFilesystemCommand, + kFilesystemUrlOption, + filesystemUrl_, + kFilePutOption, + source, + destination); + bool isExited = + filePutProcess->wait_for(std::chrono::duration(15000)); + if (!isExited) { + VELOX_FAIL( + "Failed to add file to hdfs, exit code: {}", + filePutProcess->exit_code()); + } +} + +HdfsMiniCluster::~HdfsMiniCluster() { + stop(); +} + +void HdfsMiniCluster::setupEnvironment(const std::string& homeDirectory) { + env_["HADOOP_HOME"] = homeDirectory; + env_["HADOOP_INSTALL"] = homeDirectory; + env_["HADOOP_MAPRED_HOME"] = homeDirectory; + env_["HADOOP_COMMON_HOME"] = homeDirectory; + env_["HADOOP_HDFS_HOME"] = homeDirectory; + env_["YARN_HOME"] = homeDirectory; + env_["HADOOP_COMMON_LIB_NATIVE_DIR"] = homeDirectory + "/lib/native"; + env_["HADOOP_CONF_DIR"] = homeDirectory; + env_["HADOOP_PREFIX"] = homeDirectory; + env_["HADOOP_LIBEXEC_DIR"] = homeDirectory + "/libexec"; + env_["HADOOP_CONF_DIR"] = homeDirectory + "/etc/hadoop"; +} +} // namespace facebook::velox::filesystems::test diff --git a/velox/connectors/lakehouse/storage_adapters/hdfs/tests/HdfsMiniCluster.h b/velox/connectors/lakehouse/storage_adapters/hdfs/tests/HdfsMiniCluster.h new file mode 100644 index 000000000000..c54ae9589b3e --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/hdfs/tests/HdfsMiniCluster.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) Facebook, Inc. 
and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "velox/exec/tests/utils/TempDirectoryPath.h" + +#include +#include +#include +#include "boost/process.hpp" + +namespace facebook::velox::filesystems::test { +static const std::string kMiniClusterExecutableName{"hadoop"}; +static const std::string kHadoopSearchPath{":/usr/local/hadoop/bin"}; +static const std::string kJarCommand{"jar"}; +static const std::string kMiniclusterJar{ + "/share/hadoop/mapreduce/hadoop-mapreduce-client-jobclient-3.3.0-tests.jar"}; +static const std::string kMiniclusterCommand{"minicluster"}; +static const std::string kNoMapReduceOption{"-nomr"}; +static const std::string kFormatNameNodeOption{"-format"}; +static const std::string kHttpPortOption{"-nnhttpport"}; +static const std::string kNameNodePortOption{"-nnport"}; +static const std::string kConfigurationOption{"-D"}; +static const std::string kTurnOffPermissions{"dfs.permissions=false"}; +static const std::string kFilesystemCommand{"fs"}; +static const std::string kFilesystemUrlOption{"-fs"}; +static const std::string kFilePutOption{"-put"}; + +class HdfsMiniCluster { + public: + HdfsMiniCluster(); + + void start(); + + void stop(); + + bool isRunning(); + + void addFile(std::string source, std::string destination); + virtual ~HdfsMiniCluster(); + + std::string_view nameNodePort() const { + return nameNodePort_; + } + + std::string_view url() const { + return filesystemUrl_; + } + + std::string_view host() const { + static const std::string_view kLocalhost = "localhost"; + return kLocalhost; + } + + private: + void setupEnvironment(const std::string& homeDirectory); + + std::unique_ptr<::boost::process::child> serverProcess_; + boost::filesystem::path exePath_; + boost::process::environment env_; + std::string nameNodePort_; + std::string httpPort_; + std::string filesystemUrl_; +}; +} // namespace facebook::velox::filesystems::test diff --git a/velox/connectors/lakehouse/storage_adapters/hdfs/tests/HdfsUtilTest.cpp b/velox/connectors/lakehouse/storage_adapters/hdfs/tests/HdfsUtilTest.cpp new file mode 100644 index 000000000000..28d4374a7f71 --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/hdfs/tests/HdfsUtilTest.cpp @@ -0,0 +1,35 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "velox/connectors/lakehouse/storage_adapters/hdfs/HdfsUtil.h" + +#include "gtest/gtest.h" + +using namespace facebook::velox::filesystems; + +TEST(HdfsUtilTest, getHdfsPath) { + const std::string& kScheme = "hdfs://"; + std::string path1 = + getHdfsPath("hdfs://hdfsCluster/user/hive/a.txt", kScheme); + EXPECT_EQ("/user/hive/a.txt", path1); + + std::string path2 = + getHdfsPath("hdfs://localhost:9000/user/hive/a.txt", kScheme); + EXPECT_EQ("/user/hive/a.txt", path2); + + std::string path3 = getHdfsPath("hdfs:///user/hive/a.txt", kScheme); + EXPECT_EQ("/user/hive/a.txt", path3); +} diff --git a/velox/connectors/lakehouse/storage_adapters/hdfs/tests/InsertIntoHdfsTest.cpp b/velox/connectors/lakehouse/storage_adapters/hdfs/tests/InsertIntoHdfsTest.cpp new file mode 100644 index 000000000000..1c8ebb902483 --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/hdfs/tests/InsertIntoHdfsTest.cpp @@ -0,0 +1,115 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include "HdfsMiniCluster.h" +#include "gtest/gtest.h" +#include "velox/connectors/common/tests/utils/HiveConnectorTestBase.h" +#include "velox/connectors/lakehouse/storage_adapters/hdfs/HdfsFileSystem.h" +#include "velox/connectors/lakehouse/storage_adapters/hdfs/RegisterHdfsFileSystem.h" +#include "velox/exec/TableWriter.h" +#include "velox/exec/tests/utils/AssertQueryBuilder.h" +#include "velox/exec/tests/utils/PlanBuilder.h" + +using namespace facebook::velox; +using namespace facebook::velox::core; +using namespace facebook::velox::exec; +using namespace facebook::velox::exec::test; +using namespace facebook::velox::connector; +//using namespace facebook::velox::connector::lakehouse::hive; +using namespace facebook::velox::dwio::common; +using namespace facebook::velox::test; + +class InsertIntoHdfsTest : public HiveConnectorTestBase { + public: + void SetUp() override { + HiveConnectorTestBase::SetUp(); + filesystems::registerHdfsFileSystem(); + if (miniCluster == nullptr) { + miniCluster = std::make_shared(); + miniCluster->start(); + } + } + + void TearDown() override { + HiveConnectorTestBase::TearDown(); + miniCluster->stop(); + } + + void setDataTypes(const RowTypePtr& inputType) { + rowType_ = inputType; + } + + static std::shared_ptr miniCluster; + RowTypePtr rowType_; +}; + +std::shared_ptr + InsertIntoHdfsTest::miniCluster = nullptr; + +TEST_F(InsertIntoHdfsTest, insertIntoHdfsTest) { + folly::SingletonVault::singleton()->registrationComplete(); + const int64_t expectedRows = 1000; + setDataTypes(ROW( + {"c0", "c1", "c2", "c3"}, {BIGINT(), INTEGER(), SMALLINT(), DOUBLE()})); + + auto input = makeRowVector( + {makeFlatVector(expectedRows, [](auto row) { return row; }), + makeFlatVector(expectedRows, [](auto row) { return row; }), + makeFlatVector(expectedRows, [](auto row) { return row; }), + makeFlatVector(expectedRows, [](auto row) { return row; })}); + + // INSERT INTO hdfs with one writer + auto plan = + 
PlanBuilder() + .values({input}) + .tableWrite( + std::string(miniCluster->url()), dwio::common::FileFormat::DWRF) + .planNode(); + + auto results = AssertQueryBuilder(plan).copyResults(pool()); + + // First column has number of rows written in the first row and nulls in other + // rows. + auto rowCount = results->childAt(TableWriteTraits::kRowCountChannel) + ->as>(); + ASSERT_FALSE(rowCount->isNullAt(0)); + ASSERT_EQ(expectedRows, rowCount->valueAt(0)); + ASSERT_TRUE(rowCount->isNullAt(1)); + + // Second column contains details about written files. + auto details = results->childAt(TableWriteTraits::kFragmentChannel) + ->as>(); + ASSERT_TRUE(details->isNullAt(0)); + ASSERT_FALSE(details->isNullAt(1)); + folly::dynamic obj = folly::parseJson(details->valueAt(1)); + + ASSERT_EQ(expectedRows, obj["rowCount"].asInt()); + auto fileWriteInfos = obj["fileWriteInfos"]; + ASSERT_EQ(1, fileWriteInfos.size()); + + auto writeFileName = fileWriteInfos[0]["writeFileName"].asString(); + + // Read from 'writeFileName' and verify the data matches the original. + plan = PlanBuilder().tableScan(rowType_).planNode(); + + auto splits = HiveConnectorTestBase::makeHiveConnectorSplits( + fmt::format("{}/{}", miniCluster->url(), writeFileName), + 1, + dwio::common::FileFormat::DWRF); + auto copy = AssertQueryBuilder(plan).split(splits[0]).copyResults(pool()); + assertEqualResults({input}, {copy}); +} diff --git a/velox/connectors/lakehouse/storage_adapters/s3fs/CMakeLists.txt b/velox/connectors/lakehouse/storage_adapters/s3fs/CMakeLists.txt new file mode 100644 index 000000000000..059f63aaffaa --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/s3fs/CMakeLists.txt @@ -0,0 +1,33 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# for generated headers + +velox_add_library(velox_lakehouse_s3fs RegisterS3FileSystem.cpp) +if(VELOX_ENABLE_S3) + velox_sources( + velox_lakehouse_s3fs + PRIVATE + S3FileSystem.cpp + S3Util.cpp + S3Config.cpp) + + velox_include_directories(velox_lakehouse_s3fs PRIVATE ${AWSSDK_INCLUDE_DIRS}) + velox_link_libraries(velox_lakehouse_s3fs PRIVATE velox_dwio_common Folly::folly + ${AWSSDK_LIBRARIES}) + + if(${VELOX_BUILD_TESTING}) + add_subdirectory(tests) + endif() +endif() diff --git a/velox/connectors/lakehouse/storage_adapters/s3fs/RegisterS3FileSystem.cpp b/velox/connectors/lakehouse/storage_adapters/s3fs/RegisterS3FileSystem.cpp new file mode 100644 index 000000000000..61094ee6d4c9 --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/s3fs/RegisterS3FileSystem.cpp @@ -0,0 +1,154 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "RegisterS3FileSystem.h" // @manual + +#ifdef VELOX_ENABLE_S3 +#include "velox/common/base/StatsReporter.h" +#include "velox/connectors/lakehouse/storage_adapters/s3fs/S3Config.h" // @manual +#include "velox/connectors/lakehouse/storage_adapters/s3fs/S3Counters.h" // @manual +#include "velox/connectors/lakehouse/storage_adapters/s3fs/S3FileSystem.h" // @manual +#include "velox/connectors/lakehouse/storage_adapters/s3fs/S3Util.h" // @manual +#include "velox/dwio/common/FileSink.h" +#endif + +namespace facebook::velox::filesystems { + +#ifdef VELOX_ENABLE_S3 +using FileSystemMap = folly::Synchronized< + std::unordered_map>>; + +/// Multiple S3 filesystems are supported. +/// Key is the endpoint value specified in the config using hive.s3.endpoint. +/// If the endpoint is empty, it will default to AWS S3 Library. +/// Different S3 buckets can be accessed with different client configurations. +/// This allows for different endpoints, data read and write strategies. +/// The bucket specific option is set by replacing the hive.s3. prefix on an +/// option with hive.s3.bucket.BUCKETNAME., where BUCKETNAME is the name of the +/// bucket. When connecting to a bucket, all options explicitly set will +/// override the base hive.s3. values. + +FileSystemMap& fileSystems() { + static FileSystemMap instances; + return instances; +} + +CacheKeyFn cacheKeyFunc; + +std::shared_ptr fileSystemGenerator( + std::shared_ptr properties, + std::string_view s3Path) { + std::string cacheKey, bucketName, key; + getBucketAndKeyFromPath(getPath(s3Path), bucketName, key); + if (!cacheKeyFunc) { + cacheKey = S3Config::cacheKey(bucketName, properties); + } else { + cacheKey = cacheKeyFunc(properties, s3Path); + } + + // Check if an instance exists with a read lock (shared). + auto fs = fileSystems().withRLock( + [&](auto& instanceMap) -> std::shared_ptr { + auto iterator = instanceMap.find(cacheKey); + if (iterator != instanceMap.end()) { + return iterator->second; + } + return nullptr; + }); + if (fs != nullptr) { + return fs; + } + + return fileSystems().withWLock( + [&](auto& instanceMap) -> std::shared_ptr { + // Repeat the checks with a write lock. 
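+        // Double-checked locking: another thread may have created and cached a
+        // filesystem for the same cacheKey between releasing the read lock
+        // above and acquiring this write lock, so look the key up again before
+        // constructing a new S3FileSystem.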
+ auto iterator = instanceMap.find(cacheKey); + if (iterator != instanceMap.end()) { + return iterator->second; + } + + auto logLevel = + properties->get(S3Config::kS3LogLevel, std::string("FATAL")); + std::optional logLocation = + static_cast>( + properties->get(S3Config::kS3LogLocation)); + initializeS3(logLevel, logLocation); + auto fs = std::make_shared(bucketName, properties); + instanceMap.insert({cacheKey, fs}); + return fs; + }); +} + +std::unique_ptr s3WriteFileSinkGenerator( + const std::string& fileURI, + const velox::dwio::common::FileSink::Options& options) { + if (isS3File(fileURI)) { + auto fileSystem = + filesystems::getFileSystem(fileURI, options.connectorProperties); + return std::make_unique( + fileSystem->openFileForWrite(fileURI, {{}, options.pool, std::nullopt}), + fileURI, + options.metricLogger, + options.stats); + } + return nullptr; +} +#endif + +void registerS3FileSystem(CacheKeyFn identityFunction) { +#ifdef VELOX_ENABLE_S3 + fileSystems().withWLock([&](auto& instanceMap) { + if (instanceMap.empty()) { + cacheKeyFunc = identityFunction; + registerFileSystem(isS3File, std::function(fileSystemGenerator)); + dwio::common::FileSink::registerFactory( + std::function(s3WriteFileSinkGenerator)); + } + }); +#endif +} + +void finalizeS3FileSystem() { +#ifdef VELOX_ENABLE_S3 + bool singleUseCount = true; + fileSystems().withWLock([&](auto& instanceMap) { + for (const auto& [id, fs] : instanceMap) { + singleUseCount &= (fs.use_count() == 1); + } + VELOX_CHECK(singleUseCount, "Cannot finalize S3FileSystem while in use"); + instanceMap.clear(); + }); + + finalizeS3(); +#endif +} + +void registerS3Metrics() { +#ifdef VELOX_ENABLE_S3 + DEFINE_METRIC(kMetricS3ActiveConnections, velox::StatType::SUM); + DEFINE_METRIC(kMetricS3StartedUploads, velox::StatType::COUNT); + DEFINE_METRIC(kMetricS3FailedUploads, velox::StatType::COUNT); + DEFINE_METRIC(kMetricS3SuccessfulUploads, velox::StatType::COUNT); + DEFINE_METRIC(kMetricS3MetadataCalls, velox::StatType::COUNT); + DEFINE_METRIC(kMetricS3GetObjectCalls, velox::StatType::COUNT); + DEFINE_METRIC(kMetricS3GetObjectErrors, velox::StatType::COUNT); + DEFINE_METRIC(kMetricS3GetMetadataErrors, velox::StatType::COUNT); + DEFINE_METRIC(kMetricS3GetObjectRetries, velox::StatType::COUNT); + DEFINE_METRIC(kMetricS3GetMetadataRetries, velox::StatType::COUNT); +#endif +} + +} // namespace facebook::velox::filesystems diff --git a/velox/connectors/lakehouse/storage_adapters/s3fs/RegisterS3FileSystem.h b/velox/connectors/lakehouse/storage_adapters/s3fs/RegisterS3FileSystem.h new file mode 100644 index 000000000000..890e38db7652 --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/s3fs/RegisterS3FileSystem.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include + +namespace facebook::velox::config { +class ConfigBase; +} + +namespace facebook::velox::filesystems { + +using CacheKeyFn = std::function< + std::string(std::shared_ptr, std::string_view)>; + +// Register the S3 filesystem. +void registerS3FileSystem(CacheKeyFn cacheKeyFunc = nullptr); + +void registerS3Metrics(); + +/// Teardown the AWS SDK C++. +/// Velox users need to manually invoke this before exiting an application. +/// This is because Velox uses a static object to hold the S3 FileSystem +/// instance. AWS C++ SDK library also uses static global objects in its code. +/// The order of static object destruction is not determined by the C++ +/// standard. +/// This could lead to a segmentation fault during the program exit. +/// Ref https://github.com/aws/aws-sdk-cpp/issues/1550#issuecomment-1412601061 +void finalizeS3FileSystem(); + +} // namespace facebook::velox::filesystems diff --git a/velox/connectors/lakehouse/storage_adapters/s3fs/S3Config.cpp b/velox/connectors/lakehouse/storage_adapters/s3fs/S3Config.cpp new file mode 100644 index 000000000000..947dbd5d08af --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/s3fs/S3Config.cpp @@ -0,0 +1,89 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "S3Config.h" + +#include "S3Util.h" +#include "velox/common/config/Config.h" + +namespace facebook::velox::filesystems { + +std::string S3Config::cacheKey( + std::string_view bucket, + std::shared_ptr config) { + auto bucketEndpoint = bucketConfigKey(Keys::kEndpoint, bucket); + if (config->valueExists(bucketEndpoint)) { + return fmt::format( + "{}-{}", config->get(bucketEndpoint).value(), bucket); + } + auto baseEndpoint = baseConfigKey(Keys::kEndpoint); + if (config->valueExists(baseEndpoint)) { + return fmt::format( + "{}-{}", config->get(baseEndpoint).value(), bucket); + } + return std::string(bucket); +} + +S3Config::S3Config( + std::string_view bucket, + const std::shared_ptr properties) { + for (int key = static_cast(Keys::kBegin); + key < static_cast(Keys::kEnd); + key++) { + auto s3Key = static_cast(key); + auto value = S3Config::configTraits().find(s3Key)->second; + auto configSuffix = value.first; + auto configDefault = value.second; + + // Set bucket S3 config "hive.s3.bucket.*" if present. + std::stringstream bucketConfig; + bucketConfig << kS3BucketPrefix << bucket << "." << configSuffix; + auto configVal = static_cast>( + properties->get(bucketConfig.str())); + if (configVal.has_value()) { + config_[s3Key] = configVal.value(); + } else { + // Set base config "hive.s3.*" if present. + std::stringstream baseConfig; + baseConfig << kS3Prefix << configSuffix; + configVal = static_cast>( + properties->get(baseConfig.str())); + if (configVal.has_value()) { + config_[s3Key] = configVal.value(); + } else { + // Set the default value. 
+ config_[s3Key] = configDefault; + } + } + } + payloadSigningPolicy_ = + properties->get(kS3PayloadSigningPolicy, "Never"); +} + +std::optional S3Config::endpointRegion() const { + auto region = config_.find(Keys::kEndpointRegion)->second; + if (!region.has_value()) { + // If region is not set, try inferring from the endpoint value for AWS + // endpoints. + auto endpointValue = endpoint(); + if (endpointValue.has_value()) { + region = parseAWSStandardRegionName(endpointValue.value()); + } + } + return region; +} + +} // namespace facebook::velox::filesystems diff --git a/velox/connectors/lakehouse/storage_adapters/s3fs/S3Config.h b/velox/connectors/lakehouse/storage_adapters/s3fs/S3Config.h new file mode 100644 index 000000000000..3215dee4e660 --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/s3fs/S3Config.h @@ -0,0 +1,239 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include "velox/common/base/Exceptions.h" + +namespace facebook::velox::config { +class ConfigBase; +} + +namespace facebook::velox::filesystems { + +/// Build config required to initialize an S3FileSystem instance. +/// All hive.s3 options can be set on a per-bucket basis. +/// The bucket-specific option is set by replacing the hive.s3. prefix on an +/// option with hive.s3.bucket.BUCKETNAME., where BUCKETNAME is the name of the +/// bucket. +/// When connecting to a bucket, all options explicitly set will override the +/// base hive.s3. values. +/// These semantics are similar to the Apache Hadoop-Aws module. +/// https://hadoop.apache.org/docs/current/hadoop-aws/tools/hadoop-aws/index.html +class S3Config { + public: + S3Config() = delete; + + /// S3 config prefix. + static constexpr const char* kS3Prefix = "hive.s3."; + + /// S3 bucket config prefix + static constexpr const char* kS3BucketPrefix = "hive.s3.bucket."; + + /// Log granularity of AWS C++ SDK. + static constexpr const char* kS3LogLevel = "hive.s3.log-level"; + + /// Payload signing policy. + static constexpr const char* kS3PayloadSigningPolicy = + "hive.s3.payload-signing-policy"; + + /// S3FileSystem default identity. + static constexpr const char* kDefaultS3Identity = "s3-default-identity"; + + /// Log location of AWS C++ SDK. + static constexpr const char* kS3LogLocation = "hive.s3.log-location"; + + /// Keys to identify the config. + enum class Keys { + kBegin, + kEndpoint = kBegin, + kEndpointRegion, + kAccessKey, + kSecretKey, + kPathStyleAccess, + kSSLEnabled, + kUseInstanceCredentials, + kIamRole, + kIamRoleSessionName, + kConnectTimeout, + kSocketTimeout, + kMaxConnections, + kMaxAttempts, + kRetryMode, + kUseProxyFromEnv, + kEnd + }; + + /// Map of keys -> . + /// New config must be added here along with a getter function below. 
+ static const std::unordered_map< + Keys, + std::pair>>& + configTraits() { + static const std::unordered_map< + Keys, + std::pair>> + config = { + {Keys::kEndpoint, std::make_pair("endpoint", std::nullopt)}, + {Keys::kEndpointRegion, + std::make_pair("endpoint.region", std::nullopt)}, + {Keys::kAccessKey, std::make_pair("aws-access-key", std::nullopt)}, + {Keys::kSecretKey, std::make_pair("aws-secret-key", std::nullopt)}, + {Keys::kPathStyleAccess, + std::make_pair("path-style-access", "false")}, + {Keys::kSSLEnabled, std::make_pair("ssl.enabled", "true")}, + {Keys::kUseInstanceCredentials, + std::make_pair("use-instance-credentials", "false")}, + {Keys::kIamRole, std::make_pair("iam-role", std::nullopt)}, + {Keys::kIamRoleSessionName, + std::make_pair("iam-role-session-name", "velox-session")}, + {Keys::kConnectTimeout, + std::make_pair("connect-timeout", std::nullopt)}, + {Keys::kSocketTimeout, + std::make_pair("socket-timeout", std::nullopt)}, + {Keys::kMaxConnections, + std::make_pair("max-connections", std::nullopt)}, + {Keys::kMaxAttempts, std::make_pair("max-attempts", std::nullopt)}, + {Keys::kRetryMode, std::make_pair("retry-mode", std::nullopt)}, + {Keys::kUseProxyFromEnv, + std::make_pair("use-proxy-from-env", "false")}}; + return config; + } + + S3Config( + std::string_view bucket, + std::shared_ptr config); + + /// cacheKey is used as a key for the S3FileSystem instance map. + /// This will be the bucket endpoint or the base endpoint if they exist plus + /// bucket name. + static std::string cacheKey( + std::string_view bucket, + std::shared_ptr config); + + /// Return the base config for the input Key. + static std::string baseConfigKey(Keys key) { + std::stringstream buffer; + buffer << kS3Prefix << configTraits().find(key)->second.first; + return buffer.str(); + } + + /// Return the bucket config for the input key. + static std::string bucketConfigKey(Keys key, std::string_view bucket) { + std::stringstream buffer; + buffer << kS3BucketPrefix << bucket << "." + << configTraits().find(key)->second.first; + return buffer.str(); + } + + /// The S3 storage endpoint server. This can be used to connect to an + /// S3-compatible storage system instead of AWS. + std::optional endpoint() const { + return config_.find(Keys::kEndpoint)->second; + } + + /// The S3 storage endpoint region. + std::optional endpointRegion() const; + + /// Access key to use. + std::optional accessKey() const { + return config_.find(Keys::kAccessKey)->second; + } + + /// Secret key to use + std::optional secretKey() const { + return config_.find(Keys::kSecretKey)->second; + } + + /// Virtual addressing is used for AWS S3 and is the default + /// (path-style-access is false). Path access style is used for some on-prem + /// systems like Minio. + bool useVirtualAddressing() const { + auto value = config_.find(Keys::kPathStyleAccess)->second.value(); + return !folly::to(value); + } + + /// Use HTTPS to communicate with the S3 API. + bool useSSL() const { + auto value = config_.find(Keys::kSSLEnabled)->second.value(); + return folly::to(value); + } + + /// Use the EC2 metadata service to retrieve API credentials. + bool useInstanceCredentials() const { + auto value = config_.find(Keys::kUseInstanceCredentials)->second.value(); + return folly::to(value); + } + + /// IAM role to assume. + std::optional iamRole() const { + return config_.find(Keys::kIamRole)->second; + } + + /// Session name associated with the IAM role. 
+ std::string iamRoleSessionName() const { + return config_.find(Keys::kIamRoleSessionName)->second.value(); + } + + /// Socket connect timeout. + std::optional connectTimeout() const { + return config_.find(Keys::kConnectTimeout)->second; + } + + /// Socket read timeout. + std::optional socketTimeout() const { + return config_.find(Keys::kSocketTimeout)->second; + } + + /// Maximum concurrent TCP connections for a single http client. + std::optional maxConnections() const { + auto val = config_.find(Keys::kMaxConnections)->second; + if (val.has_value()) { + return folly::to(val.value()); + } + return std::optional(); + } + + /// Maximum retry attempts for a single http client. + std::optional maxAttempts() const { + auto val = config_.find(Keys::kMaxAttempts)->second; + if (val.has_value()) { + return folly::to(val.value()); + } + return std::optional(); + } + + /// Retry mode for a single http client. + std::optional retryMode() const { + return config_.find(Keys::kRetryMode)->second; + } + + bool useProxyFromEnv() const { + auto value = config_.find(Keys::kUseProxyFromEnv)->second.value(); + return folly::to(value); + } + + std::string payloadSigningPolicy() const { + return payloadSigningPolicy_; + } + + private: + std::unordered_map> config_; + std::string payloadSigningPolicy_; +}; + +} // namespace facebook::velox::filesystems diff --git a/velox/connectors/lakehouse/storage_adapters/s3fs/S3Counters.h b/velox/connectors/lakehouse/storage_adapters/s3fs/S3Counters.h new file mode 100644 index 000000000000..087e8bb860f9 --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/s3fs/S3Counters.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +namespace facebook::velox::filesystems { + +// The number of connections open for S3 read operations. +constexpr std::string_view kMetricS3ActiveConnections{ + "velox.s3.active_connections"}; + +// The number of S3 upload calls that started. +constexpr std::string_view kMetricS3StartedUploads{"velox.s3.started_uploads"}; + +// The number of S3 upload calls that were completed. +constexpr std::string_view kMetricS3SuccessfulUploads{ + "velox.s3.successful_uploads"}; + +// The number of S3 upload calls that failed. +constexpr std::string_view kMetricS3FailedUploads{"velox.s3.failed_uploads"}; + +// The number of S3 head (metadata) calls. +constexpr std::string_view kMetricS3MetadataCalls{"velox.s3.metadata_calls"}; + +// The number of S3 head (metadata) calls that failed. +constexpr std::string_view kMetricS3GetMetadataErrors{ + "velox.s3.get_metadata_errors"}; + +// The number of retries made during S3 head (metadata) calls. +constexpr std::string_view kMetricS3GetMetadataRetries{ + "velox.s3.get_metadata_retries"}; + +// The number of S3 getObject calls. +constexpr std::string_view kMetricS3GetObjectCalls{"velox.s3.get_object_calls"}; + +// The number of S3 getObject calls that failed. 
+constexpr std::string_view kMetricS3GetObjectErrors{ + "velox.s3.get_object_errors"}; + +// The number of retries made during S3 getObject calls. +constexpr std::string_view kMetricS3GetObjectRetries{ + "velox.s3.get_object_retries"}; + +} // namespace facebook::velox::filesystems diff --git a/velox/connectors/lakehouse/storage_adapters/s3fs/S3FileSystem.cpp b/velox/connectors/lakehouse/storage_adapters/s3fs/S3FileSystem.cpp new file mode 100644 index 000000000000..52a1bfca08d4 --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/s3fs/S3FileSystem.cpp @@ -0,0 +1,838 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "S3FileSystem.h" +#include "S3Config.h" +#include "S3Counters.h" +#include "S3Util.h" +#include "S3WriteFile.h" +#include "velox/common/base/StatsReporter.h" +#include "velox/common/config/Config.h" +#include "velox/common/file/File.h" +#include "velox/dwio/common/DataBuffer.h" + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace facebook::velox::filesystems { +namespace { +// Reference: https://issues.apache.org/jira/browse/ARROW-8692 +// https://github.com/apache/arrow/blob/master/cpp/src/arrow/filesystem/s3fs.cc#L843 +// A non-copying iostream. See +// https://stackoverflow.com/questions/35322033/aws-c-sdk-uploadpart-times-out +// https://stackoverflow.com/questions/13059091/creating-an-input-stream-from-constant-memory +class StringViewStream : Aws::Utils::Stream::PreallocatedStreamBuf, + public std::iostream { + public: + StringViewStream(const void* data, int64_t nbytes) + : Aws::Utils::Stream::PreallocatedStreamBuf( + reinterpret_cast(const_cast(data)), + static_cast(nbytes)), + std::iostream(this) {} +}; + +// By default, the AWS SDK reads object data into an auto-growing StringStream. +// To avoid copies, read directly into a pre-allocated buffer instead. +// See https://github.com/aws/aws-sdk-cpp/issues/64 for an alternative but +// functionally similar recipe. +Aws::IOStreamFactory AwsWriteableStreamFactory(void* data, int64_t nbytes) { + return [=]() { return Aws::New("", data, nbytes); }; +} + +class S3ReadFile final : public ReadFile { + public: + S3ReadFile(std::string_view path, Aws::S3::S3Client* client) + : client_(client) { + getBucketAndKeyFromPath(path, bucket_, key_); + } + + // Gets the length of the file. + // Checks if there are any issues reading the file. + void initialize(const filesystems::FileOptions& options) { + if (options.fileSize.has_value()) { + VELOX_CHECK_GE( + options.fileSize.value(), 0, "File size must be non-negative"); + length_ = options.fileSize.value(); + } + + // Make it a no-op if invoked twice. 
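+    // length_ starts at -1 and is set either from options.fileSize above or by
+    // the HeadObject call below, so a repeated initialize() returns here
+    // without issuing another metadata request.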
+ if (length_ != -1) { + return; + } + + Aws::S3::Model::HeadObjectRequest request; + request.SetBucket(awsString(bucket_)); + request.SetKey(awsString(key_)); + + RECORD_METRIC_VALUE(kMetricS3MetadataCalls); + auto outcome = client_->HeadObject(request); + if (!outcome.IsSuccess()) { + RECORD_METRIC_VALUE(kMetricS3GetMetadataErrors); + } + RECORD_METRIC_VALUE(kMetricS3GetMetadataRetries, outcome.GetRetryCount()); + VELOX_CHECK_AWS_OUTCOME( + outcome, "Failed to get metadata for S3 object", bucket_, key_); + length_ = outcome.GetResult().GetContentLength(); + VELOX_CHECK_GE(length_, 0); + } + + std::string_view pread( + uint64_t offset, + uint64_t length, + void* buffer, + File::IoStats* stats) const override { + preadInternal(offset, length, static_cast(buffer)); + return {static_cast(buffer), length}; + } + + std::string pread(uint64_t offset, uint64_t length, File::IoStats* stats) + const override { + std::string result(length, 0); + char* position = result.data(); + preadInternal(offset, length, position); + return result; + } + + uint64_t preadv( + uint64_t offset, + const std::vector>& buffers, + File::IoStats* stats) const override { + // 'buffers' contains Ranges(data, size) with some gaps (data = nullptr) in + // between. This call must populate the ranges (except gap ranges) + // sequentially starting from 'offset'. AWS S3 GetObject does not support + // multi-range. AWS S3 also charges by number of read requests and not size. + // The idea here is to use a single read spanning all the ranges and then + // populate individual ranges. We pre-allocate a buffer to support this. + size_t length = 0; + for (const auto range : buffers) { + length += range.size(); + } + // TODO: allocate from a memory pool + std::string result(length, 0); + preadInternal(offset, length, static_cast(result.data())); + size_t resultOffset = 0; + for (auto range : buffers) { + if (range.data()) { + memcpy(range.data(), &(result.data()[resultOffset]), range.size()); + } + resultOffset += range.size(); + } + return length; + } + + uint64_t size() const override { + return length_; + } + + uint64_t memoryUsage() const override { + // TODO: Check if any buffers are being used by the S3 library + return sizeof(Aws::S3::S3Client) + kS3MaxKeySize + 2 * sizeof(std::string) + + sizeof(int64_t); + } + + bool shouldCoalesce() const final { + return false; + } + + std::string getName() const final { + return fmt::format("s3://{}/{}", bucket_, key_); + } + + uint64_t getNaturalReadSize() const final { + return 72 << 20; + } + + private: + // The assumption here is that "position" has space for at least "length" + // bytes. + void preadInternal(uint64_t offset, uint64_t length, char* position) const { + // Read the desired range of bytes. 
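+    // A single GetObject call is issued with an HTTP Range header of the form
+    // "bytes=first-last" (both bounds inclusive); the response is streamed
+    // directly into 'position' via AwsWriteableStreamFactory to avoid copying.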
+ Aws::S3::Model::GetObjectRequest request; + Aws::S3::Model::GetObjectResult result; + + request.SetBucket(awsString(bucket_)); + request.SetKey(awsString(key_)); + std::stringstream ss; + ss << "bytes=" << offset << "-" << offset + length - 1; + request.SetRange(awsString(ss.str())); + request.SetResponseStreamFactory( + AwsWriteableStreamFactory(position, length)); + RECORD_METRIC_VALUE(kMetricS3ActiveConnections); + RECORD_METRIC_VALUE(kMetricS3GetObjectCalls); + auto outcome = client_->GetObject(request); + if (!outcome.IsSuccess()) { + RECORD_METRIC_VALUE(kMetricS3GetObjectErrors); + } + RECORD_METRIC_VALUE(kMetricS3GetObjectRetries, outcome.GetRetryCount()); + RECORD_METRIC_VALUE(kMetricS3ActiveConnections, -1); + VELOX_CHECK_AWS_OUTCOME(outcome, "Failed to get S3 object", bucket_, key_); + } + + Aws::S3::S3Client* client_; + std::string bucket_; + std::string key_; + int64_t length_ = -1; +}; + +Aws::Utils::Logging::LogLevel inferS3LogLevel(std::string_view logLevel) { + std::string level = std::string(logLevel); + // Convert to upper case. + std::transform( + level.begin(), level.end(), level.begin(), [](unsigned char c) { + return std::toupper(c); + }); + if (level == "FATAL") { + return Aws::Utils::Logging::LogLevel::Fatal; + } else if (level == "TRACE") { + return Aws::Utils::Logging::LogLevel::Trace; + } else if (level == "OFF") { + return Aws::Utils::Logging::LogLevel::Off; + } else if (level == "ERROR") { + return Aws::Utils::Logging::LogLevel::Error; + } else if (level == "WARN") { + return Aws::Utils::Logging::LogLevel::Warn; + } else if (level == "INFO") { + return Aws::Utils::Logging::LogLevel::Info; + } else if (level == "DEBUG") { + return Aws::Utils::Logging::LogLevel::Debug; + } + return Aws::Utils::Logging::LogLevel::Fatal; +} + +// Supported values are "Always", "RequestDependent", "Never"(default). +Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy inferPayloadSign( + std::string sign) { + // Convert to upper case. + std::transform(sign.begin(), sign.end(), sign.begin(), [](unsigned char c) { + return std::toupper(c); + }); + if (sign == "ALWAYS") { + return Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Always; + } else if (sign == "REQUESTDEPENDENT") { + return Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::RequestDependent; + } + return Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never; +} +} // namespace + +class S3WriteFile::Impl { + public: + explicit Impl( + std::string_view path, + Aws::S3::S3Client* client, + memory::MemoryPool* pool) + : client_(client), pool_(pool) { + VELOX_CHECK_NOT_NULL(client); + VELOX_CHECK_NOT_NULL(pool); + getBucketAndKeyFromPath(path, bucket_, key_); + currentPart_ = std::make_unique>(*pool_); + currentPart_->reserve(kPartUploadSize); + // Check that the object doesn't exist, if it does throw an error. + { + Aws::S3::Model::HeadObjectRequest request; + request.SetBucket(awsString(bucket_)); + request.SetKey(awsString(key_)); + RECORD_METRIC_VALUE(kMetricS3MetadataCalls); + auto objectMetadata = client_->HeadObject(request); + if (!objectMetadata.IsSuccess()) { + RECORD_METRIC_VALUE(kMetricS3GetMetadataErrors); + } + RECORD_METRIC_VALUE( + kMetricS3GetObjectRetries, objectMetadata.GetRetryCount()); + VELOX_CHECK(!objectMetadata.IsSuccess(), "S3 object already exists"); + } + + // Create bucket if not present. 
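+    // HeadBucket is used as an existence probe; CreateBucket is issued only
+    // when the probe fails, which is useful when writing to S3-compatible test
+    // endpoints.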
+ { + Aws::S3::Model::HeadBucketRequest request; + request.SetBucket(awsString(bucket_)); + auto bucketMetadata = client_->HeadBucket(request); + if (!bucketMetadata.IsSuccess()) { + Aws::S3::Model::CreateBucketRequest request; + request.SetBucket(bucket_); + auto outcome = client_->CreateBucket(request); + VELOX_CHECK_AWS_OUTCOME( + outcome, "Failed to create S3 bucket", bucket_, ""); + } + } + + // Initiate the multi-part upload. + { + Aws::S3::Model::CreateMultipartUploadRequest request; + request.SetBucket(awsString(bucket_)); + request.SetKey(awsString(key_)); + + /// If we do not set anything then the SDK will default to application/xml + /// which confuses some tools + /// (https://github.com/apache/arrow/issues/11934). So we instead default + /// to application/octet-stream which is less misleading. + request.SetContentType(kApplicationOctetStream); + // The default algorithm used is MD5. However, MD5 is not supported with + // fips and can cause a SIGSEGV. Set CRC32 instead which is a standard for + // checksum computation and is not restricted by fips. + request.SetChecksumAlgorithm(Aws::S3::Model::ChecksumAlgorithm::CRC32); + + auto outcome = client_->CreateMultipartUpload(request); + VELOX_CHECK_AWS_OUTCOME( + outcome, "Failed initiating multiple part upload", bucket_, key_); + uploadState_.id = outcome.GetResult().GetUploadId(); + } + + fileSize_ = 0; + } + + // Appends data to the end of the file. + void append(std::string_view data) { + VELOX_CHECK(!closed(), "File is closed"); + if (data.size() + currentPart_->size() >= kPartUploadSize) { + upload(data); + } else { + // Append to current part. + currentPart_->unsafeAppend(data.data(), data.size()); + } + fileSize_ += data.size(); + } + + // No-op. + void flush() { + VELOX_CHECK(!closed(), "File is closed"); + /// currentPartSize must be less than kPartUploadSize since + /// append() would have already flushed after reaching kUploadPartSize. + VELOX_CHECK_LT(currentPart_->size(), kPartUploadSize); + } + + // Complete the multipart upload and close the file. + void close() { + if (closed()) { + return; + } + RECORD_METRIC_VALUE(kMetricS3StartedUploads); + uploadPart({currentPart_->data(), currentPart_->size()}, true); + VELOX_CHECK_EQ(uploadState_.partNumber, uploadState_.completedParts.size()); + // Complete the multipart upload. + { + Aws::S3::Model::CompletedMultipartUpload completedUpload; + completedUpload.SetParts(uploadState_.completedParts); + Aws::S3::Model::CompleteMultipartUploadRequest request; + request.SetBucket(awsString(bucket_)); + request.SetKey(awsString(key_)); + request.SetUploadId(uploadState_.id); + request.SetMultipartUpload(std::move(completedUpload)); + + auto outcome = client_->CompleteMultipartUpload(request); + if (outcome.IsSuccess()) { + RECORD_METRIC_VALUE(kMetricS3SuccessfulUploads); + } else { + RECORD_METRIC_VALUE(kMetricS3FailedUploads); + } + VELOX_CHECK_AWS_OUTCOME( + outcome, "Failed to complete multiple part upload", bucket_, key_); + } + currentPart_->clear(); + } + + // Current file size, i.e. the sum of all previous appends. + uint64_t size() const { + return fileSize_; + } + + int numPartsUploaded() const { + return uploadState_.partNumber; + } + + private: + static constexpr int64_t kPartUploadSize = 10 * 1024 * 1024; + static constexpr const char* kApplicationOctetStream = + "application/octet-stream"; + + bool closed() const { + return (currentPart_->capacity() == 0); + } + + // Holds state for the multipart upload. 
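+  // The upload id is returned by CreateMultipartUpload. Every uploadPart()
+  // call appends a CompletedPart (part number plus ETag) to completedParts,
+  // and the full list is sent back in CompleteMultipartUpload on close().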
+ struct UploadState { + Aws::Vector completedParts; + int64_t partNumber = 0; + Aws::String id; + }; + UploadState uploadState_; + + // Data can be smaller or larger than the kPartUploadSize. + // Complete the currentPart_ and upload kPartUploadSize chunks of data. + // Save the remaining into currentPart_. + void upload(const std::string_view data) { + auto dataPtr = data.data(); + auto dataSize = data.size(); + // Fill-up the remaining currentPart_. + auto remainingBufferSize = currentPart_->capacity() - currentPart_->size(); + currentPart_->unsafeAppend(dataPtr, remainingBufferSize); + uploadPart({currentPart_->data(), currentPart_->size()}); + dataPtr += remainingBufferSize; + dataSize -= remainingBufferSize; + while (dataSize > kPartUploadSize) { + uploadPart({dataPtr, kPartUploadSize}); + dataPtr += kPartUploadSize; + dataSize -= kPartUploadSize; + } + // Stash the remaining at the beginning of currentPart. + currentPart_->unsafeAppend(0, dataPtr, dataSize); + } + + void uploadPart(const std::string_view part, bool isLast = false) { + // Only the last part can be less than kPartUploadSize. + VELOX_CHECK(isLast || (!isLast && (part.size() == kPartUploadSize))); + // Upload the part. + { + Aws::S3::Model::UploadPartRequest request; + request.SetBucket(bucket_); + request.SetKey(key_); + request.SetUploadId(uploadState_.id); + request.SetPartNumber(++uploadState_.partNumber); + request.SetContentLength(part.size()); + request.SetBody( + std::make_shared(part.data(), part.size())); + // The default algorithm used is MD5. However, MD5 is not supported with + // fips and can cause a SIGSEGV. Set CRC32 instead which is a standard for + // checksum computation and is not restricted by fips. + request.SetChecksumAlgorithm(Aws::S3::Model::ChecksumAlgorithm::CRC32); + auto outcome = client_->UploadPart(request); + VELOX_CHECK_AWS_OUTCOME(outcome, "Failed to upload", bucket_, key_); + // Append ETag and part number for this uploaded part. + // This will be needed for upload completion in Close(). + auto result = outcome.GetResult(); + Aws::S3::Model::CompletedPart part; + + part.SetPartNumber(uploadState_.partNumber); + part.SetETag(result.GetETag()); + // Don't add the checksum to the part if the checksum is empty. + // Some filesystems such as IBM COS require this to be not set. + if (!result.GetChecksumCRC32().empty()) { + part.SetChecksumCRC32(result.GetChecksumCRC32()); + } + uploadState_.completedParts.push_back(std::move(part)); + } + } + + Aws::S3::S3Client* client_; + memory::MemoryPool* pool_; + std::unique_ptr> currentPart_; + std::string bucket_; + std::string key_; + size_t fileSize_ = -1; +}; + +S3WriteFile::S3WriteFile( + std::string_view path, + Aws::S3::S3Client* client, + memory::MemoryPool* pool) { + impl_ = std::make_shared(path, client, pool); +} + +void S3WriteFile::append(std::string_view data) { + return impl_->append(data); +} + +void S3WriteFile::flush() { + impl_->flush(); +} + +void S3WriteFile::close() { + impl_->close(); +} + +uint64_t S3WriteFile::size() const { + return impl_->size(); +} + +int S3WriteFile::numPartsUploaded() const { + return impl_->numPartsUploaded(); +} + +// Initialize and Finalize the AWS SDK C++ library. +// Initialization must be done before creating a S3FileSystem. +// Finalization must be done after all S3FileSystem instances have been deleted. +// After Finalize, no new S3FileSystem can be created. 
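+// Initialization and finalization are guarded by atomic flags so that
+// Aws::InitAPI and Aws::ShutdownAPI each run at most once per process.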
+struct AwsInstance { + AwsInstance() : isInitialized_(false), isFinalized_(false) {} + ~AwsInstance() { + finalize(/*from_destructor=*/true); + } + + // Returns true iff the instance was newly initialized with config. + bool initialize( + std::string_view logLevel, + std::optional logLocation) { + if (isFinalized_.load()) { + VELOX_FAIL("Attempt to initialize S3 after it has been finalized."); + } + if (!isInitialized_.exchange(true)) { + // Not already initialized. + doInitialize(logLevel, logLocation); + return true; + } + return false; + } + + bool isInitialized() const { + return !isFinalized_ && isInitialized_; + } + + void finalize(bool fromDestructor = false) { + if (isFinalized_.exchange(true)) { + // Already finalized. + return; + } + if (isInitialized_.exchange(false)) { + // Was initialized. + if (fromDestructor) { + VLOG(0) + << "finalizeS3FileSystem() was not called even though S3 was initialized." + "This could lead to a segmentation fault at exit"; + } + Aws::ShutdownAPI(awsOptions_); + } + } + + std::string getLogLevelName() const { + return Aws::Utils::Logging::GetLogLevelName( + awsOptions_.loggingOptions.logLevel); + } + + std::string getLogPrefix() const { + return logPrefix_; + } + + private: + void doInitialize( + std::string_view logLevel, + std::optional logLocation) { + awsOptions_.loggingOptions.logLevel = inferS3LogLevel(logLevel); + if (logLocation.has_value()) { + logPrefix_ = fmt::format( + "{}{}{}", + logLocation.value(), + logLocation.value().back() == '/' ? "" : "/", + Aws::DEFAULT_LOG_PREFIX); + awsOptions_.loggingOptions.defaultLogPrefix = logPrefix_.c_str(); + VLOG(0) << "Custom S3 log location prefix: " << logPrefix_; + } + // In some situations, curl triggers a SIGPIPE signal causing the entire + // process to be terminated without any notification. + // This behavior is seen via Prestissimo on AmazonLinux2 on AWS EC2. + // Relevant documentation in AWS SDK C++ + // https://github.com/aws/aws-sdk-cpp/blob/276ee83080fcc521d41d456dbbe61d49392ddf77/src/aws-cpp-sdk-core/include/aws/core/Aws.h#L96 + // This option allows the AWS SDK C++ to catch the SIGPIPE signal and + // log a message. + awsOptions_.httpOptions.installSigPipeHandler = true; + Aws::InitAPI(awsOptions_); + } + + Aws::SDKOptions awsOptions_; + std::atomic isInitialized_; + std::atomic isFinalized_; + std::string logPrefix_; +}; + +// Singleton to initialize AWS S3. +AwsInstance* getAwsInstance() { + static auto instance = std::make_unique(); + return instance.get(); +} + +bool initializeS3( + std::string_view logLevel, + std::optional logLocation) { + return getAwsInstance()->initialize(logLevel, logLocation); +} + +static std::atomic fileSystemCount = 0; + +void finalizeS3() { + VELOX_CHECK((fileSystemCount == 0), "Cannot finalize S3 while in use"); + getAwsInstance()->finalize(); +} + +class S3FileSystem::Impl { + public: + Impl(const S3Config& s3Config) { + VELOX_CHECK(getAwsInstance()->isInitialized(), "S3 is not initialized"); + Aws::S3::S3ClientConfiguration clientConfig; + if (s3Config.endpoint().has_value()) { + clientConfig.endpointOverride = s3Config.endpoint().value(); + } + + if (s3Config.endpointRegion().has_value()) { + clientConfig.region = s3Config.endpointRegion().value(); + } + + if (s3Config.useProxyFromEnv()) { + auto proxyConfig = + S3ProxyConfigurationBuilder( + s3Config.endpoint().has_value() ? 
s3Config.endpoint().value() + : "") + .useSsl(s3Config.useSSL()) + .build(); + if (proxyConfig.has_value()) { + clientConfig.proxyScheme = Aws::Http::SchemeMapper::FromString( + proxyConfig.value().scheme().c_str()); + clientConfig.proxyHost = awsString(proxyConfig.value().host()); + clientConfig.proxyPort = proxyConfig.value().port(); + clientConfig.proxyUserName = awsString(proxyConfig.value().username()); + clientConfig.proxyPassword = awsString(proxyConfig.value().password()); + } + } + + if (s3Config.useSSL()) { + clientConfig.scheme = Aws::Http::Scheme::HTTPS; + } else { + clientConfig.scheme = Aws::Http::Scheme::HTTP; + } + + if (s3Config.connectTimeout().has_value()) { + clientConfig.connectTimeoutMs = + std::chrono::duration_cast( + facebook::velox::config::toDuration( + s3Config.connectTimeout().value())) + .count(); + } + + if (s3Config.socketTimeout().has_value()) { + clientConfig.requestTimeoutMs = + std::chrono::duration_cast( + facebook::velox::config::toDuration( + s3Config.socketTimeout().value())) + .count(); + } + + if (s3Config.maxConnections().has_value()) { + clientConfig.maxConnections = s3Config.maxConnections().value(); + } + + auto retryStrategy = getRetryStrategy(s3Config); + if (retryStrategy.has_value()) { + clientConfig.retryStrategy = retryStrategy.value(); + } + + clientConfig.useVirtualAddressing = s3Config.useVirtualAddressing(); + clientConfig.payloadSigningPolicy = + inferPayloadSign(s3Config.payloadSigningPolicy()); + + auto credentialsProvider = getCredentialsProvider(s3Config); + + client_ = std::make_shared( + credentialsProvider, nullptr /* endpointProvider */, clientConfig); + ++fileSystemCount; + } + + ~Impl() { + client_.reset(); + --fileSystemCount; + } + + // Configure and return an AWSCredentialsProvider with access key and secret + // key. + std::shared_ptr + getAccessKeySecretKeyCredentialsProvider( + const std::string& accessKey, + const std::string& secretKey) const { + return std::make_shared( + awsString(accessKey), awsString(secretKey)); + } + + // Return a default AWSCredentialsProvider. + std::shared_ptr + getDefaultCredentialsProvider() const { + return std::make_shared(); + } + + // Configure and return an AWSCredentialsProvider with S3 IAM Role. + std::shared_ptr + getIAMRoleCredentialsProvider( + const std::string& s3IAMRole, + const std::string& sessionName) const { + return std::make_shared( + awsString(s3IAMRole), awsString(sessionName)); + } + + // Return an AWSCredentialsProvider based on the config. 
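+  // At most one credential source may be configured: explicit access/secret
+  // keys (which must be set together), instance credentials, or an IAM role.
+  // If none is configured, the AWS default credentials provider chain is used.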
+ std::shared_ptr getCredentialsProvider( + const S3Config& s3Config) const { + auto accessKey = s3Config.accessKey(); + auto secretKey = s3Config.secretKey(); + const auto iamRole = s3Config.iamRole(); + + int keyCount = accessKey.has_value() + secretKey.has_value(); + // keyCount=0 means both are not specified + // keyCount=2 means both are specified + // keyCount=1 means only one of them is specified and is an error + VELOX_USER_CHECK( + (keyCount != 1), + "Invalid configuration: both access key and secret key must be specified"); + + int configCount = (accessKey.has_value() && secretKey.has_value()) + + iamRole.has_value() + s3Config.useInstanceCredentials(); + VELOX_USER_CHECK( + (configCount <= 1), + "Invalid configuration: specify only one among 'access/secret keys', 'use instance credentials', 'IAM role'"); + + if (accessKey.has_value() && secretKey.has_value()) { + return getAccessKeySecretKeyCredentialsProvider( + accessKey.value(), secretKey.value()); + } + + if (s3Config.useInstanceCredentials()) { + return getDefaultCredentialsProvider(); + } + + if (iamRole.has_value()) { + return getIAMRoleCredentialsProvider( + iamRole.value(), s3Config.iamRoleSessionName()); + } + + return getDefaultCredentialsProvider(); + } + + // Return a client RetryStrategy based on the config. + std::optional> getRetryStrategy( + const S3Config& s3Config) const { + auto retryMode = s3Config.retryMode(); + auto maxAttempts = s3Config.maxAttempts(); + if (retryMode.has_value()) { + if (retryMode.value() == "standard") { + if (maxAttempts.has_value()) { + VELOX_USER_CHECK_GE( + maxAttempts.value(), + 0, + "Invalid configuration: specified 'hive.s3.max-attempts' value {} is < 0.", + maxAttempts.value()); + return std::make_shared( + maxAttempts.value()); + } else { + // Otherwise, use default value 3. + return std::make_shared(); + } + } else if (retryMode.value() == "adaptive") { + if (maxAttempts.has_value()) { + VELOX_USER_CHECK_GE( + maxAttempts.value(), + 0, + "Invalid configuration: specified 'hive.s3.max-attempts' value {} is < 0.", + maxAttempts.value()); + return std::make_shared( + maxAttempts.value()); + } else { + // Otherwise, use default value 3. + return std::make_shared(); + } + } else if (retryMode.value() == "legacy") { + if (maxAttempts.has_value()) { + VELOX_USER_CHECK_GE( + maxAttempts.value(), + 0, + "Invalid configuration: specified 'hive.s3.max-attempts' value {} is < 0.", + maxAttempts.value()); + return std::make_shared( + maxAttempts.value()); + } else { + // Otherwise, use default value maxRetries = 10, scaleFactor = 25 + return std::make_shared(); + } + } else { + VELOX_USER_FAIL("Invalid retry mode for S3: {}", retryMode.value()); + } + } + return std::nullopt; + } + + // Make it clear that the S3FileSystem instance owns the S3Client. + // Once the S3FileSystem is destroyed, the S3Client fails to work + // due to the Aws::ShutdownAPI invocation in the destructor. 
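+  // Callers must therefore not retain the returned raw pointer beyond the
+  // lifetime of the owning S3FileSystem.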
+ Aws::S3::S3Client* s3Client() const { + return client_.get(); + } + + std::string getLogLevelName() const { + return getAwsInstance()->getLogLevelName(); + } + + std::string getLogPrefix() const { + return getAwsInstance()->getLogPrefix(); + } + + private: + std::shared_ptr client_; +}; + +S3FileSystem::S3FileSystem( + std::string_view bucketName, + const std::shared_ptr config) + : FileSystem(config) { + S3Config s3Config(bucketName, config); + impl_ = std::make_shared(s3Config); +} + +std::string S3FileSystem::getLogLevelName() const { + return impl_->getLogLevelName(); +} + +std::string S3FileSystem::getLogPrefix() const { + return impl_->getLogPrefix(); +} + +std::unique_ptr S3FileSystem::openFileForRead( + std::string_view s3Path, + const FileOptions& options) { + const auto path = getPath(s3Path); + auto s3file = std::make_unique(path, impl_->s3Client()); + s3file->initialize(options); + return s3file; +} + +std::unique_ptr S3FileSystem::openFileForWrite( + std::string_view s3Path, + const FileOptions& options) { + const auto path = getPath(s3Path); + auto s3file = + std::make_unique(path, impl_->s3Client(), options.pool); + return s3file; +} + +std::string S3FileSystem::name() const { + return "S3"; +} + +} // namespace facebook::velox::filesystems diff --git a/velox/connectors/lakehouse/storage_adapters/s3fs/S3FileSystem.h b/velox/connectors/lakehouse/storage_adapters/s3fs/S3FileSystem.h new file mode 100644 index 000000000000..1330282da2fc --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/s3fs/S3FileSystem.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "velox/common/file/FileSystems.h" + +namespace facebook::velox::filesystems { + +bool initializeS3( + std::string_view logLevel = "FATAL", + std::optional logLocation = std::nullopt); + +void finalizeS3(); + +/// Implementation of S3 filesystem and file interface. +/// We provide a registration method for read and write files so the appropriate +/// type of file can be constructed based on a filename. 
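+/// A minimal usage sketch (assuming S3 support is compiled in and the relevant
+/// hive.s3.* options are present in 'config'; the path below is illustrative):
+///
+///   filesystems::registerS3FileSystem();
+///   auto fs = filesystems::getFileSystem("s3://my-bucket/data/file.orc", config);
+///   auto file = fs->openFileForRead("s3://my-bucket/data/file.orc");
+///   // ... read from 'file', release it, then, before process exit:
+///   filesystems::finalizeS3FileSystem();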
+class S3FileSystem : public FileSystem { + public: + S3FileSystem( + std::string_view bucketName, + const std::shared_ptr config); + + std::string name() const override; + + std::unique_ptr openFileForRead( + std::string_view s3Path, + const FileOptions& options = {}) override; + + std::unique_ptr openFileForWrite( + std::string_view s3Path, + const FileOptions& options) override; + + void remove(std::string_view path) override { + VELOX_UNSUPPORTED("remove for S3 not implemented"); + } + + void rename( + std::string_view path, + std::string_view newPath, + bool overWrite = false) override { + VELOX_UNSUPPORTED("rename for S3 not implemented"); + } + + bool exists(std::string_view path) override { + VELOX_UNSUPPORTED("exists for S3 not implemented"); + } + + std::vector list(std::string_view path) override { + VELOX_UNSUPPORTED("list for S3 not implemented"); + } + + void mkdir(std::string_view path, const DirectoryOptions& options = {}) + override { + VELOX_UNSUPPORTED("mkdir for S3 not implemented"); + } + + void rmdir(std::string_view path) override { + VELOX_UNSUPPORTED("rmdir for S3 not implemented"); + } + + std::string getLogLevelName() const; + + std::string getLogPrefix() const; + + protected: + class Impl; + std::shared_ptr impl_; +}; + +} // namespace facebook::velox::filesystems diff --git a/velox/connectors/lakehouse/storage_adapters/s3fs/S3Util.cpp b/velox/connectors/lakehouse/storage_adapters/s3fs/S3Util.cpp new file mode 100644 index 000000000000..e30eaf851223 --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/s3fs/S3Util.cpp @@ -0,0 +1,185 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Implementation of S3 filesystem and file interface. +// We provide a registration method for read and write files so the appropriate +// type of file can be constructed based on a filename. See the +// (register|generate)ReadFile and (register|generate)WriteFile functions. + +#include "folly/IPAddress.h" +#include "re2/re2.h" + +#include "S3Util.h" + +namespace facebook::velox::filesystems { + +std::string getErrorStringFromS3Error( + const Aws::Client::AWSError& error) { + switch (error.GetErrorType()) { + case Aws::S3::S3Errors::NO_SUCH_BUCKET: + return "No such bucket"; + case Aws::S3::S3Errors::NO_SUCH_KEY: + return "No such key"; + case Aws::S3::S3Errors::RESOURCE_NOT_FOUND: + return "Resource not found"; + case Aws::S3::S3Errors::ACCESS_DENIED: + return "Access denied"; + case Aws::S3::S3Errors::SERVICE_UNAVAILABLE: + return "Service unavailable"; + case Aws::S3::S3Errors::NETWORK_CONNECTION: + return "Network connection"; + default: + return "Unknown error"; + } +} + +/// The noProxyList is a comma separated list of subdomains, domains or IP +/// ranges (using CIDR). For a given hostname check if it has a matching +/// subdomain, domain or IP range in the noProxyList. 
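+/// For example (illustrative values): with noProxyList
+/// ".foo.com,*.bar.com,192.168.0.0/16", the hostnames "svc.foo.com",
+/// "app.bar.com" and "192.168.1.25" are excluded from the proxy, while
+/// "foo.org" is not.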
+bool isHostExcludedFromProxy( + const std::string& hostname, + const std::string& noProxyList) { + std::vector noProxyListElements{}; + + if (noProxyList.empty()) { + return false; + } + + auto hostAsIp = folly::IPAddress::tryFromString(hostname); + folly::split(',', noProxyList, noProxyListElements); + for (auto elem : noProxyListElements) { + // Elem contains "/" which separates IP and subnet mask e.g. 192.168.1.0/24. + if (elem.find("/") != std::string::npos && hostAsIp.hasValue()) { + return hostAsIp.value().inSubnet(elem); + } + // Match subdomain, domain names and IP address strings. + else if ( + elem.length() < hostname.length() && elem[0] == '.' && + !hostname.compare( + hostname.length() - elem.length(), elem.length(), elem)) { + return true; + } else if ( + elem.length() < hostname.length() && elem[0] == '*' && elem[1] == '.' && + !hostname.compare( + hostname.length() - elem.length() + 1, + elem.length() - 1, + elem.substr(1))) { + return true; + } else if (elem.length() == hostname.length() && !hostname.compare(elem)) { + return true; + } + } + return false; +} + +/// Reading the various proxy related environment variables. +/// There is a lacking standard. The environment variables can be +/// defined lower case or upper case. The lower case values are checked +/// first and, if set, returned, therefore taking precendence. +/// Note, the envVar input is expected to be lower case. +namespace { +std::string readProxyEnvVar(std::string envVar) { + auto httpProxy = getenv(envVar.c_str()); + if (httpProxy) { + return std::string(httpProxy); + } + + std::transform(envVar.begin(), envVar.end(), envVar.begin(), ::toupper); + httpProxy = getenv(envVar.c_str()); + if (httpProxy) { + return std::string(httpProxy); + } + return ""; +}; +} // namespace + +std::string getHttpProxyEnvVar() { + return readProxyEnvVar("http_proxy"); +} + +std::string getHttpsProxyEnvVar() { + return readProxyEnvVar("https_proxy"); +}; + +std::string getNoProxyEnvVar() { + return readProxyEnvVar("no_proxy"); +}; + +std::optional S3ProxyConfigurationBuilder::build() { + std::string proxyUrl; + if (useSsl_) { + proxyUrl = getHttpsProxyEnvVar(); + } else { + proxyUrl = getHttpProxyEnvVar(); + } + + if (proxyUrl.empty()) { + return std::nullopt; + } + folly::Uri proxyUri(proxyUrl); + + /// The endpoint is usually a domain with port or an + /// IP address with port. It is assumed that there are + /// 2 parts separated by a colon. + std::vector endpointElements{}; + folly::split(':', s3Endpoint_, endpointElements); + if (FOLLY_UNLIKELY(endpointElements.size() > 2)) { + LOG(ERROR) << fmt::format( + "Too many parts in S3 endpoint URI {} ", s3Endpoint_); + return std::nullopt; + } + + auto noProxy = getNoProxyEnvVar(); + if (isHostExcludedFromProxy(endpointElements[0], noProxy)) { + return std::nullopt; + } + return proxyUri; +} + +std::optional parseAWSStandardRegionName( + std::string_view endpoint) { + // The assumption is that the endpoint ends with + // ".amazonaws.com" or ".amazonaws.com/". That means for AWS we don't + // expect a port in the endpoint. + const std::string_view kAmazonHostSuffix = ".amazonaws.com"; + auto index = endpoint.size() - kAmazonHostSuffix.size(); + // Handle the case where the endpoint ends in a trailing slash. + if (endpoint.back() == '/') { + index--; + } + if (endpoint.rfind(kAmazonHostSuffix) != index) { + return std::nullopt; + } + // Remove the kAmazonHostSuffix. 
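+  // Illustrative examples of the endpoint shapes handled below (hypothetical
+  // endpoints):
+  //   "bucket.s3-us-west-2.amazonaws.com" -> "us-west-2" (regex match)
+  //   "bucket.s3.us-west-2.amazonaws.com" -> "us-west-2" (regex match)
+  //   "athena.us-east-1.amazonaws.com"    -> "us-east-1" (service.[region])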
+ std::string_view endpointPrefix = endpoint.substr(0, index); + const re2::RE2 pattern("^(?:.+\\.)?s3[-.]([a-z0-9-]+)$"); + std::string region; + if (re2::RE2::FullMatch(endpointPrefix, pattern, ®ion)) { + // endpointPrefix is 'bucket.s3-[region]' or 'bucket.s3.[region]' + return region; + } + + index = endpointPrefix.rfind('.'); + if (index != std::string::npos) { + // endpointPrefix was 'service.[region]'. + return std::string(endpointPrefix.substr(index + 1)); + } + + // Use default region set by the SDK. + return std::nullopt; +} + +} // namespace facebook::velox::filesystems diff --git a/velox/connectors/lakehouse/storage_adapters/s3fs/S3Util.h b/velox/connectors/lakehouse/storage_adapters/s3fs/S3Util.h new file mode 100644 index 000000000000..966c6bfe30a7 --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/s3fs/S3Util.h @@ -0,0 +1,226 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Implementation of S3 filesystem and file interface. +// We provide a registration method for read and write files so the appropriate +// type of file can be constructed based on a filename. See the +// (register|generate)ReadFile and (register|generate)WriteFile functions. + +#pragma once + +#include +#include +#include +#include + +#include "velox/common/base/Exceptions.h" + +namespace facebook::velox::filesystems { + +namespace { +static std::string_view kSep{"/"}; +// AWS S3 EMRFS, Hadoop block storage filesystem on-top of Amazon S3 buckets. +static std::string_view kS3Scheme{"s3://"}; +// This should not be mixed with s3 nor the s3a. +// S3A Hadoop 3.x (previous connectors "s3" and "s3n" are deprecated). +static std::string_view kS3aScheme{"s3a://"}; +// DEPRECATED: s3n are deprecated in Hadoop 3.x but we are supporting s3n for +// data that hasn't been migrated yet. +static std::string_view kS3nScheme{"s3n://"}; +// OSS Alibaba support S3 format, usage only with SSL. +static std::string_view kOssScheme{"oss://"}; +// Tencent COS support S3 format. 
+static std::string_view kCosScheme{"cos://"}; +static std::string_view kCosNScheme{"cosn://"}; + +// From AWS documentation +constexpr int kS3MaxKeySize{1024}; +} // namespace + +inline bool isS3AwsFile(const std::string_view filename) { + return filename.substr(0, kS3Scheme.size()) == kS3Scheme; +} + +inline bool isS3aFile(const std::string_view filename) { + return filename.substr(0, kS3aScheme.size()) == kS3aScheme; +} + +inline bool isS3nFile(const std::string_view filename) { + return filename.substr(0, kS3nScheme.size()) == kS3nScheme; +} + +inline bool isOssFile(const std::string_view filename) { + return filename.substr(0, kOssScheme.size()) == kOssScheme; +} + +inline bool isCosFile(const std::string_view filename) { + return filename.substr(0, kCosScheme.size()) == kCosScheme; +} + +inline bool isCosNFile(const std::string_view filename) { + return filename.substr(0, kCosNScheme.size()) == kCosNScheme; +} + +inline bool isS3File(const std::string_view filename) { + // TODO: Each prefix should be implemented as its own filesystem. + return isS3AwsFile(filename) || isS3aFile(filename) || isS3nFile(filename) || + isOssFile(filename) || isCosFile(filename) || isCosNFile(filename); +} + +// The input `path` must not have the S3 prefix. +inline void getBucketAndKeyFromPath( + std::string_view path, + std::string& bucket, + std::string& key) { + auto firstSep = path.find_first_of(kSep); + bucket = path.substr(0, firstSep); + key = path.substr(firstSep + 1); +} + +// TODO: Correctness check for bucket name. +// 1. Length between 3 and 63: +// 3 < length(bucket) < 63 +// 2. Mandatory label notation - regexp: +// regexp="(^[a-z0-9])([.-]?[a-z0-9]+){2,62}([/]?$)" +// 3. Disallowed IPv4 notation - regexp: +// regexp="^((25[0-5]|(2[0-4]|1\d|[1-9]|)\d)\.?\b){4}[/]?$" +inline std::string s3URI(std::string_view bucket, std::string_view key) { + std::stringstream ss; + ss << kS3Scheme << bucket << kSep << key; + return ss.str(); +} + +inline std::string_view getPath(std::string_view path) { + // Remove one of the prefixes 's3://', 'oss://', 's3a://' if any from the + // given path. + // TODO: Each prefix should be implemented as its own filesystem. + if (isS3AwsFile(path)) { + return path.substr(kS3Scheme.length()); + } else if (isS3aFile(path)) { + return path.substr(kS3aScheme.length()); + } else if (isS3nFile(path)) { + return path.substr(kS3nScheme.length()); + } else if (isOssFile(path)) { + return path.substr(kOssScheme.length()); + } else if (isCosFile(path)) { + return path.substr(kCosScheme.length()); + } else if (isCosNFile(path)) { + return path.substr(kCosNScheme.length()); + } + return path; +} + +inline Aws::String awsString(const std::string& s) { + return Aws::String(s.begin(), s.end()); +} + +std::string getErrorStringFromS3Error( + const Aws::Client::AWSError& error); + +namespace { +inline std::string getS3BackendService( + const Aws::Http::HeaderValueCollection& headers) { + const auto it = headers.find("server"); + if (it != headers.end()) { + return it->second; + } + return "Unknown"; +} + +inline std::string getRequestID( + const Aws::Http::HeaderValueCollection& headers) { + static const std::vector kRequestIds{ + "x-amz-request-id", "x-oss-request-id"}; + + for (const auto& kRequestId : kRequestIds) { + const auto it = headers.find(kRequestId); + if (it != headers.end()) { + return it->second; + } + } + return ""; +} +} // namespace + +/// Only Amazon (amz) and Alibaba (oss) request IDs are supported. 
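+/// Usage sketch for the macro below (illustrative; HeadObject is only an
+/// example SDK call): wrap an AWS SDK outcome and turn failures into Velox
+/// exceptions with a descriptive message, e.g.
+///
+///   auto outcome = client->HeadObject(request);
+///   VELOX_CHECK_AWS_OUTCOME(
+///       outcome, "Failed to get metadata for S3 object", bucket, key);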
+#define VELOX_CHECK_AWS_OUTCOME(outcome, errorMsgPrefix, bucket, key) \ + { \ + if (!outcome.IsSuccess()) { \ + auto error = outcome.GetError(); \ + auto errMsg = fmt::format( \ + "{} due to: '{}'. Path:'{}', SDK Error Type:{}, HTTP Status Code:{}, S3 Service:'{}', Message:'{}', RequestID:'{}'.", \ + errorMsgPrefix, \ + getErrorStringFromS3Error(error), \ + s3URI(bucket, key), \ + static_cast(error.GetErrorType()), \ + error.GetResponseCode(), \ + getS3BackendService(error.GetResponseHeaders()), \ + error.GetMessage(), \ + getRequestID(error.GetResponseHeaders())); \ + if (IsRetryableHttpResponseCode(error.GetResponseCode())) { \ + auto retryHint = fmt::format( \ + " Request failed after retrying {} times. Try increasing the value of 'hive.s3.max-attempts'.", \ + outcome.GetRetryCount()); \ + errMsg.append(retryHint); \ + } \ + if (error.GetResponseCode() == Aws::Http::HttpResponseCode::NOT_FOUND) { \ + VELOX_FILE_NOT_FOUND_ERROR(errMsg); \ + } \ + VELOX_FAIL(errMsg); \ + } \ + } + +bool isHostExcludedFromProxy( + const std::string& hostname, + const std::string& noProxyList); + +std::string getHttpProxyEnvVar(); +std::string getHttpsProxyEnvVar(); +std::string getNoProxyEnvVar(); + +// Adopted from the AWS Java SDK +// Endpoint can be 'service.[region].amazonaws.com' or +// 'bucket.s3-[region].amazonaws.com' or bucket.s3.[region].amazonaws.com' +// Return value is a region string value if present. +// The endpoint may contain a trailing '/' that is handled. +std::optional parseAWSStandardRegionName( + std::string_view endpoint); + +class S3ProxyConfigurationBuilder { + public: + S3ProxyConfigurationBuilder(const std::string& s3Endpoint) + : s3Endpoint_(s3Endpoint){}; + + S3ProxyConfigurationBuilder& useSsl(const bool& useSsl) { + useSsl_ = useSsl; + return *this; + } + + std::optional build(); + + private: + const std::string s3Endpoint_; + bool useSsl_; +}; + +} // namespace facebook::velox::filesystems + +template <> +struct fmt::formatter : formatter { + auto format(Aws::Http::HttpResponseCode s, format_context& ctx) { + return formatter::format(static_cast(s), ctx); + } +}; diff --git a/velox/connectors/lakehouse/storage_adapters/s3fs/S3WriteFile.h b/velox/connectors/lakehouse/storage_adapters/s3fs/S3WriteFile.h new file mode 100644 index 000000000000..929eed20c370 --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/s3fs/S3WriteFile.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "velox/common/file/File.h" +#include "velox/common/memory/MemoryPool.h" + +namespace Aws::S3 { +class S3Client; +} + +namespace facebook::velox::filesystems { + +/// S3WriteFile uses the Apache Arrow implementation as a reference. +/// AWS C++ SDK allows streaming writes via the MultiPart upload API. +/// Multipart upload allows you to upload a single object as a set of parts. +/// Each part is a contiguous portion of the object's data. 
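+/// For example (illustrative): with a 10 MiB part size, a 25 MiB object is
+/// uploaded as two full parts plus a final 5 MiB part.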
+/// While AWS and Minio support different sizes for each +/// part (only requiring a minimum of 5MB), Cloudflare R2 requires that every +/// part be exactly equal (except for the last part). We set this to 10 MiB, so +/// that in combination with the maximum number of parts of 10,000, this gives a +/// file limit of 100k MiB (or about 98 GiB). +/// (see https://docs.aws.amazon.com/AmazonS3/latest/userguide/qfacts.html) +/// (for rational, see: https://github.com/apache/arrow/issues/34363) +/// You can upload these object parts independently and in any order. +/// After all parts of your object are uploaded, Amazon S3 assembles these parts +/// and creates the object. +/// https://docs.aws.amazon.com/AmazonS3/latest/userguide/mpuoverview.html +/// https://github.com/apache/arrow/blob/main/cpp/src/arrow/filesystem/s3fs.cc +/// S3WriteFile is not thread-safe. +/// UploadPart is currently synchronous during append and flush. +/// TODO: Evaluate and add option for asynchronous part uploads. +/// TODO: Implement retry on failure. +class S3WriteFile : public WriteFile { + public: + S3WriteFile( + std::string_view path, + Aws::S3::S3Client* client, + memory::MemoryPool* pool); + + /// Appends data to the end of the file. + /// Uploads a part on reaching part size limit. + void append(std::string_view data) override; + + /// No-op. Append handles the flush. + void flush() override; + + /// Close the file. Any cleanup (disk flush, etc.) will be done here. + void close() override; + + /// Current file size, i.e. the sum of all previous Appends. + uint64_t size() const override; + + /// Return the number of parts uploaded so far. + int numPartsUploaded() const; + + protected: + class Impl; + std::shared_ptr impl_; +}; + +} // namespace facebook::velox::filesystems diff --git a/velox/connectors/lakehouse/storage_adapters/s3fs/tests/CMakeLists.txt b/velox/connectors/lakehouse/storage_adapters/s3fs/tests/CMakeLists.txt new file mode 100644 index 000000000000..214d1e2c2b8e --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/s3fs/tests/CMakeLists.txt @@ -0,0 +1,115 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +add_executable(velox_s3config_test S3ConfigTest.cpp) +add_test(velox_s3config_test velox_s3config_test) +target_link_libraries( + velox_s3config_test + velox_common_config + velox_lakehouse_s3fs + GTest::gtest + GTest::gtest_main) + +add_executable(velox_s3file_test S3FileSystemTest.cpp S3UtilTest.cpp) +add_test(velox_s3file_test velox_s3file_test) +target_link_libraries( + velox_s3file_test + velox_file + velox_lakehouse_s3fs + velox_core + velox_exec_test_lib + velox_dwio_common_exception + velox_exec + GTest::gtest + GTest::gtest_main) + +add_executable(velox_s3registration_test S3FileSystemRegistrationTest.cpp) +add_test(velox_s3registration_test velox_s3registration_test) +target_link_libraries( + velox_s3registration_test + velox_file + velox_lakehouse_s3fs + velox_core + velox_exec_test_lib + velox_dwio_parquet_reader + velox_dwio_common_exception + velox_exec + GTest::gtest + GTest::gtest_main) + +add_executable(velox_s3finalize_test S3FileSystemFinalizeTest.cpp) +add_test(velox_s3finalize_test velox_s3finalize_test) +target_link_libraries( + velox_s3finalize_test + velox_lakehouse_s3fs + velox_file + velox_core + GTest::gtest + GTest::gtest_main) + +add_executable(velox_s3insert_test S3InsertTest.cpp) +add_test(velox_s3insert_test velox_s3insert_test) +target_link_libraries( + velox_s3insert_test + velox_file + velox_lakehouse_s3fs + velox_core + velox_exec_test_lib + velox_dwio_parquet_writer + velox_dwio_parquet_reader + velox_dwio_common_exception + velox_exec + GTest::gtest + GTest::gtest_main) + +add_executable(velox_s3read_test S3ReadTest.cpp) +add_test( + NAME velox_s3read_test + COMMAND velox_s3read_test + WORKING_DIRECTORY .) +target_link_libraries( + velox_s3read_test + velox_file + velox_lakehouse_s3fs + velox_core + velox_exec_test_lib + velox_dwio_parquet_reader + velox_dwio_common_exception + velox_exec + GTest::gtest + GTest::gtest_main) + +add_executable(velox_s3metrics_test S3FileSystemMetricsTest.cpp) +add_test(velox_s3metrics_test velox_s3metrics_test) +target_link_libraries( + velox_s3metrics_test + velox_lakehouse_s3fs + velox_exec_test_lib + GTest::gtest + GTest::gtest_main) + +add_executable(velox_s3multiendpoints_test S3MultipleEndpointsTest.cpp) +add_test(velox_s3multiendpoints_test velox_s3multiendpoints_test) +target_link_libraries( + velox_s3multiendpoints_test + velox_file + velox_lakehouse_s3fs + velox_core + velox_exec_test_lib + velox_dwio_parquet_reader + velox_dwio_parquet_writer + velox_dwio_common_exception + velox_exec + GTest::gtest + GTest::gtest_main) diff --git a/velox/connectors/lakehouse/storage_adapters/s3fs/tests/MinioServer.h b/velox/connectors/lakehouse/storage_adapters/s3fs/tests/MinioServer.h new file mode 100644 index 000000000000..591ed403f350 --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/s3fs/tests/MinioServer.h @@ -0,0 +1,119 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "velox/common/config/Config.h" +#include "velox/exec/tests/utils/PortUtil.h" +#include "velox/exec/tests/utils/TempDirectoryPath.h" + +#include "boost/process.hpp" + +using namespace facebook::velox; + +namespace { +constexpr char const* kMinioExecutableName{"minio-2022-05-26"}; +constexpr char const* kMinioAccessKey{"minio"}; +constexpr char const* kMinioSecretKey{"miniopass"}; +} // namespace + +// A minio server, managed as a child process. +// Adapted from the Apache Arrow library. +class MinioServer { + public: + MinioServer() : tempPath_(::exec::test::TempDirectoryPath::create()) { + constexpr auto kHostAddressTemplate = "127.0.0.1:{}"; + auto ports = facebook::velox::exec::test::getFreePorts(2); + connectionString_ = fmt::format(kHostAddressTemplate, ports[0]); + consoleAddress_ = fmt::format(kHostAddressTemplate, ports[1]); + } + + void start(); + + void stop(); + + void addBucket(const char* bucket) { + const std::string path = tempPath_->getPath() + "/" + bucket; + mkdir(path.c_str(), S_IRWXU | S_IRWXG); + } + + std::string path() const { + return tempPath_->getPath(); + } + + std::shared_ptr hiveConfig( + const std::unordered_map configOverride = {}) + const { + std::unordered_map config({ + {"hive.s3.aws-access-key", accessKey_}, + {"hive.s3.aws-secret-key", secretKey_}, + {"hive.s3.endpoint", connectionString_}, + {"hive.s3.ssl.enabled", "false"}, + {"hive.s3.path-style-access", "true"}, + }); + + // Update the default config map with the supplied configOverride map + for (const auto& [configName, configValue] : configOverride) { + config[configName] = configValue; + } + + return std::make_shared(std::move(config)); + } + + private: + const std::shared_ptr tempPath_; + std::string connectionString_; + std::string consoleAddress_; + const std::string accessKey_ = kMinioAccessKey; + const std::string secretKey_ = kMinioSecretKey; + std::shared_ptr<::boost::process::child> serverProcess_; +}; + +void MinioServer::start() { + boost::process::environment env = boost::this_process::environment(); + env["MINIO_ACCESS_KEY"] = accessKey_; + env["MINIO_SECRET_KEY"] = secretKey_; + + auto exePath = boost::process::search_path(kMinioExecutableName); + if (exePath.empty()) { + VELOX_FAIL("Failed to find minio executable {}'", kMinioExecutableName); + } + + const auto path = tempPath_->getPath(); + try { + serverProcess_ = std::make_shared( + env, + exePath, + "server", + "--quiet", + "--compat", + "--address", + connectionString_, + "--console-address", + consoleAddress_, + path.c_str()); + } catch (const std::exception& e) { + VELOX_FAIL("Failed to launch Minio server: {}", e.what()); + } +} + +void MinioServer::stop() { + if (serverProcess_ && serverProcess_->valid()) { + // Brutal shutdown + serverProcess_->terminate(); + serverProcess_->wait(); + } +} diff --git a/velox/connectors/lakehouse/storage_adapters/s3fs/tests/S3ConfigTest.cpp b/velox/connectors/lakehouse/storage_adapters/s3fs/tests/S3ConfigTest.cpp new file mode 100644 index 000000000000..c1faee22b8a3 --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/s3fs/tests/S3ConfigTest.cpp @@ -0,0 +1,111 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "velox/connectors/lakehouse/storage_adapters/s3fs/S3Config.h" +#include "velox/common/config/Config.h" + +#include + +namespace facebook::velox::filesystems { +namespace { +TEST(S3ConfigTest, defaultConfig) { + auto config = std::make_shared( + std::unordered_map()); + auto s3Config = S3Config("", config); + ASSERT_EQ(s3Config.useVirtualAddressing(), true); + ASSERT_EQ(s3Config.useSSL(), true); + ASSERT_EQ(s3Config.useInstanceCredentials(), false); + ASSERT_EQ(s3Config.endpoint(), std::nullopt); + ASSERT_EQ(s3Config.endpointRegion(), std::nullopt); + ASSERT_EQ(s3Config.accessKey(), std::nullopt); + ASSERT_EQ(s3Config.secretKey(), std::nullopt); + ASSERT_EQ(s3Config.iamRole(), std::nullopt); + ASSERT_EQ(s3Config.iamRoleSessionName(), "velox-session"); + ASSERT_EQ(s3Config.payloadSigningPolicy(), "Never"); + ASSERT_EQ(s3Config.cacheKey("foo", config), "foo"); +} + +TEST(S3ConfigTest, overrideConfig) { + std::unordered_map configFromFile = { + {S3Config::baseConfigKey(S3Config::Keys::kPathStyleAccess), "true"}, + {S3Config::baseConfigKey(S3Config::Keys::kSSLEnabled), "false"}, + {S3Config::baseConfigKey(S3Config::Keys::kUseInstanceCredentials), + "true"}, + {"hive.s3.payload-signing-policy", "RequestDependent"}, + {S3Config::baseConfigKey(S3Config::Keys::kEndpoint), "endpoint"}, + {S3Config::baseConfigKey(S3Config::Keys::kEndpointRegion), "region"}, + {S3Config::baseConfigKey(S3Config::Keys::kAccessKey), "access"}, + {S3Config::baseConfigKey(S3Config::Keys::kSecretKey), "secret"}, + {S3Config::baseConfigKey(S3Config::Keys::kIamRole), "iam"}, + {S3Config::baseConfigKey(S3Config::Keys::kIamRoleSessionName), "velox"}}; + auto configBase = + std::make_shared(std::move(configFromFile)); + auto s3Config = S3Config("", configBase); + ASSERT_EQ(s3Config.useVirtualAddressing(), false); + ASSERT_EQ(s3Config.useSSL(), false); + ASSERT_EQ(s3Config.useInstanceCredentials(), true); + ASSERT_EQ(s3Config.endpoint(), "endpoint"); + ASSERT_EQ(s3Config.endpointRegion(), "region"); + ASSERT_EQ(s3Config.accessKey(), std::optional("access")); + ASSERT_EQ(s3Config.secretKey(), std::optional("secret")); + ASSERT_EQ(s3Config.iamRole(), std::optional("iam")); + ASSERT_EQ(s3Config.iamRoleSessionName(), "velox"); + ASSERT_EQ(s3Config.payloadSigningPolicy(), "RequestDependent"); + ASSERT_EQ(s3Config.cacheKey("foo", configBase), "endpoint-foo"); + ASSERT_EQ(s3Config.cacheKey("bar", configBase), "endpoint-bar"); +} + +TEST(S3ConfigTest, overrideBucketConfig) { + std::string_view bucket = "bucket"; + std::unordered_map bucketConfigFromFile = { + {S3Config::baseConfigKey(S3Config::Keys::kPathStyleAccess), "true"}, + {S3Config::baseConfigKey(S3Config::Keys::kSSLEnabled), "false"}, + {S3Config::baseConfigKey(S3Config::Keys::kUseInstanceCredentials), + "true"}, + {S3Config::baseConfigKey(S3Config::Keys::kEndpoint), "endpoint"}, + {S3Config::bucketConfigKey(S3Config::Keys::kEndpoint, bucket), + "bucket.s3-region.amazonaws.com"}, + {S3Config::baseConfigKey(S3Config::Keys::kAccessKey), "access"}, + {S3Config::bucketConfigKey(S3Config::Keys::kAccessKey, bucket), + "bucket-access"}, + 
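+      // Bucket-scoped keys take precedence over the corresponding base keys
+      // for "bucket"; keys without a bucket-scoped entry fall back to the
+      // base values (see the assertions below).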
{"hive.s3.payload-signing-policy", "Always"}, + {S3Config::baseConfigKey(S3Config::Keys::kSecretKey), "secret"}, + {S3Config::bucketConfigKey(S3Config::Keys::kSecretKey, bucket), + "bucket-secret"}, + {S3Config::baseConfigKey(S3Config::Keys::kIamRole), "iam"}, + {S3Config::baseConfigKey(S3Config::Keys::kIamRoleSessionName), "velox"}}; + auto configBase = + std::make_shared(std::move(bucketConfigFromFile)); + auto s3Config = S3Config(bucket, configBase); + ASSERT_EQ(s3Config.useVirtualAddressing(), false); + ASSERT_EQ(s3Config.useSSL(), false); + ASSERT_EQ(s3Config.useInstanceCredentials(), true); + ASSERT_EQ(s3Config.endpoint(), "bucket.s3-region.amazonaws.com"); + // Inferred from the endpoint. + ASSERT_EQ(s3Config.endpointRegion(), "region"); + ASSERT_EQ(s3Config.accessKey(), std::optional("bucket-access")); + ASSERT_EQ(s3Config.secretKey(), std::optional("bucket-secret")); + ASSERT_EQ(s3Config.iamRole(), std::optional("iam")); + ASSERT_EQ(s3Config.iamRoleSessionName(), "velox"); + ASSERT_EQ(s3Config.payloadSigningPolicy(), "Always"); + ASSERT_EQ( + s3Config.cacheKey(bucket, configBase), + "bucket.s3-region.amazonaws.com-bucket"); + ASSERT_EQ(s3Config.cacheKey("foo", configBase), "endpoint-foo"); +} + +} // namespace +} // namespace facebook::velox::filesystems diff --git a/velox/connectors/lakehouse/storage_adapters/s3fs/tests/S3FileSystemFinalizeTest.cpp b/velox/connectors/lakehouse/storage_adapters/s3fs/tests/S3FileSystemFinalizeTest.cpp new file mode 100644 index 000000000000..8e6abdd63866 --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/s3fs/tests/S3FileSystemFinalizeTest.cpp @@ -0,0 +1,43 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "velox/common/base/tests/GTestUtils.h" +#include "velox/common/config/Config.h" +#include "velox/connectors/lakehouse/storage_adapters/s3fs/S3FileSystem.h" + +#include "gtest/gtest.h" + +namespace facebook::velox { +namespace { + +TEST(S3FileSystemFinalizeTest, finalize) { + auto s3Config = std::make_shared( + std::unordered_map()); + ASSERT_TRUE(filesystems::initializeS3()); + ASSERT_FALSE(filesystems::initializeS3()); + { + filesystems::S3FileSystem s3fs("", s3Config); + VELOX_ASSERT_THROW( + filesystems::finalizeS3(), "Cannot finalize S3 while in use"); + } + filesystems::finalizeS3(); + VELOX_ASSERT_THROW( + filesystems::initializeS3(), + "Attempt to initialize S3 after it has been finalized."); +} + +} // namespace +} // namespace facebook::velox diff --git a/velox/connectors/lakehouse/storage_adapters/s3fs/tests/S3FileSystemMetricsTest.cpp b/velox/connectors/lakehouse/storage_adapters/s3fs/tests/S3FileSystemMetricsTest.cpp new file mode 100644 index 000000000000..e877a73259a3 --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/s3fs/tests/S3FileSystemMetricsTest.cpp @@ -0,0 +1,178 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include + +#include "S3Test.h" +#include "velox/common/memory/Memory.h" +#include "velox/connectors/lakehouse/storage_adapters/s3fs/RegisterS3FileSystem.h" +#include "velox/connectors/lakehouse/storage_adapters/s3fs/S3Counters.h" +#include "velox/connectors/lakehouse/storage_adapters/s3fs/S3WriteFile.h" + +#include + +namespace facebook::velox::filesystems { +namespace { +class S3TestReporter : public BaseStatsReporter { + public: + mutable std::mutex m; + mutable std::map counterMap; + mutable std::unordered_map statTypeMap; + mutable std::unordered_map> + histogramPercentilesMap; + + void clear() { + std::lock_guard l(m); + counterMap.clear(); + statTypeMap.clear(); + histogramPercentilesMap.clear(); + } + void registerMetricExportType(const char* key, StatType statType) + const override { + statTypeMap[key] = statType; + } + + void registerMetricExportType(folly::StringPiece key, StatType statType) + const override { + statTypeMap[key.str()] = statType; + } + + void registerHistogramMetricExportType( + const char* key, + int64_t /* bucketWidth */, + int64_t /* min */, + int64_t /* max */, + const std::vector& pcts) const override { + histogramPercentilesMap[key] = pcts; + } + + void registerHistogramMetricExportType( + folly::StringPiece key, + int64_t /* bucketWidth */, + int64_t /* min */, + int64_t /* max */, + const std::vector& pcts) const override { + histogramPercentilesMap[key.str()] = pcts; + } + + void addMetricValue(const std::string& key, const size_t value) + const override { + std::lock_guard l(m); + counterMap[key] += value; + } + + void addMetricValue(const char* key, const size_t value) const override { + std::lock_guard l(m); + counterMap[key] += value; + } + + void addMetricValue(folly::StringPiece key, size_t value) const override { + std::lock_guard l(m); + counterMap[key.str()] += value; + } + + void addHistogramMetricValue(const std::string& key, size_t value) + const override { + counterMap[key] = std::max(counterMap[key], value); + } + + void addHistogramMetricValue(const char* key, size_t value) const override { + counterMap[key] = std::max(counterMap[key], value); + } + + void addHistogramMetricValue(folly::StringPiece key, size_t value) + const override { + counterMap[key.str()] = std::max(counterMap[key.str()], value); + } + + std::string fetchMetrics() override { + std::stringstream ss; + ss << "["; + auto sep = ""; + for (const auto& [key, value] : counterMap) { + ss << sep << key << ":" << value; + sep = ","; + } + ss << "]"; + return ss.str(); + } +}; + +folly::Singleton reporter([]() { + return new S3TestReporter(); +}); + +class S3FileSystemMetricsTest : public S3Test { + protected: + static void SetUpTestSuite() { + memory::MemoryManager::testingSetInstance({}); + } + + void SetUp() override { + S3Test::SetUp(); + filesystems::initializeS3("Info"); + s3Reporter = std::dynamic_pointer_cast( + folly::Singleton::try_get()); + s3Reporter->clear(); + } + + static void TearDownTestSuite() { + 
filesystems::finalizeS3(); + } + std::shared_ptr s3Reporter; +}; + +} // namespace + +TEST_F(S3FileSystemMetricsTest, metrics) { + registerS3Metrics(); + + const auto bucketName = "metrics"; + const auto file = "test.txt"; + const auto filename = localPath(bucketName) + "/" + file; + const auto s3File = s3URI(bucketName, file); + auto hiveConfig = minioServer_->hiveConfig(); + S3FileSystem s3fs(bucketName, hiveConfig); + auto pool = memory::memoryManager()->addLeafPool("S3FileSystemMetricsTest"); + + auto writeFile = + s3fs.openFileForWrite(s3File, {{}, pool.get(), std::nullopt}); + EXPECT_EQ(1, s3Reporter->counterMap[std::string{kMetricS3MetadataCalls}]); + EXPECT_EQ(1, s3Reporter->counterMap[std::string{kMetricS3GetMetadataErrors}]); + + constexpr std::string_view kDataContent = + "Dance me to your beauty with a burning violin" + "Dance me through the panic till I'm gathered safely in" + "Lift me like an olive branch and be my homeward dove" + "Dance me to the end of love"; + writeFile->append(kDataContent); + writeFile->close(); + EXPECT_EQ(1, s3Reporter->counterMap[std::string{kMetricS3StartedUploads}]); + EXPECT_EQ(1, s3Reporter->counterMap[std::string{kMetricS3SuccessfulUploads}]); + + auto readFile = s3fs.openFileForRead(s3File); + EXPECT_EQ(2, s3Reporter->counterMap[std::string{kMetricS3MetadataCalls}]); + readFile->pread(0, kDataContent.length()); + EXPECT_EQ(1, s3Reporter->counterMap[std::string{kMetricS3GetObjectCalls}]); +} + +} // namespace facebook::velox::filesystems + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + folly::Init init{&argc, &argv, false}; + BaseStatsReporter::registered = true; + return RUN_ALL_TESTS(); +} diff --git a/velox/connectors/lakehouse/storage_adapters/s3fs/tests/S3FileSystemRegistrationTest.cpp b/velox/connectors/lakehouse/storage_adapters/s3fs/tests/S3FileSystemRegistrationTest.cpp new file mode 100644 index 000000000000..a6af8ccec12e --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/s3fs/tests/S3FileSystemRegistrationTest.cpp @@ -0,0 +1,94 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "S3Test.h" +#include "velox/connectors/lakehouse/storage_adapters/s3fs/RegisterS3FileSystem.h" + +namespace facebook::velox::filesystems { +namespace { + +std::string cacheKeyFunc( + std::shared_ptr config, + std::string_view path) { + return config->get("hive.s3.endpoint").value(); +} + +class S3FileSystemRegistrationTest : public S3Test { + protected: + static void SetUpTestCase() { + memory::MemoryManager::testingSetInstance({}); + filesystems::registerS3FileSystem(cacheKeyFunc); + } + + static void TearDownTestCase() { + filesystems::finalizeS3FileSystem(); + } +}; +} // namespace + +TEST_F(S3FileSystemRegistrationTest, readViaRegistry) { + const char* bucketName = "data2"; + const char* file = "test.txt"; + const std::string filename = localPath(bucketName) + "/" + file; + const std::string s3File = s3URI(bucketName, file); + addBucket(bucketName); + { + LocalWriteFile writeFile(filename); + writeData(&writeFile); + } + auto hiveConfig = minioServer_->hiveConfig(); + { + auto s3fs = filesystems::getFileSystem(s3File, hiveConfig); + auto readFile = s3fs->openFileForRead(s3File); + readData(readFile.get()); + } +} + +TEST_F(S3FileSystemRegistrationTest, fileHandle) { + const char* bucketName = "data3"; + const char* file = "test.txt"; + const std::string filename = localPath(bucketName) + "/" + file; + const std::string s3File = s3URI(bucketName, file); + addBucket(bucketName); + { + LocalWriteFile writeFile(filename); + writeData(&writeFile); + } + auto hiveConfig = minioServer_->hiveConfig(); + FileHandleFactory factory( + std::make_unique>(1000), + std::make_unique(hiveConfig)); + auto fileHandleCachePtr = factory.generate(s3File); + readData(fileHandleCachePtr->file.get()); +} + +TEST_F(S3FileSystemRegistrationTest, cacheKey) { + auto hiveConfig = minioServer_->hiveConfig(); + auto s3fs = filesystems::getFileSystem(kDummyPath, hiveConfig); + std::string_view kDummyPath2 = "s3://dummy2/foo.txt"; + auto s3fs_new = filesystems::getFileSystem(kDummyPath2, hiveConfig); + // The cacheKeyFunc function allows fs caching based on the endpoint value. + ASSERT_EQ(s3fs, s3fs_new); +} + +TEST_F(S3FileSystemRegistrationTest, finalize) { + auto hiveConfig = minioServer_->hiveConfig(); + auto s3fs = filesystems::getFileSystem(kDummyPath, hiveConfig); + VELOX_ASSERT_THROW( + filesystems::finalizeS3FileSystem(), + "Cannot finalize S3FileSystem while in use"); +} +} // namespace facebook::velox::filesystems diff --git a/velox/connectors/lakehouse/storage_adapters/s3fs/tests/S3FileSystemTest.cpp b/velox/connectors/lakehouse/storage_adapters/s3fs/tests/S3FileSystemTest.cpp new file mode 100644 index 000000000000..c0600ecaf52e --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/s3fs/tests/S3FileSystemTest.cpp @@ -0,0 +1,291 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "S3Test.h" +#include "velox/common/memory/Memory.h" +#include "velox/connectors/lakehouse/storage_adapters/s3fs/S3WriteFile.h" + +#include + +namespace facebook::velox::filesystems { +namespace { + +class S3FileSystemTest : public S3Test { + protected: + static void SetUpTestCase() { + memory::MemoryManager::testingSetInstance({}); + } + + void SetUp() override { + S3Test::SetUp(); + auto hiveConfig = minioServer_->hiveConfig({}); + filesystems::initializeS3("Info", kLogLocation_); + } + + static void TearDownTestSuite() { + filesystems::finalizeS3(); + } + + std::string_view kLogLocation_ = "/tmp/foobar/"; +}; +} // namespace + +TEST_F(S3FileSystemTest, writeAndRead) { + /// The hive config used for Minio defaults to turning + /// off using proxy settings if the environment provides them. + setenv("HTTP_PROXY", "http://test:test@127.0.0.1:8888", 1); + const char* bucketName = "data"; + const char* file = "test.txt"; + const auto filename = localPath(bucketName) + "/" + file; + const auto s3File = s3URI(bucketName, file); + addBucket(bucketName); + { + LocalWriteFile writeFile(filename); + writeData(&writeFile); + } + auto hiveConfig = minioServer_->hiveConfig(); + filesystems::S3FileSystem s3fs(bucketName, hiveConfig); + auto readFile = s3fs.openFileForRead(s3File); + readData(readFile.get()); +} + +TEST_F(S3FileSystemTest, invalidCredentialsConfig) { + { + std::unordered_map config( + {{"hive.s3.use-instance-credentials", "true"}, + {"hive.s3.iam-role", "dummy-iam-role"}}); + auto hiveConfig = + std::make_shared(std::move(config)); + + // Both instance credentials and iam-role cannot be specified + VELOX_ASSERT_THROW( + filesystems::S3FileSystem("", hiveConfig), + "Invalid configuration: specify only one among 'access/secret keys', 'use instance credentials', 'IAM role'"); + } + { + std::unordered_map config( + {{"hive.s3.aws-secret-key", "dummy-key"}, + {"hive.s3.aws-access-key", "dummy-key"}, + {"hive.s3.iam-role", "dummy-iam-role"}}); + auto hiveConfig = + std::make_shared(std::move(config)); + // Both access/secret keys and iam-role cannot be specified + VELOX_ASSERT_THROW( + filesystems::S3FileSystem("", hiveConfig), + "Invalid configuration: specify only one among 'access/secret keys', 'use instance credentials', 'IAM role'"); + } + { + std::unordered_map config( + {{"hive.s3.aws-secret-key", "dummy"}, + {"hive.s3.aws-access-key", "dummy"}, + {"hive.s3.use-instance-credentials", "true"}}); + auto hiveConfig = + std::make_shared(std::move(config)); + // Both access/secret keys and instance credentials cannot be specified + VELOX_ASSERT_THROW( + filesystems::S3FileSystem("", hiveConfig), + "Invalid configuration: specify only one among 'access/secret keys', 'use instance credentials', 'IAM role'"); + } + { + std::unordered_map config( + {{"hive.s3.aws-secret-key", "dummy"}}); + auto hiveConfig = + std::make_shared(std::move(config)); + // Both access key and secret key must be specified + VELOX_ASSERT_THROW( + filesystems::S3FileSystem("", hiveConfig), + "Invalid configuration: both access key and secret key must be specified"); + } +} + +TEST_F(S3FileSystemTest, missingFile) { + const char* bucketName = "data1"; + const char* file = "i-do-not-exist.txt"; + const std::string s3File = s3URI(bucketName, file); + addBucket(bucketName); + auto hiveConfig = minioServer_->hiveConfig(); + filesystems::S3FileSystem s3fs(bucketName, hiveConfig); + VELOX_ASSERT_RUNTIME_THROW_CODE( + s3fs.openFileForRead(s3File), + error_code::kFileNotFound, + "Failed to get metadata for S3 
object due to: 'Resource not found'. Path:'s3://data1/i-do-not-exist.txt', SDK Error Type:16, HTTP Status Code:404, S3 Service:'MinIO', Message:'No response body.'"); +} + +TEST_F(S3FileSystemTest, missingBucket) { + auto hiveConfig = minioServer_->hiveConfig(); + filesystems::S3FileSystem s3fs("", hiveConfig); + VELOX_ASSERT_RUNTIME_THROW_CODE( + s3fs.openFileForRead(kDummyPath), + error_code::kFileNotFound, + "Failed to get metadata for S3 object due to: 'Resource not found'. Path:'s3://dummy/foo.txt', SDK Error Type:16, HTTP Status Code:404, S3 Service:'MinIO', Message:'No response body.'"); +} + +TEST_F(S3FileSystemTest, invalidAccessKey) { + auto hiveConfig = + minioServer_->hiveConfig({{"hive.s3.aws-access-key", "dummy-key"}}); + filesystems::S3FileSystem s3fs("", hiveConfig); + // Minio credentials are wrong and this should throw + VELOX_ASSERT_THROW( + s3fs.openFileForRead(kDummyPath), + "Failed to get metadata for S3 object due to: 'Access denied'. Path:'s3://dummy/foo.txt', SDK Error Type:15, HTTP Status Code:403, S3 Service:'MinIO', Message:'No response body.'"); +} + +TEST_F(S3FileSystemTest, invalidSecretKey) { + auto hiveConfig = + minioServer_->hiveConfig({{"hive.s3.aws-secret-key", "dummy-key"}}); + filesystems::S3FileSystem s3fs("", hiveConfig); + // Minio credentials are wrong and this should throw. + VELOX_ASSERT_THROW( + s3fs.openFileForRead("s3://dummy/foo.txt"), + "Failed to get metadata for S3 object due to: 'Access denied'. Path:'s3://dummy/foo.txt', SDK Error Type:15, HTTP Status Code:403, S3 Service:'MinIO', Message:'No response body.'"); +} + +TEST_F(S3FileSystemTest, noBackendServer) { + auto hiveConfig = + minioServer_->hiveConfig({{"hive.s3.aws-secret-key", "dummy-key"}}); + filesystems::S3FileSystem s3fs("", hiveConfig); + // Stop Minio and check error. + minioServer_->stop(); + VELOX_ASSERT_THROW( + s3fs.openFileForRead(kDummyPath), + "Failed to get metadata for S3 object due to: 'Network connection'. Path:'s3://dummy/foo.txt', SDK Error Type:99, HTTP Status Code:-1, S3 Service:'Unknown', Message:'curlCode: 7, Couldn't connect to server'"); + // Start Minio again. + minioServer_->start(); +} + +TEST_F(S3FileSystemTest, logLevel) { + std::unordered_map config; + auto checkLogLevelName = [&config](std::string_view expected) { + auto s3Config = + std::make_shared(std::move(config)); + filesystems::S3FileSystem s3fs("", s3Config); + EXPECT_EQ(s3fs.getLogLevelName(), expected); + }; + + // Test is configured with INFO. + checkLogLevelName("INFO"); + + // S3 log level is set once during initialization. + // It does not change with a new config. + config["hive.s3.log-level"] = "Trace"; + checkLogLevelName("INFO"); +} + +TEST_F(S3FileSystemTest, logLocation) { + // From aws-cpp-sdk-core/include/aws/core/Aws.h . + std::string_view kDefaultPrefix = "aws_sdk_"; + std::unordered_map config; + auto checkLogPrefix = [&config](std::string_view expected) { + auto s3Config = + std::make_shared(std::move(config)); + filesystems::S3FileSystem s3fs("", s3Config); + EXPECT_EQ(s3fs.getLogPrefix(), expected); + }; + + const auto expected = fmt::format("{}{}", kLogLocation_, kDefaultPrefix); + // Test is configured with the default. + checkLogPrefix(expected); + + // S3 log location is set once during initialization. + // It does not change with a new config. 
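+  // (Illustrative note: even though the config below asks for "/home/foobar",
+  // the prefix stays based on the kLogLocation_ value passed to
+  // initializeS3() in SetUp().)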
+ config["hive.s3.log-location"] = "/home/foobar"; + checkLogPrefix(expected); +} + +TEST_F(S3FileSystemTest, writeFileAndRead) { + const auto bucketName = "writedata"; + const auto file = "test.txt"; + const auto filename = localPath(bucketName) + "/" + file; + const auto s3File = s3URI(bucketName, file); + + auto hiveConfig = minioServer_->hiveConfig(); + filesystems::S3FileSystem s3fs(bucketName, hiveConfig); + auto pool = memory::memoryManager()->addLeafPool("S3FileSystemTest"); + auto writeFile = + s3fs.openFileForWrite(s3File, {{}, pool.get(), std::nullopt}); + auto s3WriteFile = dynamic_cast(writeFile.get()); + std::string dataContent = + "Dance me to your beauty with a burning violin" + "Dance me through the panic till I'm gathered safely in" + "Lift me like an olive branch and be my homeward dove" + "Dance me to the end of love"; + + EXPECT_EQ(writeFile->size(), 0); + std::int64_t contentSize = dataContent.length(); + // dataContent length is 178. + EXPECT_EQ(contentSize, 178); + + // Append and flush a small batch of data. + writeFile->append(dataContent.substr(0, 10)); + EXPECT_EQ(writeFile->size(), 10); + writeFile->append(dataContent.substr(10, contentSize - 10)); + EXPECT_EQ(writeFile->size(), contentSize); + writeFile->flush(); + // No parts must have been uploaded. + EXPECT_EQ(s3WriteFile->numPartsUploaded(), 0); + + // Append data 178 * 100'000 ~ 16MiB. + // Should have 1 part in total with kUploadPartSize = 10MiB. + for (int i = 0; i < 100'000; ++i) { + writeFile->append(dataContent); + } + EXPECT_EQ(s3WriteFile->numPartsUploaded(), 1); + EXPECT_EQ(writeFile->size(), 100'001 * contentSize); + + // Append a large data buffer 178 * 150'000 ~ 25MiB (2 parts). + std::vector largeBuffer(contentSize * 150'000); + for (int i = 0; i < 150'000; ++i) { + memcpy( + largeBuffer.data() + (i * contentSize), + dataContent.data(), + contentSize); + } + + writeFile->append({largeBuffer.data(), largeBuffer.size()}); + EXPECT_EQ(writeFile->size(), 250'001 * contentSize); + // Total data = ~41 MB = 5 parts. + // But parts uploaded will be 4. + EXPECT_EQ(s3WriteFile->numPartsUploaded(), 4); + + // Upload the last part. + writeFile->close(); + EXPECT_EQ(s3WriteFile->numPartsUploaded(), 5); + + VELOX_ASSERT_THROW( + writeFile->append(dataContent.substr(0, 10)), "File is closed"); + + auto readFile = s3fs.openFileForRead(s3File); + ASSERT_EQ(readFile->size(), contentSize * 250'001); + // Sample and verify every 1'000 dataContent chunks. + for (int i = 0; i < 250; ++i) { + ASSERT_EQ( + readFile->pread(i * (1'000 * contentSize), contentSize), dataContent); + } + // Verify the last chunk. + ASSERT_EQ(readFile->pread(contentSize * 250'000, contentSize), dataContent); +} + +TEST_F(S3FileSystemTest, invalidConnectionSettings) { + auto hiveConfig = + minioServer_->hiveConfig({{"hive.s3.connect-timeout", "400"}}); + VELOX_ASSERT_THROW( + filesystems::S3FileSystem("", hiveConfig), "Invalid duration"); + + hiveConfig = minioServer_->hiveConfig({{"hive.s3.socket-timeout", "abc"}}); + VELOX_ASSERT_THROW( + filesystems::S3FileSystem("", hiveConfig), "Invalid duration"); +} +} // namespace facebook::velox::filesystems diff --git a/velox/connectors/lakehouse/storage_adapters/s3fs/tests/S3InsertTest.cpp b/velox/connectors/lakehouse/storage_adapters/s3fs/tests/S3InsertTest.cpp new file mode 100644 index 000000000000..fcf8b09e4a7b --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/s3fs/tests/S3InsertTest.cpp @@ -0,0 +1,77 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include "S3Test.h" +#include "velox/connectors/lakehouse/storage_adapters/s3fs/RegisterS3FileSystem.h" +#include "velox/connectors/lakehouse/storage_adapters/test_common/InsertTest.h" +#include "velox/dwio/parquet/RegisterParquetReader.h" +#include "velox/dwio/parquet/RegisterParquetWriter.h" + +namespace facebook::velox::filesystems { +namespace { + +class S3InsertTest : public S3Test, public test::InsertTest { + protected: + static void SetUpTestCase() { + memory::MemoryManager::testingSetInstance({}); + } + + void SetUp() override { + S3Test::SetUp(); + filesystems::registerS3FileSystem(); + connector::registerConnectorFactory( + std::make_shared()); + auto hiveConnector = + connector::getConnectorFactory( + connector::hive::HiveConnectorFactory::kHiveConnectorName) + ->newConnector( + ::connector::hive::test::kHiveConnectorId, + minioServer_->hiveConfig(), + ioExecutor_.get()); + connector::registerConnector(hiveConnector); + parquet::registerParquetReaderFactory(); + parquet::registerParquetWriterFactory(); + } + + void TearDown() override { + parquet::unregisterParquetReaderFactory(); + parquet::unregisterParquetWriterFactory(); + connector::unregisterConnectorFactory( + connector::hive::HiveConnectorFactory::kHiveConnectorName); + connector::unregisterConnector(::connector::hive::test::kHiveConnectorId); + S3Test::TearDown(); + filesystems::finalizeS3FileSystem(); + } +}; +} // namespace + +TEST_F(S3InsertTest, s3InsertTest) { + const int64_t kExpectedRows = 1'000; + const std::string_view kOutputDirectory{"s3://writedata/"}; + minioServer_->addBucket("writedata"); + + runInsertTest(kOutputDirectory, kExpectedRows, pool()); +} +} // namespace facebook::velox::filesystems + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + folly::Init init{&argc, &argv, false}; + return RUN_ALL_TESTS(); +} diff --git a/velox/connectors/lakehouse/storage_adapters/s3fs/tests/S3MultipleEndpointsTest.cpp b/velox/connectors/lakehouse/storage_adapters/s3fs/tests/S3MultipleEndpointsTest.cpp new file mode 100644 index 000000000000..606613d7aaa2 --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/s3fs/tests/S3MultipleEndpointsTest.cpp @@ -0,0 +1,238 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include "S3Test.h" +#include "gtest/gtest.h" +#include "velox/connectors/lakehouse/storage_adapters/s3fs/RegisterS3FileSystem.h" +#include "velox/connectors/lakehouse/storage_adapters/s3fs/S3Util.h" +#include "velox/dwio/parquet/RegisterParquetReader.h" +#include "velox/dwio/parquet/RegisterParquetWriter.h" +#include "velox/exec/TableWriter.h" +#include "velox/exec/tests/utils/AssertQueryBuilder.h" +#include "velox/exec/tests/utils/PlanBuilder.h" + +static const std::string_view kConnectorId1 = "test-hive1"; +static const std::string_view kConnectorId2 = "test-hive2"; +static const std::string_view kBucketName = "writedata"; + +using namespace facebook::velox::exec::test; + +namespace facebook::velox { +namespace { + +class S3MultipleEndpoints : public S3Test, public ::test::VectorTestBase { + public: + static void SetUpTestCase() { + memory::MemoryManager::testingSetInstance({}); + } + static void TearDownTestCase() { + filesystems::finalizeS3FileSystem(); + } + + void SetUp() override { + S3Test::SetUp(); + minioSecondServer_ = std::make_unique(); + minioSecondServer_->start(); + minioServer_->addBucket(kBucketName.data()); + minioSecondServer_->addBucket(kBucketName.data()); + + filesystems::registerS3FileSystem(); + connector::registerConnectorFactory( + std::make_shared()); + parquet::registerParquetReaderFactory(); + parquet::registerParquetWriterFactory(); + } + + void registerConnectors( + std::string_view connectorId1, + std::string_view connectorId2, + const std::unordered_map config1Override = {}, + const std::unordered_map config2Override = {}) { + auto hiveConnector1 = + connector::getConnectorFactory( + connector::hive::HiveConnectorFactory::kHiveConnectorName) + ->newConnector( + std::string(connectorId1), + minioServer_->hiveConfig(config1Override), + ioExecutor_.get()); + auto hiveConnector2 = + connector::getConnectorFactory( + connector::hive::HiveConnectorFactory::kHiveConnectorName) + ->newConnector( + std::string(connectorId2), + minioSecondServer_->hiveConfig(config2Override), + ioExecutor_.get()); + connector::registerConnector(hiveConnector1); + connector::registerConnector(hiveConnector2); + } + + void TearDown() override { + parquet::unregisterParquetReaderFactory(); + parquet::unregisterParquetWriterFactory(); + connector::unregisterConnectorFactory( + connector::hive::HiveConnectorFactory::kHiveConnectorName); + S3Test::TearDown(); + } + + folly::dynamic writeData( + const RowVectorPtr input, + const std::string& outputDirectory, + const std::string& connectorId) { + auto plan = PlanBuilder() + .values({input}) + .tableWrite( + outputDirectory.data(), + {}, + 0, + {}, + {}, + dwio::common::FileFormat::PARQUET, + {}, + connectorId) + .planNode(); + // Execute the write plan. + auto results = AssertQueryBuilder(plan).copyResults(pool()); + // Second column contains details about written files. 
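+    // (The fragment column is JSON; its "fileWriteInfos" entry carries the
+    // write file name and file size that createSplit() below uses to build a
+    // connector split.)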
+ auto details = results->childAt(exec::TableWriteTraits::kFragmentChannel) + ->as>(); + folly::dynamic obj = folly::parseJson(details->valueAt(1)); + return obj["fileWriteInfos"]; + } + + std::shared_ptr createSplit( + folly::dynamic tableWriteInfo, + std::string outputDirectory, + std::string connectorId) { + auto writeFileName = tableWriteInfo[0]["writeFileName"].asString(); + auto filePath = fmt::format("{}{}", outputDirectory, writeFileName); + const int64_t fileSize = tableWriteInfo[0]["fileSize"].asInt(); + + return HiveConnectorSplitBuilder(filePath) + .connectorId(connectorId) + .fileFormat(dwio::common::FileFormat::PARQUET) + .length(fileSize) + .build(); + } + + void testJoin( + int numRows, + std::string_view outputDirectory, + std::string_view connectorId1, + std::string_view connectorId2) { + auto rowType1 = ROW( + {"a0", "a1", "a2", "a3"}, {BIGINT(), INTEGER(), SMALLINT(), DOUBLE()}); + auto rowType2 = ROW( + {"b0", "b1", "b2", "b3"}, {BIGINT(), INTEGER(), SMALLINT(), DOUBLE()}); + + auto input1 = makeRowVector( + rowType1->names(), + {makeFlatVector(numRows, [](auto row) { return row; }), + makeFlatVector(numRows, [](auto row) { return row; }), + makeFlatVector(numRows, [](auto row) { return row; }), + makeFlatVector(numRows, [](auto row) { return row; })}); + auto input2 = makeRowVector(rowType2->names(), input1->children()); + + // Insert input data into both tables. + auto table1WriteInfo = + writeData(input1, outputDirectory.data(), std::string(connectorId1)); + auto table2WriteInfo = + writeData(input2, outputDirectory.data(), std::string(connectorId2)); + + // Inner Join both the tables. + core::PlanNodeId scan1, scan2; + auto planNodeIdGenerator = std::make_shared(); + auto table1Scan = PlanBuilder(planNodeIdGenerator, pool()) + .startTableScan() + .tableName("hive_table1") + .outputType(rowType1) + .connectorId(std::string(connectorId1)) + .endTableScan() + .capturePlanNodeId(scan1) + .planNode(); + auto join = + PlanBuilder(planNodeIdGenerator, pool()) + .startTableScan() + .tableName("hive_table1") + .outputType(rowType2) + .connectorId(std::string(connectorId2)) + .endTableScan() + .capturePlanNodeId(scan2) + .hashJoin({"b0"}, {"a0"}, table1Scan, "", {"a0", "a1", "a2", "a3"}) + .planNode(); + + auto split1 = createSplit( + table1WriteInfo, outputDirectory.data(), std::string(connectorId1)); + auto split2 = createSplit( + table2WriteInfo, outputDirectory.data(), std::string(connectorId2)); + auto results = AssertQueryBuilder(join) + .split(scan1, split1) + .split(scan2, split2) + .copyResults(pool()); + assertEqualResults({input1}, {results}); + } + + std::unique_ptr minioSecondServer_; +}; +} // namespace + +TEST_F(S3MultipleEndpoints, baseEndpoints) { + const int64_t kExpectedRows = 1'000; + const auto outputDirectory{filesystems::s3URI(kBucketName, "")}; + + registerConnectors(kConnectorId1, kConnectorId2); + + testJoin(kExpectedRows, outputDirectory, kConnectorId1, kConnectorId2); + + connector::unregisterConnector(std::string(kConnectorId1)); + connector::unregisterConnector(std::string(kConnectorId2)); +} + +TEST_F(S3MultipleEndpoints, bucketEndpoints) { + const int64_t kExpectedRows = 1'000; + const auto outputDirectory{filesystems::s3URI(kBucketName, "")}; + + auto configOverride = [](std::shared_ptr config) { + return std::unordered_map{ + {"hive.s3.bucket.writedata.endpoint", + config->get("hive.s3.endpoint").value()}, + {"hive.s3.bucket.writedata.aws-access-key", + config->get("hive.s3.aws-access-key").value()}, + 
{"hive.s3.bucket.writedata.aws-secret-key", + config->get("hive.s3.aws-secret-key").value()}, + {"hive.s3.endpoint", "fail"}, + {"hive.s3.aws-access-key", "fail"}, + {"hive.s3.aws-secret-key", "fail"}, + }; + }; + auto config1 = configOverride(minioServer_->hiveConfig()); + auto config2 = configOverride(minioSecondServer_->hiveConfig()); + registerConnectors(kConnectorId1, kConnectorId2, config1, config2); + + testJoin(kExpectedRows, outputDirectory, kConnectorId1, kConnectorId2); + + connector::unregisterConnector(std::string(kConnectorId1)); + connector::unregisterConnector(std::string(kConnectorId2)); +} + +} // namespace facebook::velox + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + folly::Init init{&argc, &argv, false}; + return RUN_ALL_TESTS(); +} diff --git a/velox/connectors/lakehouse/storage_adapters/s3fs/tests/S3ReadTest.cpp b/velox/connectors/lakehouse/storage_adapters/s3fs/tests/S3ReadTest.cpp new file mode 100644 index 000000000000..9d6c5c0f5fb8 --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/s3fs/tests/S3ReadTest.cpp @@ -0,0 +1,100 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include "S3Test.h" +#include "velox/common/memory/Memory.h" +#include "velox/connectors/lakehouse/storage_adapters/s3fs/RegisterS3FileSystem.h" +#include "velox/dwio/common/tests/utils/DataFiles.h" +#include "velox/dwio/parquet/RegisterParquetReader.h" +#include "velox/exec/tests/utils/AssertQueryBuilder.h" +#include "velox/exec/tests/utils/PlanBuilder.h" + +using namespace facebook::velox::exec::test; + +namespace facebook::velox::filesystems { +namespace { + +class S3ReadTest : public S3Test, public ::test::VectorTestBase { + protected: + static void SetUpTestCase() { + memory::MemoryManager::testingSetInstance({}); + } + + void SetUp() override { + S3Test::SetUp(); + filesystems::registerS3FileSystem(); + connector::registerConnectorFactory( + std::make_shared()); + auto hiveConnector = + connector::getConnectorFactory( + connector::hive::HiveConnectorFactory::kHiveConnectorName) + ->newConnector(kHiveConnectorId, minioServer_->hiveConfig()); + connector::registerConnector(hiveConnector); + parquet::registerParquetReaderFactory(); + } + + void TearDown() override { + parquet::unregisterParquetReaderFactory(); + filesystems::finalizeS3FileSystem(); + connector::unregisterConnectorFactory( + connector::hive::HiveConnectorFactory::kHiveConnectorName); + connector::unregisterConnector(kHiveConnectorId); + S3Test::TearDown(); + } +}; +} // namespace + +TEST_F(S3ReadTest, s3ReadTest) { + const auto sourceFile = test::getDataFilePath( + "velox/connectors/lakehouse/storage_adapters/s3fs/tests", + "../../../../../dwio/parquet/tests/examples/int.parquet"); + const char* bucketName = "data"; + const auto destinationFile = S3Test::localPath(bucketName) + "/int.parquet"; + minioServer_->addBucket(bucketName); + std::ifstream src(sourceFile, std::ios::binary); + 
std::ofstream dest(destinationFile, std::ios::binary);
+  // Copy source file to destination bucket.
+  dest << src.rdbuf();
+  ASSERT_GT(dest.tellp(), 0) << "Unable to copy from source " << sourceFile;
+  dest.close();
+
+  // Read the parquet file via the S3 bucket.
+  auto rowType = ROW({"int", "bigint"}, {INTEGER(), BIGINT()});
+  auto plan = PlanBuilder().tableScan(rowType).planNode();
+  auto split = HiveConnectorSplitBuilder(s3URI(bucketName, "int.parquet"))
+                   .fileFormat(dwio::common::FileFormat::PARQUET)
+                   .build();
+  auto copy = AssertQueryBuilder(plan).split(split).copyResults(pool());
+
+  // expectedResults is the data in the int.parquet file.
+  const int64_t kExpectedRows = 10;
+  auto expectedResults = makeRowVector(
+      {makeFlatVector<int32_t>(
+           kExpectedRows, [](auto row) { return row + 100; }),
+       makeFlatVector<int64_t>(
+           kExpectedRows, [](auto row) { return row + 1000; })});
+  assertEqualResults({expectedResults}, {copy});
+}
+} // namespace facebook::velox::filesystems
+
+int main(int argc, char** argv) {
+  testing::InitGoogleTest(&argc, argv);
+  folly::Init init{&argc, &argv, false};
+  return RUN_ALL_TESTS();
+}
diff --git a/velox/connectors/lakehouse/storage_adapters/s3fs/tests/S3Test.h b/velox/connectors/lakehouse/storage_adapters/s3fs/tests/S3Test.h
new file mode 100644
index 000000000000..50a01c8a0b48
--- /dev/null
+++ b/velox/connectors/lakehouse/storage_adapters/s3fs/tests/S3Test.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MinioServer.h"
+#include "velox/common/base/tests/GTestUtils.h"
+#include "velox/common/file/File.h"
+#include "velox/connectors/common/tests/utils/HiveConnectorTestBase.h"
+#include "velox/connectors/hive/FileHandle.h"
+#include "velox/connectors/lakehouse/storage_adapters/s3fs/S3FileSystem.h"
+#include "velox/connectors/lakehouse/storage_adapters/s3fs/S3Util.h"
+#include "velox/exec/tests/utils/TempFilePath.h"
+
+#include "gtest/gtest.h"
+
+using namespace facebook::velox;
+
+constexpr int kOneMB = 1 << 20;
+
+static constexpr std::string_view kDummyPath = "s3://dummy/foo.txt";
+
+class S3Test : public testing::Test {
+ protected:
+  void SetUp() override {
+    minioServer_ = std::make_unique<MinioServer>();
+    minioServer_->start();
+    ioExecutor_ = std::make_unique<folly::IOThreadPoolExecutor>(3);
+  }
+
+  void TearDown() override {
+    minioServer_->stop();
+  }
+
+  void addBucket(const char* bucket) {
+    minioServer_->addBucket(bucket);
+  }
+
+  std::string localPath(const char* directory) {
+    return minioServer_->path() + "/" + directory;
+  }
+
+  void writeData(WriteFile* writeFile) {
+    writeFile->append("aaaaa");
+    writeFile->append("bbbbb");
+    writeFile->append(std::string(kOneMB, 'c'));
+    writeFile->append("ddddd");
+    ASSERT_EQ(writeFile->size(), 15 + kOneMB);
+  }
+
+  void readData(ReadFile* readFile) {
+    ASSERT_EQ(readFile->size(), 15 + kOneMB);
+    char buffer1[5];
+    ASSERT_EQ(readFile->pread(10 + kOneMB, 5, &buffer1), "ddddd");
+    char buffer2[10];
+    ASSERT_EQ(readFile->pread(0, 10, &buffer2), "aaaaabbbbb");
+    char buffer3[kOneMB];
+    ASSERT_EQ(readFile->pread(10, kOneMB, &buffer3), std::string(kOneMB, 'c'));
+    ASSERT_EQ(readFile->size(), 15 + kOneMB);
+    char buffer4[10];
+    const std::string_view arf = readFile->pread(5, 10, &buffer4);
+    const std::string zarf = readFile->pread(kOneMB, 15);
+    auto buf = std::make_unique<char[]>(8);
+    const std::string_view warf = readFile->pread(4, 8, buf.get());
+    const std::string_view warfFromBuf(buf.get(), 8);
+    ASSERT_EQ(arf, "bbbbbccccc");
+    ASSERT_EQ(zarf, "ccccccccccddddd");
+    ASSERT_EQ(warf, "abbbbbcc");
+    ASSERT_EQ(warfFromBuf, "abbbbbcc");
+    char head[12];
+    char middle[4];
+    char tail[7];
+    std::vector<folly::Range<char*>> buffers = {
+        folly::Range<char*>(head, sizeof(head)),
+        folly::Range<char*>(nullptr, (char*)(uint64_t)500000),
+        folly::Range<char*>(middle, sizeof(middle)),
+        folly::Range<char*>(
+            nullptr,
+            (char*)(uint64_t)(15 + kOneMB - 500000 - sizeof(head) -
+                              sizeof(middle) - sizeof(tail))),
+        folly::Range<char*>(tail, sizeof(tail))};
+    ASSERT_EQ(15 + kOneMB, readFile->preadv(0, buffers));
+    ASSERT_EQ(std::string_view(head, sizeof(head)), "aaaaabbbbbcc");
+    ASSERT_EQ(std::string_view(middle, sizeof(middle)), "cccc");
+    ASSERT_EQ(std::string_view(tail, sizeof(tail)), "ccddddd");
+  }
+
+  std::unique_ptr<MinioServer> minioServer_;
+  std::unique_ptr<folly::IOThreadPoolExecutor> ioExecutor_;
+};
diff --git a/velox/connectors/lakehouse/storage_adapters/s3fs/tests/S3UtilTest.cpp b/velox/connectors/lakehouse/storage_adapters/s3fs/tests/S3UtilTest.cpp
new file mode 100644
index 000000000000..76c7c466da41
--- /dev/null
+++ b/velox/connectors/lakehouse/storage_adapters/s3fs/tests/S3UtilTest.cpp
@@ -0,0 +1,269 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "velox/connectors/lakehouse/storage_adapters/s3fs/S3Util.h" + +#include "gtest/gtest.h" + +namespace facebook::velox::filesystems { + +// TODO: Each prefix should be implemented as its own filesystem. +TEST(S3UtilTest, isS3File) { + EXPECT_FALSE(isS3File("ss3://")); + EXPECT_FALSE(isS3File("s3:/")); + EXPECT_FALSE(isS3File("oss:")); + EXPECT_FALSE(isS3File("cos:")); + EXPECT_FALSE(isS3File("cosn:")); + EXPECT_FALSE(isS3File("S3A://bucket/some/file.txt")); + EXPECT_FALSE(isS3File("OSS://other-bucket/some/file.txt")); + EXPECT_FALSE(isS3File("COS://other-bucket/some/file.txt")); + EXPECT_FALSE(isS3File("COSN://other-bucket/some/file.txt")); + EXPECT_FALSE(isS3File("s3::/bucket")); + EXPECT_FALSE(isS3File("s3:/bucket")); + EXPECT_FALSE(isS3File("file://bucket")); + EXPECT_TRUE(isS3File("s3://bucket/file.txt")); + EXPECT_TRUE(isS3File("s3n://bucket/file.txt")); +} + +TEST(S3UtilTest, isS3AwsFile) { + EXPECT_FALSE(isS3AwsFile("s3:")); + EXPECT_FALSE(isS3AwsFile("s3::/bucket")); + EXPECT_FALSE(isS3AwsFile("s3:/bucket")); + EXPECT_TRUE(isS3AwsFile("s3://bucket/file.txt")); +} + +TEST(S3UtilTest, isS3aFile) { + EXPECT_FALSE(isS3aFile("s3a:")); + EXPECT_FALSE(isS3aFile("s3a::/bucket")); + EXPECT_FALSE(isS3aFile("s3a:/bucket")); + EXPECT_FALSE(isS3aFile("S3A://bucket-name/file.txt")); + EXPECT_TRUE(isS3aFile("s3a://bucket/file.txt")); +} + +TEST(S3UtilTest, isS3nFile) { + EXPECT_FALSE(isS3nFile("s3n:")); + EXPECT_FALSE(isS3nFile("s3n::/bucket")); + EXPECT_FALSE(isS3nFile("s3n:/bucket")); + EXPECT_FALSE(isS3nFile("S3N://bucket-name/file.txt")); + EXPECT_TRUE(isS3nFile("s3n://bucket/file.txt")); +} + +TEST(S3UtilTest, isOssFile) { + EXPECT_FALSE(isOssFile("oss:")); + EXPECT_FALSE(isOssFile("oss::/bucket")); + EXPECT_FALSE(isOssFile("oss:/bucket")); + EXPECT_FALSE(isOssFile("OSS://BUCKET/sub-key/file.txt")); + EXPECT_TRUE(isOssFile("oss://bucket/file.txt")); +} + +TEST(S3UtilTest, isCosFile) { + EXPECT_FALSE(isCosFile("cos:")); + EXPECT_FALSE(isCosFile("cos::/bucket")); + EXPECT_FALSE(isCosFile("cos:/bucket")); + EXPECT_FALSE(isCosFile("COS://BUCKET/sub-key/file.txt")); + EXPECT_TRUE(isCosFile("cos://bucket/file.txt")); +} + +TEST(S3UtilTest, isCosNFile) { + EXPECT_FALSE(isCosNFile("cosn:")); + EXPECT_FALSE(isCosNFile("cosn::/bucket")); + EXPECT_FALSE(isCosNFile("cosn:/bucket")); + EXPECT_FALSE(isCosNFile("COSN://BUCKET/sub-key/file.txt")); + EXPECT_TRUE(isCosNFile("cosn://bucket/file.txt")); +} + +TEST(S3UtilTest, s3Path) { + auto path_0 = getPath("s3://bucket/file.txt"); + auto path_1 = getPath("oss://bucket-name/file.txt"); + auto path_2 = getPath("S3A://bucket-NAME/sub-PATH/my-file.txt"); + auto path_3 = getPath("s3N://bucket-NAME/sub-PATH/my-file.txt"); + auto path_4 = getPath("cos://bucket-name/file.txt"); + auto path_5 = getPath("cosn://bucket-name/file.txt"); + EXPECT_EQ(path_0, "bucket/file.txt"); + EXPECT_EQ(path_1, "bucket-name/file.txt"); + EXPECT_NE(path_2, "bucket-NAME/sub-PATH/my-file.txt"); + EXPECT_NE(path_3, "bucket-NAME/sub-PATH/my-file.txt"); + EXPECT_EQ(path_4, "bucket-name/file.txt"); + EXPECT_EQ(path_5, "bucket-name/file.txt"); +} + 
+TEST(S3UtilTest, bucketAndKeyFromgetPath) { + std::string bucket, key; + auto path = "bucket/file.txt"; + getBucketAndKeyFromPath(path, bucket, key); + EXPECT_EQ(bucket, "bucket"); + EXPECT_EQ(key, "file.txt"); +} + +TEST(S3UtilTest, isDomainExcludedFromProxy) { + auto hostname = "test.foobar.com"; + + std::vector> tests = { + {"localhost,.foobar.com", true}, + {"localhost,.,foobar.com,.com", true}, + {"localhost,test.foobar.com", true}, + {"localhost,foobar.com,*.com", true}, + {"localhost,*.foobar.com", true}, + {"localhost", false}, + {"localhost,foobar.com", false}, + {"", false}, + }; + + for (auto pair : tests) { + EXPECT_EQ(isHostExcludedFromProxy(hostname, pair.first), pair.second); + } +} + +TEST(S3UtilTest, parseAWSRegion) { + // bucket.s3.[region] + EXPECT_EQ( + parseAWSStandardRegionName("foo.s3.region.amazonaws.com"), "region"); + EXPECT_EQ( + parseAWSStandardRegionName("foo.s3.region.amazonaws.com/"), "region"); + // bucket.s3-[region] + EXPECT_EQ( + parseAWSStandardRegionName("foo.s3-region.amazonaws.com"), "region"); + EXPECT_EQ( + parseAWSStandardRegionName("foo.s3-region.amazonaws.com/"), "region"); + // service.[region] + EXPECT_EQ(parseAWSStandardRegionName("foo.a3-reg.amazonaws.com"), "a3-reg"); + EXPECT_EQ(parseAWSStandardRegionName("foo.a3-reg.amazonaws.com/"), "a3-reg"); + // Not the right suffix + EXPECT_EQ( + parseAWSStandardRegionName("foo.a3-region.amazon.com"), std::nullopt); + EXPECT_EQ(parseAWSStandardRegionName(""), std::nullopt); + EXPECT_EQ(parseAWSStandardRegionName("velox"), std::nullopt); +} + +TEST(S3UtilTest, isIpExcludedFromProxy) { + auto hostname = "127.0.0.1"; + + std::vector> tests = { + {"localhost,127.0.0.1,.foobar.com", true}, + {"localhost,127.0.0.0/24,.foobar.com", true}, + {"localhost,foobar.com,127.0.0.0/16,.1,.com", true}, + {"localhost,foobar.com,.1,.com", true}, + {"localhost,test.foobar.com", false}, + {"localhost,foobar.com,*.1,*.com", true}, + {"localhost", false}, + {"localhost,127.1.0.1", false}, + {"", false}, + }; + + for (auto pair : tests) { + EXPECT_EQ(isHostExcludedFromProxy(hostname, pair.first), pair.second) + << pair.first; + } +} + +class S3UtilProxyTest : public ::testing::TestWithParam {}; + +TEST_P(S3UtilProxyTest, proxyBuilderBadEndpoint) { + auto s3Endpoint = "http://127.0.0.1:8888"; + auto useSsl = GetParam(); + + setenv("HTTP_PROXY", "http://127.0.0.1:12345", 1); + setenv("HTTPS_PROXY", "http://127.0.0.1:12345", 1); + EXPECT_FALSE(S3ProxyConfigurationBuilder(s3Endpoint) + .useSsl(useSsl) + .build() + .has_value()); +} + +TEST_P(S3UtilProxyTest, proxyBuilderNoProxy) { + auto s3Endpoint = "127.0.0.1:8888"; + auto useSsl = GetParam(); + + setenv("HTTP_PROXY", "", 1); + setenv("HTTPS_PROXY", "", 1); + EXPECT_FALSE(S3ProxyConfigurationBuilder(s3Endpoint) + .useSsl(useSsl) + .build() + .has_value()); +} + +TEST_P(S3UtilProxyTest, proxyBuilderSameHttpProxy) { + auto s3Endpoint = "192.168.0.1:12345"; + auto useSsl = GetParam(); + + setenv("HTTP_PROXY", "http://127.0.0.1:8888", 1); + setenv("HTTPS_PROXY", "http://127.0.0.1:8888", 1); + auto proxyConfig = + S3ProxyConfigurationBuilder(s3Endpoint).useSsl(useSsl).build(); + ASSERT_TRUE(proxyConfig.has_value()); + EXPECT_EQ(proxyConfig.value().scheme(), "http"); + EXPECT_EQ(proxyConfig.value().host(), "127.0.0.1"); + EXPECT_EQ(proxyConfig.value().port(), 8888); + EXPECT_EQ(proxyConfig.value().username(), ""); + EXPECT_EQ(proxyConfig.value().password(), ""); +} + +TEST_P(S3UtilProxyTest, proxyBuilderMixProxy) { + auto s3Endpoint = "192.168.0.1:12345"; + auto useSsl = 
GetParam(); + + const std::string httpProxy = "https://test1:testpw1@80.67.3.1:35631"; + setenv("HTTP_PROXY", httpProxy.c_str(), 1); + EXPECT_EQ(getHttpProxyEnvVar(), httpProxy) + << "HTTP_PROXY environment variable not set."; + const std::string httpsProxy = "http://test2:testpw2@80.80.5.1:45631"; + setenv("HTTPS_PROXY", httpsProxy.c_str(), 1); + EXPECT_EQ(getHttpsProxyEnvVar(), httpsProxy) + << "HTTPS_PROXY environment variable not set."; + auto proxyConfig = + S3ProxyConfigurationBuilder(s3Endpoint).useSsl(useSsl).build(); + ASSERT_TRUE(proxyConfig.has_value()); + EXPECT_EQ(proxyConfig.value().scheme(), (useSsl ? "http" : "https")); + EXPECT_EQ(proxyConfig.value().host(), (useSsl ? "80.80.5.1" : "80.67.3.1")); + EXPECT_EQ(proxyConfig.value().port(), (useSsl ? 45631 : 35631)); + EXPECT_EQ(proxyConfig.value().username(), (useSsl ? "test2" : "test1")); + EXPECT_EQ(proxyConfig.value().password(), (useSsl ? "testpw2" : "testpw1")); +} + +TEST_P(S3UtilProxyTest, proxyBuilderMixProxyLowerCase) { + auto s3Endpoint = "192.168.0.1:12345"; + auto useSsl = GetParam(); + + const std::string lcHttpProxy = "https://lctest1:lctestpw1@80.67.3.1:35631"; + const std::string ucHttpProxy = "https://uctest1:uctestpw1@80.67.3.2:35632"; + setenv("http_proxy", lcHttpProxy.c_str(), 1); + setenv("HTTP_PROXY", ucHttpProxy.c_str(), 1); + // Lower case value takes precedence. + EXPECT_EQ(getHttpProxyEnvVar(), lcHttpProxy) + << "http_proxy environment variable not set."; + const std::string lcHttpsProxy = "http://lctest2:lctestpw2@80.80.5.1:45631"; + const std::string ucHttpsProxy = "http://uctest2:uctestpw2@80.80.5.2:45632"; + setenv("https_proxy", lcHttpsProxy.c_str(), 1); + setenv("HTTPS_PROXY", ucHttpsProxy.c_str(), 1); + EXPECT_EQ(getHttpsProxyEnvVar(), lcHttpsProxy) + << "https_proxy environment variable not set."; + auto proxyConfig = + S3ProxyConfigurationBuilder(s3Endpoint).useSsl(useSsl).build(); + ASSERT_TRUE(proxyConfig.has_value()); + EXPECT_EQ(proxyConfig.value().scheme(), (useSsl ? "http" : "https")); + EXPECT_EQ(proxyConfig.value().host(), (useSsl ? "80.80.5.1" : "80.67.3.1")); + EXPECT_EQ(proxyConfig.value().port(), (useSsl ? 45631 : 35631)); + EXPECT_EQ(proxyConfig.value().username(), (useSsl ? "lctest2" : "lctest1")); + EXPECT_EQ( + proxyConfig.value().password(), (useSsl ? "lctestpw2" : "lctestpw1")); +} + +INSTANTIATE_TEST_SUITE_P( + S3UtilTest, + S3UtilProxyTest, + ::testing::Values(true, false)); + +} // namespace facebook::velox::filesystems diff --git a/velox/connectors/lakehouse/storage_adapters/test_common/InsertTest.h b/velox/connectors/lakehouse/storage_adapters/test_common/InsertTest.h new file mode 100644 index 000000000000..c1231eeb9d2e --- /dev/null +++ b/velox/connectors/lakehouse/storage_adapters/test_common/InsertTest.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include <folly/json.h>
+#include <gtest/gtest.h>
+
+#include "velox/common/memory/Memory.h"
+#include "velox/dwio/parquet/RegisterParquetReader.h"
+#include "velox/dwio/parquet/RegisterParquetWriter.h"
+#include "velox/exec/TableWriter.h"
+#include "velox/exec/tests/utils/AssertQueryBuilder.h"
+#include "velox/connectors/common/tests/utils/HiveConnectorTestBase.h"
+#include "velox/exec/tests/utils/PlanBuilder.h"
+#include "velox/vector/tests/utils/VectorTestBase.h"
+
+namespace facebook::velox::test {
+
+class InsertTest : public velox::test::VectorTestBase {
+ public:
+  void runInsertTest(
+      std::string_view outputDirectory,
+      int numRows,
+      memory::MemoryPool* pool) {
+    auto rowType = ROW(
+        {"c0", "c1", "c2", "c3"}, {BIGINT(), INTEGER(), SMALLINT(), DOUBLE()});
+
+    auto input = makeRowVector(
+        {makeFlatVector<int64_t>(numRows, [](auto row) { return row; }),
+         makeFlatVector<int32_t>(numRows, [](auto row) { return row; }),
+         makeFlatVector<int16_t>(numRows, [](auto row) { return row; }),
+         makeFlatVector<double>(numRows, [](auto row) { return row; })});
+
+    // Insert with one writer.
+    auto plan =
+        exec::test::PlanBuilder()
+            .values({input})
+            .tableWrite(
+                outputDirectory.data(), dwio::common::FileFormat::PARQUET)
+            .planNode();
+
+    // Execute the write plan.
+    auto results = exec::test::AssertQueryBuilder(plan).copyResults(pool);
+
+    // First column has number of rows written in the first row and nulls in
+    // other rows.
+    auto rowCount = results->childAt(exec::TableWriteTraits::kRowCountChannel)
+                        ->as<FlatVector<int64_t>>();
+    ASSERT_FALSE(rowCount->isNullAt(0));
+    ASSERT_EQ(numRows, rowCount->valueAt(0));
+    ASSERT_TRUE(rowCount->isNullAt(1));
+
+    // Second column contains details about written files.
+    auto details = results->childAt(exec::TableWriteTraits::kFragmentChannel)
+                       ->as<FlatVector<StringView>>();
+    ASSERT_TRUE(details->isNullAt(0));
+    ASSERT_FALSE(details->isNullAt(1));
+    folly::dynamic obj = folly::parseJson(details->valueAt(1));
+
+    ASSERT_EQ(numRows, obj["rowCount"].asInt());
+    auto fileWriteInfos = obj["fileWriteInfos"];
+    ASSERT_EQ(1, fileWriteInfos.size());
+
+    auto writeFileName = fileWriteInfos[0]["writeFileName"].asString();
+
+    // Read from 'writeFileName' and verify the data matches the original.
+    plan = exec::test::PlanBuilder().tableScan(rowType).planNode();
+
+    auto filePath = fmt::format("{}{}", outputDirectory, writeFileName);
+    const int64_t fileSize = fileWriteInfos[0]["fileSize"].asInt();
+    auto split = HiveConnectorSplitBuilder(filePath)
+                     .fileFormat(dwio::common::FileFormat::PARQUET)
+                     .length(fileSize)
+                     .build();
+    auto copy =
+        exec::test::AssertQueryBuilder(plan).split(split).copyResults(pool);
+    exec::test::assertEqualResults({input}, {copy});
+  }
+};
+} // namespace facebook::velox::test
diff --git a/velox/experimental/wave/exec/Wave.h b/velox/experimental/wave/exec/Wave.h
index 1c11548e87ed..afdc20537e42 100644
--- a/velox/experimental/wave/exec/Wave.h
+++ b/velox/experimental/wave/exec/Wave.h
@@ -565,7 +565,7 @@ class Program : public std::enable_shared_from_this<Program> {
   void releaseExe(std::unique_ptr<Executable>&& exe) {
     std::lock_guard l(mutex_);
     // The exe being freed should not be the last reference to the Program.
-    VELOX_CHECK(!exe->programShared.unique());
+    VELOX_CHECK(!exe->programShared->unique());
     exe->programShared = nullptr;
     prepared_.push_back(std::move(exe));
   }
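Note on the shared test helper: test::InsertTest above is storage-agnostic; a concrete test only needs to register a filesystem and a Hive connector before calling runInsertTest(), as S3InsertTest does via MinIO. The sketch below illustrates that reuse against the local filesystem. It is not part of this patch; the fixture name LocalInsertTest, the connector id "test-hive", the empty ConfigBase, and the TempDirectoryPath helper usage are assumptions made for illustration only.

#include <folly/init/Init.h>
#include <gtest/gtest.h>

#include "velox/common/file/FileSystems.h"
#include "velox/connectors/hive/HiveConnector.h"
#include "velox/connectors/lakehouse/storage_adapters/test_common/InsertTest.h"
#include "velox/dwio/parquet/RegisterParquetReader.h"
#include "velox/dwio/parquet/RegisterParquetWriter.h"
#include "velox/exec/tests/utils/TempDirectoryPath.h"

namespace facebook::velox::test {
namespace {

// Illustrative fixture: reuses InsertTest with the local filesystem instead
// of an S3 endpoint. "test-hive" is a hypothetical connector id.
class LocalInsertTest : public testing::Test, public InsertTest {
 protected:
  static void SetUpTestCase() {
    memory::MemoryManager::testingSetInstance({});
  }

  void SetUp() override {
    filesystems::registerLocalFileSystem();
    connector::registerConnectorFactory(
        std::make_shared<connector::hive::HiveConnectorFactory>());
    // An empty config is enough for local reads and writes.
    auto hiveConnector =
        connector::getConnectorFactory(
            connector::hive::HiveConnectorFactory::kHiveConnectorName)
            ->newConnector(
                "test-hive",
                std::make_shared<config::ConfigBase>(
                    std::unordered_map<std::string, std::string>{}));
    connector::registerConnector(hiveConnector);
    parquet::registerParquetReaderFactory();
    parquet::registerParquetWriterFactory();
  }

  void TearDown() override {
    parquet::unregisterParquetWriterFactory();
    parquet::unregisterParquetReaderFactory();
    connector::unregisterConnector("test-hive");
    connector::unregisterConnectorFactory(
        connector::hive::HiveConnectorFactory::kHiveConnectorName);
  }
};

TEST_F(LocalInsertTest, insert) {
  auto outputDirectory = exec::test::TempDirectoryPath::create();
  // runInsertTest() expects a directory URI ending with '/'.
  runInsertTest(outputDirectory->getPath() + "/", 1'000, pool());
}

} // namespace
} // namespace facebook::velox::test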