apache · jackye1995 · Feb 8, 2022 · Feb 8, 2022 · Feb 8, 2022 · Feb 8, 2022
diff --git a/.github/labeler.yml b/.github/labeler.yml
@@ -32,7 +32,7 @@ BUILD:
   - "**/*gradle*"
   - versions.props
 DOCS:
-  - site/**/*
+  - docs/**/*
   - "**/*CHANGELOG.md"
   - "**/*README.md"
 EXAMPLES:

diff --git a/.github/workflows/flink-ci.yml b/.github/workflows/flink-ci.yml
@@ -35,7 +35,7 @@ on:
     - 'pig/**'
     - 'python/**'
     - 'python_legacy/**'
-    - 'site/**'
+    - 'docs/**'
   pull_request:
     paths-ignore:
     - '.github/workflows/python-ci.yml'
@@ -51,7 +51,7 @@ on:
     - 'pig/**'
     - 'python/**'
     - 'python_legacy/**'
-    - 'site/**'
+    - 'docs/**'
 
 jobs:
   flink-tests:

diff --git a/.github/workflows/hive-ci.yml b/.github/workflows/hive-ci.yml
@@ -33,7 +33,7 @@ on:
     - 'pig/**'
     - 'python/**'
     - 'python_legacy/**'
-    - 'site/**'
+    - 'docs/**'
   pull_request:
     paths-ignore:
     - '.github/workflows/python-ci.yml'
@@ -47,7 +47,7 @@ on:
     - 'pig/**'
     - 'python/**'
     - 'python_legacy/**'
-    - 'site/**'
+    - 'docs/**'
 
 jobs:
   hive2-tests:

diff --git a/.github/workflows/java-ci.yml b/.github/workflows/java-ci.yml
@@ -30,7 +30,7 @@ on:
     - 'dev/**'
     - 'python/**'
     - 'python_legacy/**'
-    - 'site/**'
+    - 'docs/**'
   pull_request:
     paths-ignore:
     - '.github/workflows/python-ci.yml'
@@ -41,7 +41,7 @@ on:
     - 'dev/**'
     - 'python/**'
     - 'python_legacy/**'
-    - 'site/**'
+    - 'docs/**'
 
 jobs:
   core-tests:

diff --git a/.github/workflows/spark-ci.yml b/.github/workflows/spark-ci.yml
@@ -35,7 +35,7 @@ on:
     - 'pig/**'
     - 'python/**'
     - 'python_legacy/**'
-    - 'site/**'
+    - 'docs/**'
   pull_request:
     paths-ignore:
     - '.github/workflows/python-ci.yml'
@@ -51,7 +51,7 @@ on:
     - 'pig/**'
     - 'python/**'
     - 'python_legacy/**'
-    - 'site/**'
+    - 'docs/**'
 
 jobs:
   spark2-tests:

diff --git a/dev/.rat-excludes b/dev/.rat-excludes
@@ -24,4 +24,5 @@ gradle/*
 package-list
 sitemap.xml
 derby.log
-.python-version
+.python-version
+.*_index.md
diff --git a/docs/common/community/blogs.md b/docs/common/community/blogs.md
@@ -0,0 +1,129 @@
+---
+url: blogs
+weight: 200
+---
+<!--
+ - Licensed to the Apache Software Foundation (ASF) under one or more
+ - contributor license agreements.  See the NOTICE file distributed with
+ - this work for additional information regarding copyright ownership.
+ - The ASF licenses this file to You under the Apache License, Version 2.0
+ - (the "License"); you may not use this file except in compliance with
+ - the License.  You may obtain a copy of the License at
+ -
+ -   http://www.apache.org/licenses/LICENSE-2.0
+ -
+ - Unless required by applicable law or agreed to in writing, software
+ - distributed under the License is distributed on an "AS IS" BASIS,
+ - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ - See the License for the specific language governing permissions and
+ - limitations under the License.
+ -->
+
+## Iceberg Blogs
+
+Here is a list of company blogs that talk about Iceberg. The blogs are ordered from most recent to oldest.
+
+### [Docker, Spark, and Iceberg: The Fastest Way to Try Iceberg!](https://tabular.io/blog/docker-spark-and-iceberg/)
+**Date**: February 2nd, 2022, **Company**: Tabular
+
+**Author**: [Sam Redai](https://www.linkedin.com/in/sredai/), [Kyle Bendickson](https://www.linkedin.com/in/kylebendickson/)
+
+### [Iceberg FileIO: Cloud Native Tables](https://tabular.io/blog/iceberg-fileio/)
+**Date**: December 16th, 2021, **Company**: Tabular
+
+**Author**: [Daniel Weeks](https://www.linkedin.com/in/daniel-weeks-a1946860/)
+
+### [Using Spark in EMR with Apache Iceberg](https://tabular.io/blog/emr-spark-and-iceberg/)
+**Date**: December 10th, 2021, **Company**: Tabular
+
+**Author**: [Sam Redai](https://www.linkedin.com/in/sredai/)
+
+### [Using Flink CDC to synchronize data from MySQL sharding tables and build real-time data lake](https://ververica.github.io/flink-cdc-connectors/master/content/quickstart/build-real-time-data-lake-tutorial.html)
+**Date**: November 11th, 2021, **Company**: Ververica, Alibaba Cloud
+
+**Author**: [Yuxia Luo](https://github.com/luoyuxia), [Jark Wu](https://github.com/wuchong), [Zheng Hu](https://www.linkedin.com/in/zheng-hu-37017683/)
+
+### [Metadata Indexing in Iceberg](https://tabular.io/blog/iceberg-metadata-indexing/)
+**Date**: October 10th, 2021, **Company**: Tabular
+
+**Author**: [Ryan Blue](https://www.linkedin.com/in/rdblue/)
+
+### [Using Debezium to Create a Data Lake with Apache Iceberg](https://debezium.io/blog/2021/10/20/using-debezium-create-data-lake-with-apache-iceberg/)
+**Date**: October 20th, 2021, **Company**: Memiiso Community
+
+**Author**: [Ismail Simsek](https://www.linkedin.com/in/ismailsimsek/)
+
+### [How to Analyze CDC Data in Iceberg Data Lake Using Flink](https://www.alibabacloud.com/blog/how-to-analyze-cdc-data-in-iceberg-data-lake-using-flink_597838)
+**Date**: June 15th, 2021, **Company**: Alibaba Cloud Community
+
+**Author**: [Li Jinsong](https://www.linkedin.com/in/%E5%8A%B2%E6%9D%BE-%E6%9D%8E-48b54b101/), [Hu Zheng](https://www.linkedin.com/in/zheng-hu-37017683/), [Yang Weihai](https://www.linkedin.com/in/weihai-yang-697a16224/), [Peidian Li](https://www.linkedin.com/in/peidian-li-18938820a/)
+
+### [Apache Iceberg: An Architectural Look Under the Covers](https://www.dremio.com/apache-iceberg-an-architectural-look-under-the-covers/)
+**Date**: July 6th, 2021, **Company**: Dremio
+
+**Author**: [Jason Hughes](https://www.linkedin.com/in/jasonhhughes/)
+
+### [Migrating to Apache Iceberg at Adobe Experience Platform](https://medium.com/adobetech/migrating-to-apache-iceberg-at-adobe-experience-platform-40fa80f8b8de)
+**Date**: Jun 17th, 2021, **Company**: Adobe
+
+**Author**: [Romin Parekh](https://www.linkedin.com/in/rominparekh/), [Miao Wang](https://www.linkedin.com/in/miao-wang-0406a74/), [Shone Sadler](https://www.linkedin.com/in/shonesadler/)
+
+### [Flink + Iceberg: How to Construct a Whole-scenario Real-time Data Warehouse](https://www.alibabacloud.com/blog/flink-%2B-iceberg-how-to-construct-a-whole-scenario-real-time-data-warehouse_597824)
+**Date**: Jun 8th, 2021, **Company**: Tencent
+
+**Author**: [Shu (Simon Su) Su](https://www.linkedin.com/in/shu-su-62944994/)
+
+### [Trino on Ice III: Iceberg Concurrency Model, Snapshots, and the Iceberg Spec](https://blog.starburst.io/trino-on-ice-iii-iceberg-concurrency-model-snapshots-and-the-iceberg-spec)
+**Date**: May 25th, 2021, **Company**: Starburst
+
+**Author**: [Brian Olsen](https://www.linkedin.com/in/bitsondatadev)
+
+### [Trino on Ice II: In-Place Table Evolution and Cloud Compatibility with Iceberg](https://blog.starburst.io/trino-on-ice-ii-in-place-table-evolution-and-cloud-compatibility-with-iceberg)
+**Date**: May 11th, 2021, **Company**: Starburst
+
+**Author**: [Brian Olsen](https://www.linkedin.com/in/bitsondatadev)
+
+### [Trino On Ice I: A Gentle Introduction To Iceberg](https://blog.starburst.io/trino-on-ice-i-a-gentle-introduction-to-iceberg)
+**Date**: Apr 27th, 2021, **Company**: Starburst
+
+**Author**: [Brian Olsen](https://www.linkedin.com/in/bitsondatadev)
+
+### [Apache Iceberg: A Different Table Design for Big Data](https://thenewstack.io/apache-iceberg-a-different-table-design-for-big-data/)
+**Date**: Feb 1st, 2021, **Company**: thenewstack.io
+
+**Author**: [Susan Hall](https://thenewstack.io/author/susanhall/)
+
+### [A Short Introduction to Apache Iceberg](https://medium.com/expedia-group-tech/a-short-introduction-to-apache-iceberg-d34f628b6799)
+**Date**: Jan 26th, 2021, **Company**: Expedia
+
+**Author**: [Christine Mathiesen](https://www.linkedin.com/in/christine-mathiesen-676a98159/)
+
+### [Taking Query Optimizations to the Next Level with Iceberg](https://medium.com/adobetech/taking-query-optimizations-to-the-next-level-with-iceberg-6c968b83cd6f)
+**Date**: Jan 14th, 2021, **Company**: Adobe
+
+**Author**: [Gautam Kowshik](https://www.linkedin.com/in/gautamk/), [Xabriel J. Collazo Mojica](https://www.linkedin.com/in/xabriel/)
+
+### [FastIngest: Low-latency Gobblin with Apache Iceberg and ORC format](https://engineering.linkedin.com/blog/2021/fastingest-low-latency-gobblin)
+**Date**: Jan 6th, 2021, **Company**: LinkedIn
+
+**Author**: [Zihan Li](https://www.linkedin.com/in/zihan-li-0a8a15149/), [Sudarshan Vasudevan](https://www.linkedin.com/in/suddu/), [Lei Sun](https://www.linkedin.com/in/lei-s-a93138a0/), [Shirshanka Das](https://www.linkedin.com/in/shirshankadas/)
+
+### [High Throughput Ingestion with Iceberg](https://medium.com/adobetech/high-throughput-ingestion-with-iceberg-ccf7877a413f)
+**Date**: Dec 22nd, 2020, **Company**: Adobe
+
+**Author**: [Andrei Ionescu](http://linkedin.com/in/andreiionescu), [Shone Sadler](https://www.linkedin.com/in/shonesadler/), [Anil Malkani](https://www.linkedin.com/in/anil-malkani-52861a/)
+
+### [Optimizing data warehouse storage](https://netflixtechblog.com/optimizing-data-warehouse-storage-7b94a48fdcbe)
+**Date**: Dec 21st, 2020, **Company**: Netflix
+
+**Author**: [Anupom Syam](https://www.linkedin.com/in/anupom/)
+
+### [Iceberg at Adobe](https://medium.com/adobetech/iceberg-at-adobe-88cf1950e866)
+**Date**: Dec 3rd, 2020, **Company**: Adobe
+
+**Author**: [Shone Sadler](https://www.linkedin.com/in/shonesadler/), [Romin Parekh](https://www.linkedin.com/in/rominparekh/), [Anil Malkani](https://www.linkedin.com/in/anil-malkani-52861a/)
+
+### [Bulldozer: Batch Data Moving from Data Warehouse to Online Key-Value Stores](https://netflixtechblog.com/bulldozer-batch-data-moving-from-data-warehouse-to-online-key-value-stores-41bac13863f8)
+**Date**: Oct 27th, 2020, **Company**: Netflix
+
+**Author**: [Tianlong Chen](https://www.linkedin.com/in/tianlong-chen-39189b7a/), [Ioannis Papapanagiotou](https://www.linkedin.com/in/ipapapa/)
diff --git a/docs/common/community/join.md b/docs/common/community/join.md
@@ -0,0 +1,92 @@
+---
+url: community
+weight: 100
+---
+<!--
+ - Licensed to the Apache Software Foundation (ASF) under one or more
+ - contributor license agreements.  See the NOTICE file distributed with
+ - this work for additional information regarding copyright ownership.
+ - The ASF licenses this file to You under the Apache License, Version 2.0
+ - (the "License"); you may not use this file except in compliance with
+ - the License.  You may obtain a copy of the License at
+ -
+ -   http://www.apache.org/licenses/LICENSE-2.0
+ -
+ - Unless required by applicable law or agreed to in writing, software
+ - distributed under the License is distributed on an "AS IS" BASIS,
+ - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ - See the License for the specific language governing permissions and
+ - limitations under the License.
+ -->
+
+# Welcome!
+
+Apache Iceberg tracks issues in GitHub and prefers to receive contributions as pull requests.
+
+Community discussions happen primarily on the dev mailing list, on the apache-iceberg Slack workspace, and on specific GitHub issues.
+
+
+## Contributing
+
+The Iceberg Project is hosted on GitHub at <https://github.com/apache/iceberg>.
+
+The Iceberg community prefers to receive contributions as [GitHub pull requests][github-pr-docs].
+
+* [View open pull requests][iceberg-prs]
+* [Learn about pull requests][github-pr-docs]
+
+[iceberg-prs]: https://github.com/apache/iceberg/pulls
+[github-pr-docs]: https://help.github.com/articles/about-pull-requests/
+
+
+## Issues
+
+Issues are tracked in GitHub:
+
+* [View open issues][open-issues]
+* [Open a new issue][new-issue]
+
+[open-issues]: https://github.com/apache/iceberg/issues
+[new-issue]: https://github.com/apache/iceberg/issues/new
+
+## Slack
+
+We use the [Apache Iceberg workspace](https://apache-iceberg.slack.com/) on Slack. To be invited, follow [this invite link](https://join.slack.com/t/apache-iceberg/shared_invite/zt-tlv0zjz6-jGJEkHfb1~heMCJA3Uycrg).
+
+Please note that this link may occasionally break when Slack does an upgrade. If you encounter problems using it, please let us know by sending an email to <dev@iceberg.apache.org>.
+
+## Mailing Lists
+
+Iceberg has four mailing lists:
+
+* **Developers**: <dev@iceberg.apache.org> -- used for community discussions
+    - [Subscribe](mailto:dev-subscribe@iceberg.apache.org)
+    - [Unsubscribe](mailto:dev-unsubscribe@iceberg.apache.org)
+    - [Archive](https://lists.apache.org/list.html?dev@iceberg.apache.org)
+* **Commits**: <commits@iceberg.apache.org> -- distributes commit notifications
+    - [Subscribe](mailto:commits-subscribe@iceberg.apache.org)
+    - [Unsubscribe](mailto:commits-unsubscribe@iceberg.apache.org)
+    - [Archive](https://lists.apache.org/list.html?commits@iceberg.apache.org)
+* **Issues**: <issues@iceberg.apache.org> -- GitHub issue tracking
+    - [Subscribe](mailto:issues-subscribe@iceberg.apache.org)
+    - [Unsubscribe](mailto:issues-unsubscribe@iceberg.apache.org)
+    - [Archive](https://lists.apache.org/list.html?issues@iceberg.apache.org)
+* **Private**: <private@iceberg.apache.org> -- private list for the PMC to discuss sensitive issues related to the health of the project
+    - [Archive](https://lists.apache.org/list.html?private@iceberg.apache.org)
+
+
+## Setting up IDE and Code Style
+
+### Configuring Code Formatter for IntelliJ IDEA
+
+In the **Settings/Preferences** dialog go to **Editor > Code Style > Java**. Click on the gear wheel and select **Import Scheme** to import IntelliJ IDEA XML code style settings.
+Point to [intellij-java-palantir-style.xml](https://github.com/apache/iceberg/blob/master/.baseline/idea/intellij-java-palantir-style.xml) and hit **OK** (you might need to enable **Show Hidden Files and Directories** in the dialog). The code itself can then be formatted via **Code > Reformat Code**.
+
+See also the IntelliJ [Code Style docs](https://www.jetbrains.com/help/idea/copying-code-style-settings.html) and [Reformat Code docs](https://www.jetbrains.com/help/idea/reformat-and-rearrange-code.html) for additional details.
+
+## Running Benchmarks
+Some PRs/changesets might require running benchmarks to determine whether they are affecting the baseline performance. Currently there is 
+no "push a single button to get a performance comparison" solution available, therefore one has to run JMH performance tests on their local machine and
+post the results on the PR.
+
+See [Benchmarks](../benchmarks) for a summary of available benchmarks and how to run them.
diff --git a/docs/common/community/talks.md b/docs/common/community/talks.md
@@ -0,0 +1,33 @@
+---
+url: talks
+weight: 300
+---
+<!--
+ - Licensed to the Apache Software Foundation (ASF) under one or more
+ - contributor license agreements.  See the NOTICE file distributed with
+ - this work for additional information regarding copyright ownership.
+ - The ASF licenses this file to You under the Apache License, Version 2.0
+ - (the "License"); you may not use this file except in compliance with
+ - the License.  You may obtain a copy of the License at
+ -
+ -   http://www.apache.org/licenses/LICENSE-2.0
+ -
+ - Unless required by applicable law or agreed to in writing, software
+ - distributed under the License is distributed on an "AS IS" BASIS,
+ - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ - See the License for the specific language governing permissions and
+ - limitations under the License.
+ -->
+
+## Iceberg Talks
+
+Here is a list of talks and other videos related to Iceberg.
+
+### [Expert Roundtable: The Future of Metadata After Hive Metastore](https://www.youtube.com/watch?v=7_Pt1g2x-XE)
+**Date**: November 15, 2021, **Authors**: Lior Ebel, Seshu Adunuthula, Ryan Blue & Oz Katz
+
+### [Spark and Iceberg at Apple's Scale - Leveraging differential files for efficient upserts and deletes](https://www.youtube.com/watch?v=IzkSGKoUxcQ)
+**Date**: October 21, 2020, **Author**: Anton
+
+### [Apache Iceberg - A Table Format for Huge Analytic Datasets](https://www.youtube.com/watch?v=mf8Hb0coI6o)
+**Date**: October 21, 2020, **Author**: Ryan Blue