+
{{- .Content -}}
{{ if not .Params.disableToc }}
diff --git a/iceberg-theme/static/css/iceberg-theme.css b/iceberg-theme/static/css/iceberg-theme.css
index 4d20b62ee..30237946c 100644
--- a/iceberg-theme/static/css/iceberg-theme.css
+++ b/iceberg-theme/static/css/iceberg-theme.css
@@ -243,6 +243,15 @@ h4:hover a { visibility: visible}
width: 100%;
}
+/* Cards at the top of each quickstart page */
+.quickstart-container {
+  display: flex;
+  flex-wrap: wrap;
+  width: 80vw;
+  gap: 1rem;
+}
+
.content-only {
grid-template-columns: auto;
}
@@ -263,6 +272,10 @@ h4:hover a { visibility: visible}
margin-right: 40%;
}
+.margin-without-toc {
+ margin-right: 20%;
+}
+
#toc {
position: fixed;
right: 0;
@@ -637,4 +650,67 @@ div#full ul.sub-menu {
.versions-dropdown:hover .versions-dropdown-content {
display: block;
+}
+
+.quickstart-card {
+  color: #fff;
+  width: 300px;
+  position: relative;
+  -webkit-box-shadow: 0px 5px 5px 0px rgba(0,0,0,0.3);
+  -moz-box-shadow: 0px 5px 5px 0px rgba(0,0,0,0.3);
+  box-shadow: 0px 5px 5px 0px rgba(0,0,0,0.3);
+  background-image: linear-gradient(to bottom right, #2879be, #57A7D8);
+}
+
+.quickstart-card .quickstart-card-content {
+ padding: 30px;
+ height: 250px;
+}
+
+.quickstart-card .quickstart-card-title {
+ font-size: 25px;
+ font-family: 'Open Sans', sans-serif;
+}
+
+.quickstart-card .quickstart-card-text {
+  line-height: 1.6;
+}
+
+.quickstart-card .quickstart-card-link {
+  padding: 25px;
+  width: -webkit-fill-available;
+  border-top: 1px solid #82c1bb;
+}
+
+.quickstart-card-link a {
+ text-decoration: none;
+ position: relative;
+ padding: 10px 0px;
+ color: #fff;
+}
+
+.quickstart-card .quickstart-card-link a:after {
+ top: 30px;
+ content: "";
+ display: block;
+ height: 2px;
+ left: 50%;
+ position: absolute;
+ width: 0;
+ background:#fff;
+
+ -webkit-transition: width 0.3s ease 0s, left 0.3s ease 0s;
+ -moz-transition: width 0.3s ease 0s, left 0.3s ease 0s;
+ -o-transition: width 0.3s ease 0s, left 0.3s ease 0s;
+ transition: width 0.3s ease 0s, left 0.3s ease 0s;
+}
+
+.quickstart-card .quickstart-card-link a:hover:after {
+ width: 100%;
+ left: 0;
}
\ No newline at end of file
diff --git a/landing-page/config.toml b/landing-page/config.toml
index e2296eb12..4ae237700 100644
--- a/landing-page/config.toml
+++ b/landing-page/config.toml
@@ -40,12 +40,9 @@ home = [ "HTML", "RSS", "SearchIndex" ]
{ name = "0.12.1", url = "/docs/0.12.1", weight = 1000 }
]
topnav = [
- { name = "Docs", url = "/docs/latest", weight = 100 },
+ { name = "Quickstart", url = "/spark-quickstart", weight = 100 },
+ { name = "Docs", url = "/docs/latest", weight = 200 },
{ name = "Releases", url = "/releases", weight = 600 },
- { name = "Spark", url = "/docs/latest/getting-started", weight = 200 },
- { name = "Flink", url = "/docs/latest/flink", weight = 300 },
- { name = "Trino", identifier = "_trino", url = "https://trino.io/docs/current/connector/iceberg.html", weight = 400 },
- { name = "Presto", identifier = "_presto", url = "https://prestodb.io/docs/current/connector/iceberg.html", weight = 500 },
{ name = "Blogs", url = "/blogs", weight = 998 },
{ name = "Talks", url = "/talks", weight = 999 },
{ name = "Roadmap", url = "/roadmap", weight = 997 },
@@ -63,6 +60,9 @@ home = [ "HTML", "RSS", "SearchIndex" ]
{ name = "Donate", identifier = "_donate", parent = "ASF", url = "https://www.apache.org/foundation/sponsorship.html" },
{ name = "Events", identifier = "_events", parent = "ASF", url = "https://www.apache.org/events/current-event.html" },
]
+ quickstarts = [
+ { name = "Spark and Iceberg Quickstart", weight = 100, url = "spark-quickstart", post = "This quickstart will get you up and running with an Iceberg and Spark environment, including sample notebooks." }
+ ]
[markup.goldmark.renderer]
unsafe= true
\ No newline at end of file
diff --git a/landing-page/content/common/spark-quickstart.md b/landing-page/content/common/spark-quickstart.md
new file mode 100644
index 000000000..27b389ec4
--- /dev/null
+++ b/landing-page/content/common/spark-quickstart.md
@@ -0,0 +1,323 @@
+---
+title: "Spark and Iceberg Quickstart"
+weight: 100
+url: spark-quickstart
+aliases:
+ - "quickstart"
+ - "quickstarts"
+ - "getting-started"
+disableSidebar: true
+disableToc: true
+---
+
+
+{{% quickstarts %}}
+
+## Spark and Iceberg Quickstart
+
+This guide will get you up and running with an Iceberg and Spark environment, including sample code to
+highlight some powerful features. You can learn more about Iceberg's Spark runtime by checking out the [Spark](../docs/latest/spark-ddl/) section.
+
+- [Docker-Compose](#docker-compose)
+- [Creating a table](#creating-a-table)
+- [Writing Data to a Table](#writing-data-to-a-table)
+- [Reading Data from a Table](#reading-data-from-a-table)
+- [Adding Iceberg to Spark](#adding-iceberg-to-spark)
+- [Adding a Catalog](#adding-a-catalog)
+- [Next Steps](#next-steps)
+
+### Docker-Compose
+
+The fastest way to get started is to use a docker-compose file that uses the [tabulario/spark-iceberg](https://hub.docker.com/r/tabulario/spark-iceberg) image,
+which contains a local Spark cluster with a configured Iceberg catalog. To use this, you'll need to install the [Docker CLI](https://docs.docker.com/get-docker/) as well as the [Docker Compose CLI](https://github.com/docker/compose-cli/blob/main/INSTALL.md).
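+
+If you're not sure whether they're already installed, both tools report a version when present (a quick sanity check; the exact version numbers will vary):
+
+```sh
+docker --version
+docker-compose --version
+```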
+
+Once you have those, save the YAML below into a file named `docker-compose.yml`:
+
+```yaml
+version: "3"
+
+services:
+ spark-iceberg:
+ image: tabulario/spark-iceberg
+ depends_on:
+ - postgres
+ container_name: spark-iceberg
+ environment:
+ - SPARK_HOME=/opt/spark
+ - PYSPARK_PYTHON=/usr/bin/python3.9
+ - PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/opt/spark/bin
+ volumes:
+ - ./warehouse:/home/iceberg/warehouse
+ - ./notebooks:/home/iceberg/notebooks/notebooks
+ ports:
+ - 8888:8888
+ - 8080:8080
+ - 18080:18080
+ postgres:
+ image: postgres:13.4-bullseye
+ container_name: postgres
+ environment:
+ - POSTGRES_USER=admin
+ - POSTGRES_PASSWORD=password
+ - POSTGRES_DB=demo_catalog
+ volumes:
+ - ./postgres/data:/var/lib/postgresql/data
+```
+
+Next, start up the Docker containers with this command:
+```sh
+docker-compose up
+```
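+
+If you'd rather keep your terminal free, you can start the containers in the background instead with Docker Compose's standard detached-mode flag:
+
+```sh
+docker-compose up -d
+```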
+
+You can then run any of the following commands to start a Spark session.
+
+{{% codetabs "LaunchSparkClient" %}}
+{{% addtab "SparkSQL" checked %}}
+{{% addtab "SparkShell" %}}
+{{% addtab "PySpark" %}}
+{{% tabcontent "SparkSQL" %}}
+```sh
+docker exec -it spark-iceberg spark-sql
+```
+{{% /tabcontent %}}
+{{% tabcontent "SparkShell" %}}
+```sh
+docker exec -it spark-iceberg spark-shell
+```
+{{% /tabcontent %}}
+{{% tabcontent "PySpark" %}}
+```sh
+docker exec -it spark-iceberg pyspark
+```
+{{% /tabcontent %}}
+{{% /codetabs %}}
+{{< hint info >}}
+You can also launch a notebook server by running `docker exec -it spark-iceberg notebook`.
+The notebook server will be available at [http://localhost:8888](http://localhost:8888).
+{{< /hint >}}
+
+### Creating a table
+
+To create your first Iceberg table in Spark, run a [`CREATE TABLE`](../docs/latest/spark-ddl#create-table) command. Let's create a table
+named `demo.nyc.taxis`, where `demo` is the catalog name, `nyc` is the database name, and `taxis` is the table name.
+
+
+{{% codetabs "CreateATable" %}}
+{{% addtab "SparkSQL" checked %}}
+{{% addtab "SparkShell" %}}
+{{% addtab "PySpark" %}}
+{{% tabcontent "SparkSQL" %}}
+```sql
+CREATE TABLE demo.nyc.taxis
+(
+ vendor_id bigint,
+ trip_id bigint,
+ trip_distance float,
+ fare_amount double,
+ store_and_fwd_flag string
+)
+PARTITIONED BY (vendor_id);
+```
+{{% /tabcontent %}}
+{{% tabcontent "SparkShell" %}}
+```scala
+import org.apache.spark.sql.types._
+import org.apache.spark.sql.Row
+
+val schema = StructType(Array(
+  StructField("vendor_id", LongType, true),
+  StructField("trip_id", LongType, true),
+  StructField("trip_distance", FloatType, true),
+  StructField("fare_amount", DoubleType, true),
+  StructField("store_and_fwd_flag", StringType, true)
+))
+val df = spark.createDataFrame(spark.sparkContext.emptyRDD[Row], schema)
+df.writeTo("demo.nyc.taxis").create()
+```
+{{% /tabcontent %}}
+{{% tabcontent "PySpark" %}}
+```py
+from pyspark.sql.types import DoubleType, FloatType, LongType, StructType, StructField, StringType
+
+schema = StructType([
+  StructField("vendor_id", LongType(), True),
+  StructField("trip_id", LongType(), True),
+  StructField("trip_distance", FloatType(), True),
+  StructField("fare_amount", DoubleType(), True),
+  StructField("store_and_fwd_flag", StringType(), True)
+])
+
+df = spark.createDataFrame([], schema)
+df.writeTo("demo.nyc.taxis").create()
+```
+{{% /tabcontent %}}
+{{% /codetabs %}}
+
+Iceberg catalogs support the full range of SQL DDL commands, including:
+
+* [`CREATE TABLE ... PARTITIONED BY`](../docs/latest/spark-ddl#create-table)
+* [`CREATE TABLE ... AS SELECT`](../docs/latest/spark-ddl#create-table--as-select)
+* [`ALTER TABLE`](../docs/latest/spark-ddl#alter-table)
+* [`DROP TABLE`](../docs/latest/spark-ddl#drop-table)
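+
+For example, the statements below exercise a few of these against a throwaway copy of the table (an illustrative sketch; the `taxis_sample` name is used only for this example — see the linked pages for full syntax and options):
+
+```sql
+-- Create a new Iceberg table from a query (CTAS)
+CREATE TABLE demo.nyc.taxis_sample AS SELECT * FROM demo.nyc.taxis;
+
+-- Rename a column; schema evolution is a metadata-only change in Iceberg
+ALTER TABLE demo.nyc.taxis_sample RENAME COLUMN fare_amount TO fare;
+
+-- Clean up the example table
+DROP TABLE demo.nyc.taxis_sample;
+```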
+
+### Writing Data to a Table
+
+Once your table is created, you can insert records.
+
+{{% codetabs "InsertData" %}}
+{{% addtab "SparkSQL" checked %}}
+{{% addtab "SparkShell" %}}
+{{% addtab "PySpark" %}}
+{{% tabcontent "SparkSQL" %}}
+```sql
+INSERT INTO demo.nyc.taxis
+VALUES (1, 1000371, 1.8, 15.32, 'N'), (2, 1000372, 2.5, 22.15, 'N'), (2, 1000373, 0.9, 9.01, 'N'), (1, 1000374, 8.4, 42.13, 'Y');
+```
+{{% /tabcontent %}}
+{{% tabcontent "SparkShell" %}}
+```scala
+import org.apache.spark.sql.Row
+
+val schema = spark.table("demo.nyc.taxis").schema
+val data = Seq(
+ Row(1: Long, 1000371: Long, 1.8f: Float, 15.32: Double, "N": String),
+ Row(2: Long, 1000372: Long, 2.5f: Float, 22.15: Double, "N": String),
+ Row(2: Long, 1000373: Long, 0.9f: Float, 9.01: Double, "N": String),
+ Row(1: Long, 1000374: Long, 8.4f: Float, 42.13: Double, "Y": String)
+)
+val df = spark.createDataFrame(spark.sparkContext.parallelize(data), schema)
+df.writeTo("demo.nyc.taxis").append()
+```
+{{% /tabcontent %}}
+{{% tabcontent "PySpark" %}}
+```py
+schema = spark.table("demo.nyc.taxis").schema
+data = [
+ (1, 1000371, 1.8, 15.32, "N"),
+ (2, 1000372, 2.5, 22.15, "N"),
+ (2, 1000373, 0.9, 9.01, "N"),
+ (1, 1000374, 8.4, 42.13, "Y")
+]
+df = spark.createDataFrame(data, schema)
+df.writeTo("demo.nyc.taxis").append()
+```
+{{% /tabcontent %}}
+{{% /codetabs %}}
+
+### Reading Data from a Table
+
+To read a table, simply use the Iceberg table's name.
+
+{{% codetabs "SelectData" %}}
+{{% addtab "SparkSQL" checked %}}
+{{% addtab "SparkShell" %}}
+{{% addtab "PySpark" %}}
+{{% tabcontent "SparkSQL" %}}
+```sql
+SELECT * FROM demo.nyc.taxis;
+```
+{{% /tabcontent %}}
+{{% tabcontent "SparkShell" %}}
+```scala
+val df = spark.table("demo.nyc.taxis")
+df.show()
+```
+{{% /tabcontent %}}
+{{% tabcontent "PySpark" %}}
+```py
+df = spark.table("demo.nyc.taxis")
+df.show()
+```
+{{% /tabcontent %}}
+{{% /codetabs %}}
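+
+Because Iceberg tables behave like regular Spark tables, ordinary SQL works against them as well. For instance, a small illustrative aggregation over the rows inserted earlier:
+
+```sql
+SELECT vendor_id, COUNT(*) AS trip_count, SUM(fare_amount) AS total_fares
+FROM demo.nyc.taxis
+GROUP BY vendor_id;
+```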
+
+### Adding Iceberg to Spark
+
+To add Iceberg to an existing Spark environment, use the `--packages` option when launching Spark:
+
+{{% codetabs "AddIcebergToSpark" %}}
+{{% addtab "SparkSQL" checked %}}
+{{% addtab "SparkShell" %}}
+{{% addtab "PySpark" %}}
+{{% tabcontent "SparkSQL" %}}
+```sh
+spark-sql --packages org.apache.iceberg:iceberg-spark-runtime-3.2_2.12:{{% icebergVersion %}}
+```
+{{% /tabcontent %}}
+{{% tabcontent "SparkShell" %}}
+```sh
+spark-shell --packages org.apache.iceberg:iceberg-spark-runtime-3.2_2.12:{{% icebergVersion %}}
+```
+{{% /tabcontent %}}
+{{% tabcontent "PySpark" %}}
+```sh
+pyspark --packages org.apache.iceberg:iceberg-spark-runtime-3.2_2.12:{{% icebergVersion %}}
+```
+{{% /tabcontent %}}
+{{% /codetabs %}}
+
+{{< hint info >}}
+If you want to include Iceberg in your Spark installation, add the Iceberg Spark runtime to Spark's `jars` folder.
+You can download the runtime by visiting the [Releases](https://iceberg.apache.org/releases/) page.
+{{< /hint >}}
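+
+For instance, assuming a local Spark installation with `$SPARK_HOME` set, one way to do this is to download [the runtime jar][spark-runtime-jar] straight into that folder (a sketch; adjust the path for your environment):
+
+```sh
+# hypothetical one-liner; requires curl and a writable $SPARK_HOME/jars directory
+curl -L -o $SPARK_HOME/jars/iceberg-spark-runtime-3.2_2.12-{{% icebergVersion %}}.jar \
+  "https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-spark-runtime-3.2_2.12/{{% icebergVersion %}}/iceberg-spark-runtime-3.2_2.12-{{% icebergVersion %}}.jar"
+```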
+
+[spark-runtime-jar]: https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-spark-runtime-3.2_2.12/{{% icebergVersion %}}/iceberg-spark-runtime-3.2_2.12-{{% icebergVersion %}}.jar
+
+### Adding a Catalog
+
+Iceberg has several catalog backends that can be used to track tables, like JDBC, Hive Metastore, and Glue.
+Catalogs are configured using properties under `spark.sql.catalog.(catalog_name)`. In this guide, we
+use a Hadoop catalog, but you can follow these instructions to configure other catalog types. To learn more, check out
+the [Catalog](../docs/latest/spark-configuration/#catalogs) page in the Spark section.
+
+This configuration creates a path-based catalog named `demo` for tables under `$PWD/warehouse` and adds support for Iceberg tables to Spark's built-in catalog.
+
+
+{{% codetabs "AddingACatalog" %}}
+{{% addtab "CLI" checked %}}
+{{% addtab "spark-defaults" %}}
+{{% tabcontent "CLI" %}}
+```sh
+spark-sql --packages org.apache.iceberg:iceberg-spark-runtime-3.2_2.12:{{% icebergVersion %}} \
+ --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions \
+ --conf spark.sql.catalog.spark_catalog=org.apache.iceberg.spark.SparkSessionCatalog \
+ --conf spark.sql.catalog.spark_catalog.type=hive \
+ --conf spark.sql.catalog.demo=org.apache.iceberg.spark.SparkCatalog \
+ --conf spark.sql.catalog.demo.type=hadoop \
+ --conf spark.sql.catalog.demo.warehouse=$PWD/warehouse \
+ --conf spark.sql.defaultCatalog=demo
+```
+{{% /tabcontent %}}
+{{% tabcontent "spark-defaults" %}}
+```sh
+spark.jars.packages org.apache.iceberg:iceberg-spark-runtime-3.2_2.12:{{% icebergVersion %}}
+spark.sql.extensions org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions
+spark.sql.catalog.spark_catalog org.apache.iceberg.spark.SparkSessionCatalog
+spark.sql.catalog.spark_catalog.type hive
+spark.sql.catalog.demo org.apache.iceberg.spark.SparkCatalog
+spark.sql.catalog.demo.type hadoop
+spark.sql.catalog.demo.warehouse $PWD/warehouse
+spark.sql.defaultCatalog demo
+```
+{{% /tabcontent %}}
+{{% /codetabs %}}
+
+
+{{< hint info >}}
+If your Iceberg catalog is not set as the default catalog, you will have to switch to it by executing `USE demo;`
+{{< /hint >}}
+
+### Next Steps
+
+Now that you're up and running with Iceberg and Spark, check out the [Iceberg-Spark runtime docs](../docs/latest/spark-ddl/) to learn more!
\ No newline at end of file
diff --git a/landing-page/layouts/shortcodes/quickstarts.html b/landing-page/layouts/shortcodes/quickstarts.html
new file mode 100644
index 000000000..9ee36484d
--- /dev/null
+++ b/landing-page/layouts/shortcodes/quickstarts.html
@@ -0,0 +1,31 @@
+<!--
+ - Licensed to the Apache Software Foundation (ASF) under one or more
+ - contributor license agreements.  See the NOTICE file distributed with
+ - this work for additional information regarding copyright ownership.
+ - The ASF licenses this file to You under the Apache License, Version 2.0
+ - (the "License"); you may not use this file except in compliance with
+ - the License.  You may obtain a copy of the License at
+ -
+ -   http://www.apache.org/licenses/LICENSE-2.0
+ -
+ - Unless required by applicable law or agreed to in writing, software
+ - distributed under the License is distributed on an "AS IS" BASIS,
+ - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ - See the License for the specific language governing permissions and
+ - limitations under the License.
+-->
+<h2>More Quickstarts</h2>
+<div class="quickstart-container">
+{{ $currentPageTitle := .Page.Title }}{{ range .Site.Menus.quickstarts }}{{ if ne .Name $currentPageTitle }}
+  <div class="quickstart-card">
+    <div class="quickstart-card-content">
+      <div class="quickstart-card-title">
+        {{ .Name }}
+      </div>
+      <div class="quickstart-card-text">
+        {{ substr .Post 0 130 }}
+      </div>
+    </div>
+    <div class="quickstart-card-link">
+      <a href="{{ .URL }}">Read More</a>
+    </div>
+  </div>
+{{ else }}{{ end }}{{ end }}
+</div>
\ No newline at end of file