diff --git a/docs/config.toml b/docs/config.toml index a9d2b9217..893b742ec 100644 --- a/docs/config.toml +++ b/docs/config.toml @@ -30,12 +30,8 @@ home = [ "HTML", "RSS", "SearchIndex" ] { name = "0.12.1", pre = "relative", url = "../0.12.1", weight = 1000 } ] topnav = [ - { name = "Docs", url = "/docs/latest", weight = 100 }, - { name = "Releases", pre = "relative", url = "../../releases", weight = 600 }, - { name = "Spark", url = "/docs/latest/getting-started", weight = 200 }, - { name = "Flink", url = "/docs/latest/flink", weight = 300 }, - { name = "Trino", url = "https://trino.io/docs/current/connector/iceberg.html", weight = 400 }, - { name = "Presto", url = "https://prestodb.io/docs/current/connector/iceberg.html" , weight = 500 }, + { name = "Quickstart", url = "/spark-quickstart", weight = 100 }, + { name = "Docs", url = "/docs/latest", weight = 200 }, { name = "Blogs", pre = "relative", url = "../../blogs", weight = 998 }, { name = "Talks", pre = "relative", url = "../../talks", weight = 999 }, { name = "Roadmap", pre = "relative", url = "../../roadmap", weight = 997 }, diff --git a/iceberg-theme/README.md b/iceberg-theme/README.md new file mode 100644 index 000000000..98ec75f4f --- /dev/null +++ b/iceberg-theme/README.md @@ -0,0 +1,63 @@ +# Iceberg Theme + +The Iceberg theme is a theme for use with [Hugo](https://gohugo.io/). + +## Hint Boxes + +To add a hint box, use the `hint` shortcode and pass in a hint box variation type. Available +types are `info`, `success`, `warning`, or `error`. + +``` +{{< hint info >}} +Here is a message for the hint box! +{{< /hint >}} +``` + +## Codetabs + +To add a tabbed view of different code examples, use the `codetabs`, `addtab`, and `tabcontent` shortcodes directly +within a markdown page. + +1. Define a named set of tabs, making sure that the name is unique across the current page. +``` +{{% codetabs "LaunchSparkClient" %}} +{{% /codetabs %}} +``` + +2. Add one or more tabs using the `addtab` shortcode. 
The arguments to the `addtab` shortcode are tab name, tab group, +and tab type. (see the logic in `iceberg-theme.js` to see how these are used) +```css +{{% codetabs "LaunchSparkClient" %}} +{{% addtab "SparkSQL" "spark-queries" "spark-sql" %}} +{{% addtab "Spark-Shell" "spark-queries" "spark-shell" %}} +{{% addtab "PySpark" "spark-queries" "pyspark" %}} +{{% codetabs "LaunchSparkClient" %}} +{{% /codetabs %}} +``` + +3. Add content for each tab using the `tabcontent` shortcode. +``` +{{% codetabs "LaunchSparkClient" %}} +{{% addtab "SparkSQL" "spark-queries" "spark-sql" %}} +{{% addtab "Spark-Shell" "spark-queries" "spark-shell" %}} +{{% addtab "PySpark" "spark-queries" "pyspark" %}} +{{% tabcontent "spark-sql" %}} +\```sh +docker exec -it spark-iceberg spark-sql +\``` +{{% /tabcontent %}} +{{% tabcontent "spark-shell" %}} +\```sh +docker exec -it spark-iceberg spark-shell +\``` +{{% /tabcontent %}} +{{% tabcontent "pyspark" %}} +\```sh +docker exec -it spark-iceberg pyspark +\``` +{{% /tabcontent %}} +{{% /codetabs %}} +``` + +Codetab "groups" are used to coordinate switching the tab view throughout an entire page. +To add a new group, update the code in `iceberg-theme.js`. \ No newline at end of file diff --git a/iceberg-theme/layouts/partials/body.html b/iceberg-theme/layouts/partials/body.html index 5d04d7b46..fe4cc01d8 100644 --- a/iceberg-theme/layouts/partials/body.html +++ b/iceberg-theme/layouts/partials/body.html @@ -16,12 +16,12 @@ {{ partial "header.html" . }}
-
+
{{ if not .Params.disableSidebar }} {{ partial "sidebar.html" . }} {{ end }}
-
+
{{- .Content -}}
{{ if not .Params.disableToc }} diff --git a/iceberg-theme/layouts/shortcodes/addtab.html b/iceberg-theme/layouts/shortcodes/addtab.html index 00422f2ff..533dd04cb 100644 --- a/iceberg-theme/layouts/shortcodes/addtab.html +++ b/iceberg-theme/layouts/shortcodes/addtab.html @@ -1,2 +1,2 @@ - - \ No newline at end of file + + \ No newline at end of file diff --git a/iceberg-theme/layouts/shortcodes/codetabs.html b/iceberg-theme/layouts/shortcodes/codetabs.html index 96d510970..fee3a157f 100644 --- a/iceberg-theme/layouts/shortcodes/codetabs.html +++ b/iceberg-theme/layouts/shortcodes/codetabs.html @@ -1,91 +1 @@ - -
{{ .Inner }}
\ No newline at end of file +
{{ .Inner }}
diff --git a/iceberg-theme/layouts/shortcodes/quickstarts.html b/iceberg-theme/layouts/shortcodes/quickstarts.html new file mode 100644 index 000000000..82cb0af15 --- /dev/null +++ b/iceberg-theme/layouts/shortcodes/quickstarts.html @@ -0,0 +1,31 @@ + + + + + + + + + + + + + + +
+ More Quickstarts +
{{ $currentPageTitle := .Page.Title }}{{ range .Site.Menus.quickstarts }}{{ if ne .Name $currentPageTitle }} +
+
+ {{ .Name }} +

+ {{ substr .Post 0 130 }} +

+
+ +
+
{{ else }}{{ end }}{{ end }} +
+
\ No newline at end of file diff --git a/iceberg-theme/layouts/shortcodes/tabcontent.html b/iceberg-theme/layouts/shortcodes/tabcontent.html index b1b3c6c01..c099bb799 100644 --- a/iceberg-theme/layouts/shortcodes/tabcontent.html +++ b/iceberg-theme/layouts/shortcodes/tabcontent.html @@ -1,3 +1,3 @@ -
+ {{ .Inner }} -
\ No newline at end of file + \ No newline at end of file diff --git a/iceberg-theme/static/css/iceberg-theme.css b/iceberg-theme/static/css/iceberg-theme.css index 4d20b62ee..43dcf5865 100644 --- a/iceberg-theme/static/css/iceberg-theme.css +++ b/iceberg-theme/static/css/iceberg-theme.css @@ -243,6 +243,15 @@ h4:hover a { visibility: visible} width: 100%; } +/* Cards at the top of each quickstart page */ +.quickstart-container { + display: flex; + flex-wrap: wrap; + width: 80vw; + grid-template-columns: 1fr 1fr 1fr; + grid-gap: 1rem; +} + .content-only { grid-template-columns: auto; } @@ -263,6 +272,10 @@ h4:hover a { visibility: visible} margin-right: 40%; } +.margin-without-toc { + margin-right: 20%; +} + #toc { position: fixed; right: 0; @@ -571,7 +584,7 @@ hr { } } -@media screen and (max-width: 1280px) { +@media screen and (max-width: 1040px) { .topnav-page-selection { display: none; } @@ -617,10 +630,10 @@ div#full ul.sub-menu { } .versions-dropdown-content { - display: none; - position: fixed; - background-color: #f9f9f9; - box-shadow: 0px 8px 16px 0px rgba(0,0,0,0.2); + display: none; + position: fixed; + background-color: #f9f9f9; + box-shadow: 0px 8px 16px 0px rgba(0,0,0,0.2); } .versions-dropdown-content ul { @@ -636,5 +649,143 @@ div#full ul.sub-menu { } .versions-dropdown:hover .versions-dropdown-content { - display: block; + display: block; +} + +.quickstart-card { + color: #fff; + width:300px; + position: relative; + -webkit-box-shadow: 0px 5px 5px 0px rgba(0,0,0,0.3); + -moz-box-shadow: 0px 5px 5px 0px rgba(0,0,0,0.3); + -o-box-shadow: 0px 5px 5px 0px rgba(0,0,0,0.3); + box-shadow: 0px 5px 5px 0px rgba(0,0,0,0.3); + background-image: linear-gradient(to bottom right, #2879be, #57A7D8); +} + +.quickstart-card .quickstart-card-content { + padding: 30px; + height: 250px; +} + +.quickstart-card .quickstart-card-title { + font-size: 25px; + font-family: 'Open Sans', sans-serif; +} + +.quickstart-card .quickstart-card-text { + line-height: 1.6; +} + 
+.quickstart-card .quickstart-card-link { + padding: 25px; + width: -webkit-fill-available; + border-top:1px solid #82c1bb; +} + +.quickstart-card-link a { + text-decoration: none; + position: relative; + padding: 10px 0px; + color: #fff; +} + +.quickstart-card .quickstart-card-link a:after { + top: 30px; + content: ""; + display: block; + height: 2px; + left: 50%; + position: absolute; + width: 0; + background:#fff; + + -webkit-transition: width 0.3s ease 0s, left 0.3s ease 0s; + -moz-transition: width 0.3s ease 0s, left 0.3s ease 0s; + -o-transition: width 0.3s ease 0s, left 0.3s ease 0s; + transition: width 0.3s ease 0s, left 0.3s ease 0s; +} + +.quickstart-card .quickstart-card-link a:hover:after { + width: 100%; + left: 0; +} + +.quickstart-card .quickstart-card-link a:after { + background:#fff; +} + +/*styling for codetabs*/ +.codetabs h1 { + padding: 50px 0; + font-weight: 400; + text-align: center; +} + +.codetabs p { + margin: 0 0 20px; + line-height: 1.5; +} + +.codetabs main { + min-width: 320px; + max-width: 800px; + padding: 50px; + margin: 0 auto; + background: #fff; +} + +.codetabs codeblock { + display: none; + padding: 20px 0 0; + border-top: 1px solid #ddd; +} + +.codetabs input { + display: none; + visibility:hidden; +} + +.codetabs label { + display: inline-block; + margin: 0 0 -1px; + padding: 15px 25px; + font-weight: 600; + text-align: center; + color: #bbb; + border: 1px solid transparent; +} + +.codetabs label:before { + font-family: fontawesome; + font-weight: normal; + margin-right: 10px; +} + +.codetabs label:hover { + color: #888; + cursor: pointer; +} + +.codetabs input:checked + label { + color: #555; + border: 1px solid #ddd; + border-top: 2px solid #277ABE; + border-bottom: 1px solid #fff; +} + +@media screen and (max-width: 500px) { + .codetabs label { + font-size: 0; + } + #LaunchSparkClient label:before { + margin: 0; + font-size: 18px; + } +} + +@media screen and (max-width: 400px) { + .codetabs label { + padding: 15px; + } } \ 
No newline at end of file diff --git a/iceberg-theme/static/js/iceberg-theme.js b/iceberg-theme/static/js/iceberg-theme.js index f305bd213..99abe7926 100644 --- a/iceberg-theme/static/js/iceberg-theme.js +++ b/iceberg-theme/static/js/iceberg-theme.js @@ -25,4 +25,43 @@ $("#searchclear").click(function(){ while (results.firstChild) { results.removeChild(results.firstChild); } -}); \ No newline at end of file +}); + +// Coordinate control of codetabs +const languages = ["spark-sql", "spark-shell", "pyspark"] +const groups = { + "spark-queries": [ + "spark-sql", + "spark-shell", + "pyspark" + ], + "spark-init": [ + "cli", + "spark-defaults" + ] +} +function hideCodeBlocks(group, type) { + var codeblocks = document.querySelectorAll(`.${type}`); + for (var i = 0; i < codeblocks.length; i++) { + codeblocks[i].style.display = 'none'; + } +} + +function showCodeBlocks(group, type) { + var codeblocks = document.querySelectorAll(`.${type}`); + for (var i = 0; i < codeblocks.length; i++) { + codeblocks[i].style.display = 'block'; + } +} + +function selectExampleLanguage(group, type) { + groups[group].forEach(lang => hideCodeBlocks(group, lang)); + inputs = Array.from(document.getElementsByTagName('input')).filter(e => e.id == type); + inputs.forEach(input => input.checked = true); + console.log(inputs); + showCodeBlocks(group, type); +} + +// Set the default tab for each group +selectExampleLanguage("spark-queries", "spark-sql"); +selectExampleLanguage("spark-init", "cli"); \ No newline at end of file diff --git a/landing-page/config.toml b/landing-page/config.toml index e2296eb12..4ae237700 100644 --- a/landing-page/config.toml +++ b/landing-page/config.toml @@ -40,12 +40,9 @@ home = [ "HTML", "RSS", "SearchIndex" ] { name = "0.12.1", url = "/docs/0.12.1", weight = 1000 } ] topnav = [ - { name = "Docs", url = "/docs/latest", weight = 100 }, + { name = "Quickstart", url = "/spark-quickstart", weight = 100 }, + { name = "Docs", url = "/docs/latest", weight = 200 }, { name = 
"Releases", url = "/releases", weight = 600 }, - { name = "Spark", url = "/docs/latest/getting-started", weight = 200 }, - { name = "Flink", url = "/docs/latest/flink", weight = 300 }, - { name = "Trino", identifier = "_trino", url = "https://trino.io/docs/current/connector/iceberg.html", weight = 400 }, - { name = "Presto", identifier = "_presto", url = "https://prestodb.io/docs/current/connector/iceberg.html", weight = 500 }, { name = "Blogs", url = "/blogs", weight = 998 }, { name = "Talks", url = "/talks", weight = 999 }, { name = "Roadmap", url = "/roadmap", weight = 997 }, @@ -63,6 +60,9 @@ home = [ "HTML", "RSS", "SearchIndex" ] { name = "Donate", identifier = "_donate", parent = "ASF", url = "https://www.apache.org/foundation/sponsorship.html" }, { name = "Events", identifier = "_events", parent = "ASF", url = "https://www.apache.org/events/current-event.html" }, ] + quickstarts = [ + { name = "Spark and Iceberg Quickstart", weight = 100, url = "spark-quickstart", post = "This quickstart will get you up and running with an Iceberg and Spark environment, including sample notebooks." } + ] [markup.goldmark.renderer] unsafe= true \ No newline at end of file diff --git a/landing-page/content/common/spark-quickstart.md b/landing-page/content/common/spark-quickstart.md new file mode 100644 index 000000000..cef9558a5 --- /dev/null +++ b/landing-page/content/common/spark-quickstart.md @@ -0,0 +1,325 @@ +--- +title: "Spark and Iceberg Quickstart" +weight: 100 +url: spark-quickstart +aliases: + - "quickstart" + - "quickstarts" + - "getting-started" +disableSidebar: true +disableToc: true +--- + + + + +## Spark and Iceberg Quickstart + +This guide will get you up and running with an Iceberg and Spark environment, including sample code to +highlight some powerful features. You can learn more about Iceberg's Spark runtime by checking out the [Spark](../docs/latest/spark-ddl/) section. 
+ +- [Docker-Compose](#docker-compose) +- [Creating a table](#creating-a-table) +- [Writing Data to a Table](#writing-data-to-a-table) +- [Reading Data from a Table](#reading-data-from-a-table) +- [Adding A Catalog](#adding-a-catalog) +- [Next Steps](#next-steps) + +### Docker-Compose + +The fastest way to get started is to use a docker-compose file that uses the [tabulario/spark-iceberg](https://hub.docker.com/r/tabulario/spark-iceberg) image +which contains a local Spark cluster with a configured Iceberg catalog. To use this, you'll need to install the [Docker CLI](https://docs.docker.com/get-docker/) as well as the [Docker Compose CLI](https://github.com/docker/compose-cli/blob/main/INSTALL.md). + +Once you have those, save the yaml below into a file named `docker-compose.yml`: + +```yaml +version: "3" + +services: + spark-iceberg: + image: tabulario/spark-iceberg + depends_on: + - postgres + container_name: spark-iceberg + environment: + - SPARK_HOME=/opt/spark + - PYSPARK_PYTHON=/usr/bin/python3.9 + - PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/opt/spark/bin + volumes: + - ./warehouse:/home/iceberg/warehouse + - ./notebooks:/home/iceberg/notebooks/notebooks + ports: + - 8888:8888 + - 8080:8080 + - 18080:18080 + postgres: + image: postgres:13.4-bullseye + container_name: postgres + environment: + - POSTGRES_USER=admin + - POSTGRES_PASSWORD=password + - POSTGRES_DB=demo_catalog + volumes: + - ./postgres/data:/var/lib/postgresql/data +``` + +Next, start up the docker containers with this command: +```sh +docker-compose up +``` + +You can then run any of the following commands to start a Spark session. 
+ +{{% codetabs "LaunchSparkClient" %}} +{{% addtab "SparkSQL" "spark-queries" "spark-sql" %}} +{{% addtab "Spark-Shell" "spark-queries" "spark-shell" %}} +{{% addtab "PySpark" "spark-queries" "pyspark" %}} +{{% tabcontent "spark-sql" %}} +```sh +docker exec -it spark-iceberg spark-sql +``` +{{% /tabcontent %}} +{{% tabcontent "spark-shell" %}} +```sh +docker exec -it spark-iceberg spark-shell +``` +{{% /tabcontent %}} +{{% tabcontent "pyspark" %}} +```sh +docker exec -it spark-iceberg pyspark +``` +{{% /tabcontent %}} +{{% /codetabs %}} +{{< hint info >}} +You can also launch a notebook server by running `docker exec -it spark-iceberg notebook`. +The notebook server will be available at [http://localhost:8888](http://localhost:8888) +{{< /hint >}} + +### Creating a table + +To create your first Iceberg table in Spark, run a [`CREATE TABLE`](../spark-ddl#create-table) command. Let's create a table +using `demo.nyc.taxis` where `demo` is the catalog name, `nyc` is the database name, and `taxis` is the table name. 
+ + +{{% codetabs "CreateATable" %}} +{{% addtab "SparkSQL" "spark-queries" "spark-sql" %}} +{{% addtab "Spark-Shell" "spark-queries" "spark-shell" %}} +{{% addtab "PySpark" "spark-queries" "pyspark" %}} +{{% tabcontent "spark-sql" %}} +```sql +CREATE TABLE demo.nyc.taxis +( + vendor_id bigint, + trip_id bigint, + trip_distance float, + fare_amount double, + store_and_fwd_flag string +) +PARTITIONED BY (vendor_id); +``` +{{% /tabcontent %}} +{{% tabcontent "spark-shell" %}} +```scala +import org.apache.spark.sql.types._ +import org.apache.spark.sql.Row +val schema = StructType( Array( + StructField("vendor_id", LongType,true), + StructField("trip_id", LongType,true), + StructField("trip_distance", FloatType,true), + StructField("fare_amount", DoubleType,true), + StructField("store_and_fwd_flag", StringType,true) +)) +val df = spark.createDataFrame(spark.sparkContext.emptyRDD[Row],schema) +df.writeTo("demo.nyc.taxis").create() +``` +{{% /tabcontent %}} +{{% tabcontent "pyspark" %}} +```py +from pyspark.sql.types import DoubleType, FloatType, LongType, StructType,StructField, StringType +schema = StructType([ + StructField("vendor_id", LongType(), True), + StructField("trip_id", LongType(), True), + StructField("trip_distance", FloatType(), True), + StructField("fare_amount", DoubleType(), True), + StructField("store_and_fwd_flag", StringType(), True) +]) + +df = spark.createDataFrame([], schema) +df.writeTo("demo.nyc.taxis").create() +``` +{{% /tabcontent %}} +{{% /codetabs %}} + +Iceberg catalogs support the full range of SQL DDL commands, including: + +* [`CREATE TABLE ... PARTITIONED BY`](../spark-ddl#create-table) +* [`CREATE TABLE ... AS SELECT`](../spark-ddl#create-table--as-select) +* [`ALTER TABLE`](../spark-ddl#alter-table) +* [`DROP TABLE`](../spark-ddl#drop-table) + +### Writing Data to a Table + +Once your table is created, you can insert records. 
+ +{{% codetabs "InsertData" %}} +{{% addtab "SparkSQL" "spark-queries" "spark-sql" %}} +{{% addtab "Spark-Shell" "spark-queries" "spark-shell" %}} +{{% addtab "PySpark" "spark-queries" "pyspark" %}} +{{% tabcontent "spark-sql" %}} +```sql +INSERT INTO demo.nyc.taxis +VALUES (1, 1000371, 1.8, 15.32, 'N'), (2, 1000372, 2.5, 22.15, 'N'), (2, 1000373, 0.9, 9.01, 'N'), (1, 1000374, 8.4, 42.13, 'Y'); +``` +{{% /tabcontent %}} +{{% tabcontent "spark-shell" %}} +```scala +import org.apache.spark.sql.Row + +val schema = spark.table("demo.nyc.taxis").schema +val data = Seq( + Row(1: Long, 1000371: Long, 1.8f: Float, 15.32: Double, "N": String), + Row(2: Long, 1000372: Long, 2.5f: Float, 22.15: Double, "N": String), + Row(2: Long, 1000373: Long, 0.9f: Float, 9.01: Double, "N": String), + Row(1: Long, 1000374: Long, 8.4f: Float, 42.13: Double, "Y": String) +) +val df = spark.createDataFrame(spark.sparkContext.parallelize(data), schema) +df.writeTo("demo.nyc.taxis").append() +``` +{{% /tabcontent %}} +{{% tabcontent "pyspark" %}} +```py +schema = spark.table("demo.nyc.taxis").schema +data = [ + (1, 1000371, 1.8, 15.32, "N"), + (2, 1000372, 2.5, 22.15, "N"), + (2, 1000373, 0.9, 9.01, "N"), + (1, 1000374, 8.4, 42.13, "Y") + ] +df = spark.createDataFrame(data, schema) +df.writeTo("demo.nyc.taxis").append() +``` +{{% /tabcontent %}} +{{% /codetabs %}} + +### Reading Data from a Table + +To read a table, simply use the Iceberg table's name. 
+ +{{% codetabs "SelectData" %}} +{{% addtab "SparkSQL" "spark-queries" "spark-sql" %}} +{{% addtab "Spark-Shell" "spark-queries" "spark-shell" %}} +{{% addtab "PySpark" "spark-queries" "pyspark" %}} +{{% tabcontent "spark-sql" %}} +```sql +SELECT * FROM demo.nyc.taxis; +``` +{{% /tabcontent %}} +{{% tabcontent "spark-shell" %}} +```scala +val df = spark.table("demo.nyc.taxis").show() +``` +{{% /tabcontent %}} +{{% tabcontent "pyspark" %}} +```py +df = spark.table("demo.nyc.taxis").show() +``` +{{% /tabcontent %}} +{{% /codetabs %}} + + +### Adding A Catalog + +Iceberg has several catalog back-ends that can be used to track tables, like JDBC, Hive MetaStore and Glue. +Catalogs are configured using properties under `spark.sql.catalog.(catalog_name)`. In this guide, +we use JDBC, but you can follow these instructions to configure other catalog types. To learn more, check out +the [Catalog](../docs/latest/spark-configuration/#catalogs) page in the Spark section. + +This configuration creates a path-based catalog named `demo` for tables under `$PWD/warehouse` and adds support for Iceberg tables to Spark's built-in catalog. 
+ + +{{% codetabs "AddingACatalog" %}} +{{% addtab "CLI" "spark-init" "cli" %}} +{{% addtab "spark-defaults.conf" "spark-init" "spark-defaults" %}} +{{% tabcontent "cli" %}} +```sh +spark-sql --packages org.apache.iceberg:iceberg-spark-runtime-3.2_2.12:{{% icebergVersion %}}\ + --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions \ + --conf spark.sql.catalog.spark_catalog=org.apache.iceberg.spark.SparkSessionCatalog \ + --conf spark.sql.catalog.spark_catalog.type=hive \ + --conf spark.sql.catalog.demo=org.apache.iceberg.spark.SparkCatalog \ + --conf spark.sql.catalog.demo.type=hadoop \ + --conf spark.sql.catalog.demo.warehouse=$PWD/warehouse \ + --conf spark.sql.defaultCatalog=demo +``` +{{% /tabcontent %}} +{{% tabcontent "spark-defaults" %}} +```sh +spark.jars.packages org.apache.iceberg:iceberg-spark-runtime-3.2_2.12:{{% icebergVersion %}} +spark.sql.extensions org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions +spark.sql.catalog.spark_catalog org.apache.iceberg.spark.SparkSessionCatalog +spark.sql.catalog.spark_catalog.type hive +spark.sql.catalog.demo org.apache.iceberg.spark.SparkCatalog +spark.sql.catalog.demo.type hadoop +spark.sql.catalog.demo.warehouse $PWD/warehouse +spark.sql.defaultCatalog demo +``` +{{% /tabcontent %}} +{{% /codetabs %}} + + +{{< hint info >}} +If your Iceberg catalog is not set as the default catalog, you will have to switch to it by executing `USE demo;` +{{< /hint >}} + +### Next steps + +#### Adding Iceberg to Spark + +If you already have a Spark environment, you can add Iceberg, using the `--packages` option. 
+ +{{% codetabs "AddIcebergToSpark" %}} +{{% addtab "SparkSQL" "spark-queries" "spark-sql" %}} +{{% addtab "Spark-Shell" "spark-queries" "spark-shell" %}} +{{% addtab "PySpark" "spark-queries" "pyspark" %}} +{{% tabcontent "spark-sql" %}} +```sh +spark-sql --packages org.apache.iceberg:iceberg-spark-runtime-3.2_2.12:{{% icebergVersion %}} +``` +{{% /tabcontent %}} +{{% tabcontent "spark-shell" %}} +```sh +spark-shell --packages org.apache.iceberg:iceberg-spark-runtime-3.2_2.12:{{% icebergVersion %}} +``` +{{% /tabcontent %}} +{{% tabcontent "pyspark" %}} +```sh +pyspark --packages org.apache.iceberg:iceberg-spark-runtime-3.2_2.12:{{% icebergVersion %}} +``` +{{% /tabcontent %}} +{{% /codetabs %}} + +{{< hint info >}} +If you want to include Iceberg in your Spark installation, add the Iceberg Spark runtime to Spark's `jars` folder. +You can download the runtime by visiting the [Releases](https://iceberg.apache.org/releases/) page. +{{< /hint >}} + +[spark-runtime-jar]: https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-spark-runtime-3.2_2.12/{{% icebergVersion %}}/iceberg-spark-runtime-3.2_2.12-{{% icebergVersion %}}.jar + +#### Learn More + +Now that you're up and running with Iceberg and Spark, check out the [Iceberg-Spark docs](../docs/latest/spark-ddl/) to learn more! \ No newline at end of file