From c9254d7cd27ee32c2bcf305845733ece02e37f74 Mon Sep 17 00:00:00 2001
From: Fokko Driesprong <fokko@apache.org>
Date: Wed, 7 Sep 2022 20:38:10 +0200
Subject: [PATCH 1/2] Python: Split Python docs

This PR will split the Python docs into a separate site. The main reason
for this is that the docs are part of the Java release, which is not in
sync with the Python release cycle. This means there is a high probability
that the docs do not match the current version of the code.

This will publish the docs to Github pages, by pushing this to the `gh-pages`
branch. We can set up an alias from Apache, and point pyiceberg.apache.org to
the github pages endpoint.

I also tried readthedocs, but I found that not straightforward. Mostly because
they have a build process on their end that will pull the code, and build the
docs. This involves another pipeline that we have to monitor, and we have to
set up webhooks. I am a simple man, and I like simple things, therefore I went
for mkdocs. This can push the docs to github pages in a single command:
https://www.mkdocs.org/user-guide/deploying-your-docs/#project-pages

Considerations:

- Decided to keep it to a single page for now, we can break it out into different
  pages later on. Let me know what you think of this.
- We build the docs now when we push to master, probably we'll change this
  later to trigger on tags.
- I've removed the Python docs from the other docs to avoid confusion and make sure
  that we have a single source of truth.

An example is shown here: https://fokko.github.io/incubator-iceberg/
(Once this is merged, I'll remove that one)

Closes #363
Closes #3283
---
 .asf.yaml                            |   2 +
 .github/workflows/python-ci-docs.yml |  60 +++
 docs/python-api-intro.md             | 184 ---------
 docs/python-feature-support.md       |  79 ----
 docs/python-quickstart.md            |  70 ----
 python/.pre-commit-config.yaml       |   8 +
 python/CONTRIBUTING.md               |   3 +
 python/README.md                     |  10 +-
 python/mkdocs/README.md              |  28 ++
 python/mkdocs/docs/index.md          | 548 +++++++++++++++++++++++++++
 python/mkdocs/mkdocs.yml             |  18 +
 python/mkdocs/requirements.txt       |  19 +
 12 files changed, 690 insertions(+), 339 deletions(-)
 create mode 100644 .github/workflows/python-ci-docs.yml
 delete mode 100644 docs/python-api-intro.md
 delete mode 100644 docs/python-feature-support.md
 delete mode 100644 docs/python-quickstart.md
 create mode 100644 python/mkdocs/README.md
 create mode 100644 python/mkdocs/docs/index.md
 create mode 100644 python/mkdocs/mkdocs.yml
 create mode 100644 python/mkdocs/requirements.txt

diff --git a/.asf.yaml b/.asf.yaml
index 420c8d1741de..a99fe92001f2 100644
--- a/.asf.yaml
+++ b/.asf.yaml
@@ -42,6 +42,8 @@ github:
     - SreeramGarlapati
     - samredai
     - Fokko
+  ghp_branch: gh-pages
+  ghp_path: /  # site is published at the root of the gh-pages branch (was `~`, i.e. YAML null)
 
 notifications:
     commits:      commits@iceberg.apache.org
diff --git a/.github/workflows/python-ci-docs.yml b/.github/workflows/python-ci-docs.yml
new file mode 100644
index 000000000000..3f579b5997b3
--- /dev/null
+++ b/.github/workflows/python-ci-docs.yml
@@ -0,0 +1,60 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+name: "Python Docs"  # builds the mkdocs site in python/mkdocs and publishes it to gh-pages
+on:
+  push:  # publish on every push to master
+    branches:
+      - 'master'
+  pull_request:  # also run when a PR changes this workflow file itself
+    paths:
+      - '.github/workflows/python-ci-docs.yml'
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: ${{ github.event_name == 'pull_request' }}  # only superseded PR runs are cancelled
+
+jobs:
+  docs:
+    runs-on: ubuntu-20.04
+
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions/setup-python@v4
+        with:
+          python-version: '3.9'  # pinned: this job defines no strategy.matrix, so matrix.python was empty
+      - name: Install
+        working-directory: ./python/mkdocs
+        run: pip install -r requirements.txt
+      - name: Build
+        working-directory: ./python/mkdocs
+        run: mkdocs build
+      - name: Copy  # move the built site out of the checkout; the next step removes all tracked files
+        working-directory: ./python/mkdocs
+        run: mv ./site /tmp/site
+      - name: Push changes to gh-pages branch  # force-pushes a fresh orphan commit; push errors are ignored
+        run: |
+          git checkout --orphan gh-pages-tmp
+          git rm --quiet -rf .
+          cp -r /tmp/site/* .
+          git config --global user.name 'GitHub Actions'
+          git config --global user.email 'actions@github.com'
+          git add --all
+          git commit -m 'Publish Python docs'
+          git push -f origin gh-pages-tmp:gh-pages || true
diff --git a/docs/python-api-intro.md b/docs/python-api-intro.md
deleted file mode 100644
index 80c1039d0cef..000000000000
--- a/docs/python-api-intro.md
+++ /dev/null
@@ -1,184 +0,0 @@
----
-title: "Python API"
-url: python-api-intro
-aliases:
-    - "python/api-intro"
-menu:
-    main:
-        parent: "API"
-        weight: 500
----
-<!--
- - Licensed to the Apache Software Foundation (ASF) under one or more
- - contributor license agreements.  See the NOTICE file distributed with
- - this work for additional information regarding copyright ownership.
- - The ASF licenses this file to You under the Apache License, Version 2.0
- - (the "License"); you may not use this file except in compliance with
- - the License.  You may obtain a copy of the License at
- -
- -   http://www.apache.org/licenses/LICENSE-2.0
- -
- - Unless required by applicable law or agreed to in writing, software
- - distributed under the License is distributed on an "AS IS" BASIS,
- - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- - See the License for the specific language governing permissions and
- - limitations under the License.
- -->
-
-# Iceberg Python API
-
-Much of the python api conforms to the java api. You can get more info about the java api [here](../api).
-
-## Catalog
-
-The Catalog interface, like java provides search and management operations for tables.
-
-To create a catalog:
-
-``` python
-from iceberg.hive import HiveTables
-
-# instantiate Hive Tables
-conf = {"hive.metastore.uris": 'thrift://{hms_host}:{hms_port}',
-        "hive.metastore.warehouse.dir": {tmpdir} }
-tables = HiveTables(conf)
-```
-
-and to create a table from a catalog:
-
-``` python
-from iceberg.api.schema import Schema\
-from iceberg.api.types import TimestampType, DoubleType, StringType, NestedField
-from iceberg.api.partition_spec import PartitionSpecBuilder
-
-schema = Schema(NestedField.optional(1, "DateTime", TimestampType.with_timezone()),
-                NestedField.optional(2, "Bid", DoubleType.get()),
-                NestedField.optional(3, "Ask", DoubleType.get()),
-                NestedField.optional(4, "symbol", StringType.get()))
-partition_spec = PartitionSpecBuilder(schema).add(1, 1000, "DateTime_day", "day").build()
-
-tables.create(schema, "test.test_123", partition_spec)
-```
-
-
-## Tables
-
-The Table interface provides access to table metadata
-
-+ schema returns the current table `Schema`
-+ spec returns the current table `PartitonSpec`
-+ properties returns a map of key-value `TableProperties`
-+ currentSnapshot returns the current table `Snapshot`
-+ snapshots returns all valid snapshots for the table
-+ snapshot(id) returns a specific snapshot by ID
-+ location returns the table’s base location
-
-Tables also provide refresh to update the table to the latest version.
-
-### Scanning
-Iceberg table scans start by creating a `TableScan` object with `newScan`.
-
-``` python
-scan = table.new_scan();
-```
-
-To configure a scan, call filter and select on the `TableScan` to get a new `TableScan` with those changes.
-
-``` python
-filtered_scan = scan.filter(Expressions.equal("id", 5))
-```
-
-String expressions can also be passed to the filter method.
-
-``` python
-filtered_scan = scan.filter("id=5")
-```
-
-`Schema` projections can be applied against a `TableScan` by passing a list of column names.
-
-``` python
-filtered_scan = scan.select(["col_1", "col_2", "col_3"])
-```
-
-Because some data types cannot be read using the python library, a convenience method for excluding columns from projection is provided.
-
-``` python
-filtered_scan = scan.select_except(["unsupported_col_1", "unsupported_col_2"])
-```
-
-
-Calls to configuration methods create a new `TableScan` so that each `TableScan` is immutable.
-
-When a scan is configured, `planFiles`, `planTasks`, and `Schema` are used to return files, tasks, and the read projection.
-
-``` python
-scan = table.new_scan() \
-    .filter("id=5") \
-    .select(["id", "data"])
-
-projection = scan.schema
-for task in scan.plan_tasks():
-    print(task)
-```
-
-## Types
-
-Iceberg data types are located in `iceberg.api.types.types`
-
-### Primitives
-
-Primitive type instances are available from static methods in each type class. Types without parameters use `get`, and types like `DecimalType` use factory methods:
-
-```python
-IntegerType.get()    # int
-DoubleType.get()     # double
-DecimalType.of(9, 2) # decimal(9, 2)
-```
-
-### Nested types
-Structs, maps, and lists are created using factory methods in type classes.
-
-Like struct fields, map keys or values and list elements are tracked as nested fields. Nested fields track [field IDs](https://iceberg.apache.org/evolution/#correctness) and nullability.
-
-Struct fields are created using `NestedField.optional` or `NestedField.required`. Map value and list element nullability is set in the map and list factory methods.
-
-```python
-# struct<1 id: int, 2 data: optional string>
-struct = StructType.of([NestedField.required(1, "id", IntegerType.get()),
-                        NestedField.optional(2, "data", StringType.get()])
-  )
-```
-```python
-# map<1 key: int, 2 value: optional string>
-map_var = MapType.of_optional(1, IntegerType.get(),
-                          2, StringType.get())
-```
-```python
-# array<1 element: int>
-list_var = ListType.of_required(1, IntegerType.get());
-```
-
-## Expressions
-Iceberg’s `Expressions` are used to configure table scans. To create `Expressions`, use the factory methods in `Expressions`.
-
-Supported `Predicate` expressions are:
-
-+ `is_null`
-+ `not_null`
-+ `equal`
-+ `not_equal`
-+ `less_than`
-+ `less_than_or_equal`
-+ `greater_than`
-+ `greater_than_or_equal`
-
-Supported expression `Operations`are:
-
-+ `and`
-+ `or`
-+ `not`
-
-Constant expressions are:
-
-+ `always_true`
-+ `always_false`
diff --git a/docs/python-feature-support.md b/docs/python-feature-support.md
deleted file mode 100644
index 4488e4763bac..000000000000
--- a/docs/python-feature-support.md
+++ /dev/null
@@ -1,79 +0,0 @@
----
-title: "Python Feature Support"
-url: python-feature-support
-aliases:
-    - "python/feature-support"
-menu:
-    main:
-        parent: "API"
-        weight: 600
----
-<!--
- - Licensed to the Apache Software Foundation (ASF) under one or more
- - contributor license agreements.  See the NOTICE file distributed with
- - this work for additional information regarding copyright ownership.
- - The ASF licenses this file to You under the Apache License, Version 2.0
- - (the "License"); you may not use this file except in compliance with
- - the License.  You may obtain a copy of the License at
- -
- -   http://www.apache.org/licenses/LICENSE-2.0
- -
- - Unless required by applicable law or agreed to in writing, software
- - distributed under the License is distributed on an "AS IS" BASIS,
- - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- - See the License for the specific language governing permissions and
- - limitations under the License.
- -->
-
-# Feature Support
-
-The goal is that the python library will provide a functional, performant subset of the java library. The initial focus has been on reading table metadata as well as providing the capability to both plan and execute a scan.
-
-## Feature Comparison
-
-### Metadata
-
-| Operation               | Java  | Python |
-|:------------------------|:-----:|:------:|
-| Get Schema              |    X  |    X   |
-| Get Snapshots           |    X  |    X   |
-| Plan Scan               |    X  |    X   |
-| Plan Scan for Snapshot  |    X  |    X   |
-| Update Current Snapshot |    X  |        |
-| Set Table Properties    |    X  |        |
-| Create Table            |    X  |    X   |
-| Drop Table              |    X  |    X   |
-| Alter Table             |    X  |        |
-
-
-### Read Support
-
-Pyarrow is used for reading parquet files, so read support is limited to what is currently supported in the pyarrow.parquet package.
-
-#### Primitive Types
-
-
-| Data Type               | Java | Python |
-|:------------------------|:----:|:------:|
-| BooleanType             |   X  |    X   |
-| DateType                |   X  |    X   |
-| DecimalType             |   X  |    X   |
-| FloatType               |   X  |    X   |
-| IntegerType             |   X  |    X   |
-| LongType                |   X  |    X   |
-| TimeType                |   X  |    X   |
-| TimestampType           |   X  |    X   |
-
-#### Nested Types
-
-| Data Type               | Java | Python |
-|:------------------------|:----:|:------:|
-| ListType of primitives  |   X  |    X   |
-| MapType of primitives   |   X  |    X   |
-| StructType of primitives|   X  |    X   |
-| ListType of Nested Types|   X  |        |
-| MapType of Nested Types |   X  |        |
-
-### Write Support
-
-The python client does not currently support write capability
diff --git a/docs/python-quickstart.md b/docs/python-quickstart.md
deleted file mode 100644
index 03c3acbe81a5..000000000000
--- a/docs/python-quickstart.md
+++ /dev/null
@@ -1,70 +0,0 @@
----
-title: "Python Quickstart"
-url: python-quickstart
-aliases:
-    - "python/quickstart"
-menu:
-    main:
-        parent: "API"
-        weight: 400
----
-<!--
- - Licensed to the Apache Software Foundation (ASF) under one or more
- - contributor license agreements.  See the NOTICE file distributed with
- - this work for additional information regarding copyright ownership.
- - The ASF licenses this file to You under the Apache License, Version 2.0
- - (the "License"); you may not use this file except in compliance with
- - the License.  You may obtain a copy of the License at
- -
- -   http://www.apache.org/licenses/LICENSE-2.0
- -
- - Unless required by applicable law or agreed to in writing, software
- - distributed under the License is distributed on an "AS IS" BASIS,
- - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- - See the License for the specific language governing permissions and
- - limitations under the License.
- -->
-
-
-# Python API Quickstart
-
-## Installation
-
-Iceberg python is currently in development, for development and testing purposes the best way to install the library is to perform the following steps:
-```
-git clone https://github.com/apache/iceberg.git
-cd iceberg/python
-pip install -e .
-```
-
-## Testing
-Testing is done using tox. The config can be found in `tox.ini` within the python directory of the iceberg project.
-
-```
-# simply run tox from within the python dir
-tox
-```
-
-# Examples
-
-## Inspect Table Metadata
-``` python
-
-from iceberg.hive import HiveTables
-
-# instantiate Hive Tables
-conf = {"hive.metastore.uris": 'thrift://{hms_host}:{hms_port}'}
-tables = HiveTables(conf)
-
-# load table
-tbl = tables.load("iceberg_db.iceberg_test_table")
-
-# inspect metadata
-print(tbl.schema())
-print(tbl.spec())
-print(tbl.location())
-
-# get table level record count
-from pprint import pprint
-pprint(int(tbl.current_snapshot().summary.get("total-records")))
-```
diff --git a/python/.pre-commit-config.yaml b/python/.pre-commit-config.yaml
index cce5d2ba0bf2..54c644b32cf0 100644
--- a/python/.pre-commit-config.yaml
+++ b/python/.pre-commit-config.yaml
@@ -63,3 +63,11 @@ repos:
       - id: flake8
         args: [ "--ignore=E501,W503,E203" ]
         additional_dependencies: [ flake8-bugbear==22.7.1, flake8-comprehensions==3.10.0 ]
+  - repo: https://github.com/executablebooks/mdformat  # Markdown formatting hook
+    rev: 0.7.16
+    hooks:
+      - id: mdformat
+        additional_dependencies:  # mdformat plugins installed into this hook's environment
+          - mdformat-black
+          - mdformat-config
+          - mdformat-beautysh
diff --git a/python/CONTRIBUTING.md b/python/CONTRIBUTING.md
index 91ae8215b4cd..1bde5ce7db0d 100644
--- a/python/CONTRIBUTING.md
+++ b/python/CONTRIBUTING.md
@@ -26,6 +26,7 @@ pip install poetry
 ```
 
 If you have an older version of pip and virtualenv you need to update these:
+
 ```bash
 pip install --upgrade virtualenv pip
 ```
@@ -81,11 +82,13 @@ make test-s3
 To pass additional arguments to pytest, you can use `PYTEST_ARGS`.
 
 *Run pytest in verbose mode*
+
 ```sh
 make test PYTEST_ARGS="-v"
 ```
 
 *Run pytest with pdb enabled*
+
 ```sh
 make test PYTEST_ARGS="--pdb"
 ```
diff --git a/python/README.md b/python/README.md
index 292fb7b9eeac..6dfefbce6a01 100644
--- a/python/README.md
+++ b/python/README.md
@@ -17,12 +17,11 @@
 
 # Iceberg Python
 
-py-iceberg is a python library for programmatic access to iceberg table metadata as well as to table data in iceberg format.
-It is an implementation of [iceberg table spec](https://iceberg.apache.org/spec/) in Python.
+pyiceberg is a python library for programmatic access to iceberg table metadata as well as to table data in iceberg format. It is a Python implementation of [iceberg table spec](https://iceberg.apache.org/spec/). Documentation is available at [https://pyiceberg.apache.org/](https://pyiceberg.apache.org/).
 
 ## Getting Started
 
-py-iceberg is currently in development, for development and testing purposes the best way to install the library is to perform the following steps:
+pyiceberg is currently in development, for development and testing purposes the best way to install the library is to perform the following steps:
 
 ```
 git clone https://github.com/apache/iceberg.git
@@ -30,11 +29,9 @@ cd iceberg/python
 pip install -e .
 ```
 
-Development is made easy using [Poetry](https://python-poetry.org/docs/#installation).
-
 ## Development
 
-Poetry provides virtual environments for development:
+Development is made easy using [Poetry](https://python-poetry.org/docs/#installation). Poetry provides virtual environments for development:
 
 ```bash
 poetry shell
@@ -54,4 +51,5 @@ poetry run pytest
 ```
 
 ## Get in Touch
+
 - [Iceberg community](https://iceberg.apache.org/community/)
diff --git a/python/mkdocs/README.md b/python/mkdocs/README.md
new file mode 100644
index 000000000000..e9e0462bee5a
--- /dev/null
+++ b/python/mkdocs/README.md
@@ -0,0 +1,28 @@
+<!--
+ - Licensed to the Apache Software Foundation (ASF) under one or more
+ - contributor license agreements.  See the NOTICE file distributed with
+ - this work for additional information regarding copyright ownership.
+ - The ASF licenses this file to You under the Apache License, Version 2.0
+ - (the "License"); you may not use this file except in compliance with
+ - the License.  You may obtain a copy of the License at
+ -
+ -   http://www.apache.org/licenses/LICENSE-2.0
+ -
+ - Unless required by applicable law or agreed to in writing, software
+ - distributed under the License is distributed on an "AS IS" BASIS,
+ - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ - See the License for the specific language governing permissions and
+ - limitations under the License.
+ -->
+
+# Docs
+
+The pyiceberg docs are stored in `docs/`.
+
+## Running docs locally
+
+```sh
+pip3 install -r requirements.txt
+mkdocs serve
+open http://localhost:8000/
+```
diff --git a/python/mkdocs/docs/index.md b/python/mkdocs/docs/index.md
new file mode 100644
index 000000000000..35351ef0c86f
--- /dev/null
+++ b/python/mkdocs/docs/index.md
@@ -0,0 +1,548 @@
+<!--
+ - Licensed to the Apache Software Foundation (ASF) under one or more
+ - contributor license agreements.  See the NOTICE file distributed with
+ - this work for additional information regarding copyright ownership.
+ - The ASF licenses this file to You under the Apache License, Version 2.0
+ - (the "License"); you may not use this file except in compliance with
+ - the License.  You may obtain a copy of the License at
+ -
+ -   http://www.apache.org/licenses/LICENSE-2.0
+ -
+ - Unless required by applicable law or agreed to in writing, software
+ - distributed under the License is distributed on an "AS IS" BASIS,
+ - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ - See the License for the specific language governing permissions and
+ - limitations under the License.
+ -->
+
+# PyIceberg
+
+Much of the Python API conforms to the Java API. You can get more info about the Java API [here](https://iceberg.apache.org/docs/latest/java-api-quickstart/).
+
+## Installing
+
+You can install the latest release version from pypi:
+
+```sh
+pip3 install "pyiceberg[s3fs,hive]"
+```
+
+Or install the latest development version locally:
+
+```sh
+git clone https://github.com/apache/iceberg.git
+cd iceberg/python
+pip3 install -e ".[s3fs,hive]"
+```
+
+You can mix and match optional dependencies:
+
+| Key       | Description:                                                         |
+|-----------|----------------------------------------------------------------------|
+| hive      | Support for the Hive metastore                                       |
+| pyarrow   | PyArrow as a FileIO implementation to interact with the object store |
+| s3fs      | S3FS as a FileIO implementation to interact with the object store    |
+| snappy    | Support for snappy Avro compression                                  |
+
+# Python CLI Quickstart
+
+Pyiceberg comes with a CLI that's available after installing the `pyiceberg` package.
+
+```sh
+➜  pyiceberg --help
+Usage: pyiceberg [OPTIONS] COMMAND [ARGS]...
+
+Options:
+--catalog TEXT
+--verbose BOOLEAN
+--output [text|json]
+--uri TEXT
+--credential TEXT
+--help                Show this message and exit.
+
+Commands:
+describe    Describes a namespace xor table
+drop        Operations to drop a namespace or table
+list        Lists tables or namespaces
+location    Returns the location of the table
+properties  Properties on tables/namespaces
+rename      Renames a table
+schema      Gets the schema of the table
+spec        Returns the partition spec of the table
+uuid        Returns the UUID of the table
+```
+
+# Configuration
+
+There are three ways of setting the configuration.
+
+For the CLI you can pass it in using `--uri` and `--credential` and it will automatically detect the type based on the scheme (`http(s)` for rest, `thrift` for Hive).
+
+Secondly, YAML-based configuration is supported through the `~/.pyiceberg.yaml` file:
+
+```yaml
+catalog:
+  default:
+    uri: thrift://localhost:9083
+    s3.endpoint: http://localhost:9000
+    s3.access-key-id: admin
+    s3.secret-access-key: password
+
+  rest:
+    uri: http://rest-catalog/ws/
+    credential: t-1234:secret
+```
+
+Lastly, you can also set it using environment variables:
+
+```sh
+export PYICEBERG_CATALOG__DEFAULT__URI=thrift://localhost:9083
+
+export PYICEBERG_CATALOG__REST__URI=http://rest-catalog/ws/
+export PYICEBERG_CATALOG__REST__CREDENTIAL=t-1234:secret
+```
+
+Where the structure is equivalent to the YAML. The levels are separated using a double underscore (`__`).
+
+## FileIO configuration
+
+For the FileIO there are several configuration options available:
+
+| Key                  | Example             | Description                                                                                                                                                                                                                                               |
+|----------------------|---------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| s3.endpoint          | https://10.0.19.25/ | Configure an alternative endpoint of the S3 service for the FileIO to access. This could be used to use S3FileIO with any s3-compatible object storage service that has a different endpoint, or access a private S3 endpoint in a virtual private cloud. |
+| s3.access-key-id     | admin               | Configure the static access key ID used to access the FileIO.                                                                                                                                                                                             |
+| s3.secret-access-key | password            | Configure the static secret access key used to access the FileIO.                                                                                                                                                                                         |
+| s3.signer            | bearer              | Configure the signature version of the FileIO.                                                                                                                                                                                                            |
+
+# CLI Quickstart
+
+This example assumes that you have a default catalog set. If you want to load another catalog, for example the `rest` catalog from the configuration above, then you need to set `--catalog rest`.
+
+```sh
+➜  pyiceberg list
+default
+nyc
+```
+
+```sh
+➜  pyiceberg list nyc
+nyc.taxis
+```
+
+```sh
+pyiceberg describe nyc.taxis
+Table format version  1
+Metadata location     file:/.../nyc.db/taxis/metadata/00000-aa3a3eac-ea08-4255-b890-383a64a94e42.metadata.json
+Table UUID            6cdfda33-bfa3-48a7-a09e-7abb462e3460
+Last Updated          1661783158061
+Partition spec        []
+Sort order            []
+Current schema        Schema, id=0
+├── 1: VendorID: optional long
+├── 2: tpep_pickup_datetime: optional timestamptz
+├── 3: tpep_dropoff_datetime: optional timestamptz
+├── 4: passenger_count: optional double
+├── 5: trip_distance: optional double
+├── 6: RatecodeID: optional double
+├── 7: store_and_fwd_flag: optional string
+├── 8: PULocationID: optional long
+├── 9: DOLocationID: optional long
+├── 10: payment_type: optional long
+├── 11: fare_amount: optional double
+├── 12: extra: optional double
+├── 13: mta_tax: optional double
+├── 14: tip_amount: optional double
+├── 15: tolls_amount: optional double
+├── 16: improvement_surcharge: optional double
+├── 17: total_amount: optional double
+├── 18: congestion_surcharge: optional double
+└── 19: airport_fee: optional double
+Current snapshot      Operation.APPEND: id=5937117119577207079, schema_id=0
+Snapshots             Snapshots
+└── Snapshot 5937117119577207079, schema 0: file:/.../nyc.db/taxis/metadata/snap-5937117119577207079-1-94656c4f-4c66-4600-a4ca-f30377300527.avro
+Properties            owner                 root
+write.format.default  parquet
+```
+
+Or output in JSON for automation:
+
+```sh
+pyiceberg --output json describe nyc.taxis | jq
+{
+  "identifier": [
+    "nyc",
+    "taxis"
+  ],
+  "metadata_location": "file:/.../nyc.db/taxis/metadata/00000-aa3a3eac-ea08-4255-b890-383a64a94e42.metadata.json",
+  "metadata": {
+    "location": "file:/.../nyc.db/taxis",
+    "table-uuid": "6cdfda33-bfa3-48a7-a09e-7abb462e3460",
+    "last-updated-ms": 1661783158061,
+    "last-column-id": 19,
+    "schemas": [
+      {
+        "type": "struct",
+        "fields": [
+          {
+            "id": 1,
+            "name": "VendorID",
+            "type": "long",
+            "required": false
+          },
+...
+          {
+            "id": 19,
+            "name": "airport_fee",
+            "type": "double",
+            "required": false
+          }
+        ],
+        "schema-id": 0,
+        "identifier-field-ids": []
+      }
+    ],
+    "current-schema-id": 0,
+    "partition-specs": [
+      {
+        "spec-id": 0,
+        "fields": []
+      }
+    ],
+    "default-spec-id": 0,
+    "last-partition-id": 999,
+    "properties": {
+      "owner": "root",
+      "write.format.default": "parquet"
+    },
+    "current-snapshot-id": 5937117119577207000,
+    "snapshots": [
+      {
+        "snapshot-id": 5937117119577207000,
+        "timestamp-ms": 1661783158061,
+        "manifest-list": "file:/.../nyc.db/taxis/metadata/snap-5937117119577207079-1-94656c4f-4c66-4600-a4ca-f30377300527.avro",
+        "summary": {
+          "operation": "append",
+          "spark.app.id": "local-1661783139151",
+          "added-data-files": "1",
+          "added-records": "2979431",
+          "added-files-size": "46600777",
+          "changed-partition-count": "1",
+          "total-records": "2979431",
+          "total-files-size": "46600777",
+          "total-data-files": "1",
+          "total-delete-files": "0",
+          "total-position-deletes": "0",
+          "total-equality-deletes": "0"
+        },
+        "schema-id": 0
+      }
+    ],
+    "snapshot-log": [
+      {
+        "snapshot-id": "5937117119577207079",
+        "timestamp-ms": 1661783158061
+      }
+    ],
+    "metadata-log": [],
+    "sort-orders": [
+      {
+        "order-id": 0,
+        "fields": []
+      }
+    ],
+    "default-sort-order-id": 0,
+    "refs": {
+      "main": {
+        "snapshot-id": 5937117119577207000,
+        "type": "branch"
+      }
+    },
+    "format-version": 1,
+    "schema": {
+      "type": "struct",
+      "fields": [
+        {
+          "id": 1,
+          "name": "VendorID",
+          "type": "long",
+          "required": false
+        },
+...
+        {
+          "id": 19,
+          "name": "airport_fee",
+          "type": "double",
+          "required": false
+        }
+      ],
+      "schema-id": 0,
+      "identifier-field-ids": []
+    },
+    "partition-spec": []
+  }
+}
+```
+
+# Python API
+
+To instantiate a catalog:
+
+```python
+from pyiceberg.catalog import load_catalog
+
+catalog = load_catalog("prod")
+
+catalog.list_namespaces()
+```
+
+Returns:
+
+```
+[('default',), ('nyc',)]
+```
+
+Listing the tables in the `nyc` namespace:
+
+```python
+catalog.list_tables("nyc")
+```
+
+Returns:
+
+```
+[('nyc', 'taxis')]
+```
+
+Loading the `taxis` table:
+
+```python
+catalog.load_table(("nyc", "taxis"))
+```
+
+```
+Table(
+  identifier=('nyc', 'taxis'),
+  metadata_location='s3a://warehouse/wh/nyc.db/taxis/metadata/00002-6ea51ce3-62aa-4197-9cf8-43d07c3440ca.metadata.json',
+  metadata=TableMetadataV2(
+    location='s3a://warehouse/wh/nyc.db/taxis',
+    table_uuid=UUID('ebd5d172-2162-453d-b586-1cdce52c1116'),
+    last_updated_ms=1662633437826,
+    last_column_id=19,
+    schemas=[Schema(
+        NestedField(field_id=1, name='VendorID', field_type=LongType(), required=False),
+        NestedField(field_id=2, name='tpep_pickup_datetime', field_type=TimestamptzType(), required=False),
+        NestedField(field_id=3, name='tpep_dropoff_datetime', field_type=TimestamptzType(), required=False),
+        NestedField(field_id=4, name='passenger_count', field_type=DoubleType(), required=False),
+        NestedField(field_id=5, name='trip_distance', field_type=DoubleType(), required=False),
+        NestedField(field_id=6, name='RatecodeID', field_type=DoubleType(), required=False),
+        NestedField(field_id=7, name='store_and_fwd_flag', field_type=StringType(), required=False),
+        NestedField(field_id=8, name='PULocationID', field_type=LongType(), required=False),
+        NestedField(field_id=9, name='DOLocationID', field_type=LongType(), required=False),
+        NestedField(field_id=10, name='payment_type', field_type=LongType(), required=False),
+        NestedField(field_id=11, name='fare_amount', field_type=DoubleType(), required=False),
+        NestedField(field_id=12, name='extra', field_type=DoubleType(), required=False),
+        NestedField(field_id=13, name='mta_tax', field_type=DoubleType(), required=False),
+        NestedField(field_id=14, name='tip_amount', field_type=DoubleType(), required=False),
+        NestedField(field_id=15, name='tolls_amount', field_type=DoubleType(), required=False),
+        NestedField(field_id=16, name='improvement_surcharge', field_type=DoubleType(), required=False),
+        NestedField(field_id=17, name='total_amount', field_type=DoubleType(), required=False),
+        NestedField(field_id=18, name='congestion_surcharge', field_type=DoubleType(), required=False),
+        NestedField(field_id=19, name='airport_fee', field_type=DoubleType(), required=False)
+      ),
+      schema_id=0,
+      identifier_field_ids=[]
+    )],
+    current_schema_id=0,
+    partition_specs=[PartitionSpec(spec_id=0)],
+    default_spec_id=0,
+    last_partition_id=999,
+    properties={
+      'owner': 'root',
+      'write.format.default': 'parquet'
+    },
+    current_snapshot_id=8334458494559715805,
+    snapshots=[
+      Snapshot(
+        snapshot_id=7910949481055846233,
+        parent_snapshot_id=None,
+        sequence_number=None,
+        timestamp_ms=1662489306555,
+        manifest_list='s3a://warehouse/wh/nyc.db/taxis/metadata/snap-7910949481055846233-1-3eb7a2e1-5b7a-4e76-a29a-3e29c176eea4.avro',
+        summary=Summary(
+          Operation.APPEND,
+          **{
+            'spark.app.id': 'local-1662489289173',
+            'added-data-files': '1',
+            'added-records': '2979431',
+            'added-files-size': '46600777',
+            'changed-partition-count': '1',
+            'total-records': '2979431',
+            'total-files-size': '46600777',
+            'total-data-files': '1',
+            'total-delete-files': '0',
+            'total-position-deletes': '0',
+            'total-equality-deletes': '0'
+          }
+        ),
+        schema_id=0
+      ),
+    ],
+    snapshot_log=[
+      SnapshotLogEntry(
+        snapshot_id='7910949481055846233',
+        timestamp_ms=1662489306555
+      )
+    ],
+    metadata_log=[
+      MetadataLogEntry(
+        metadata_file='s3a://warehouse/wh/nyc.db/taxis/metadata/00000-b58341ba-6a63-4eea-9b2f-e85e47c7d09f.metadata.json',
+        timestamp_ms=1662489306555
+      )
+    ],
+    sort_orders=[SortOrder(order_id=0)],
+    default_sort_order_id=0,
+    refs={
+      'main': SnapshotRef(
+        snapshot_id=8334458494559715805,
+        snapshot_ref_type=SnapshotRefType.BRANCH,
+        min_snapshots_to_keep=None,
+        max_snapshot_age_ms=None,
+        max_ref_age_ms=None
+      )
+    },
+    format_version=2,
+    last_sequence_number=1
+  )
+)
+```
+
+And to create a table from a catalog:
+
+```python
+from pyiceberg.schema import Schema
+from pyiceberg.types import TimestampType, DoubleType, StringType, NestedField
+
+schema = Schema(
+    NestedField(
+        field_id=1, name="datetime", field_type=TimestampType(), required=False
+    ),
+    NestedField(field_id=2, name="bid", field_type=DoubleType(), required=False),
+    NestedField(field_id=3, name="ask", field_type=DoubleType(), required=False),
+    NestedField(field_id=4, name="symbol", field_type=StringType(), required=False),
+)
+
+from pyiceberg.table.partitioning import PartitionSpec, PartitionField
+from pyiceberg.transforms import DayTransform
+
+partition_spec = PartitionSpec(
+    PartitionField(
+        source_id=1, field_id=1000, transform=DayTransform(), name="datetime_day"
+    )
+)
+
+from pyiceberg.table.sorting import SortOrder, SortField
+from pyiceberg.transforms import IdentityTransform
+
+sort_order = SortOrder(SortField(source_id=4, transform=IdentityTransform()))
+
+from pyiceberg.catalog.hive import HiveCatalog
+
+catalog = HiveCatalog(name="prod", uri="thrift://localhost:9083/")
+
+catalog.create_table(
+    identifier="default.bids",
+    location="/Users/fokkodriesprong/Desktop/docker-spark-iceberg/wh/bids/",
+    schema=schema,
+    partition_spec=partition_spec,
+    sort_order=sort_order,
+)
+```
+
+Which returns a newly created table:
+
+```
+Table(
+    identifier=('default', 'bids'),
+    metadata_location='/Users/fokkodriesprong/Desktop/docker-spark-iceberg/wh/bids//metadata/00000-c8cd93ab-f784-474d-a167-b1a86b05195f.metadata.json',
+    metadata=TableMetadataV2(
+        location='/Users/fokkodriesprong/Desktop/docker-spark-iceberg/wh/bids/',
+        table_uuid=UUID('38d4cb39-4945-4bf2-b374-984b5c4984d2'),
+        last_updated_ms=1661847562069,
+        last_column_id=4,
+        schemas=[
+            Schema(
+                NestedField(field_id=1, name='datetime', field_type=TimestampType(), required=False),
+                NestedField(field_id=2, name='bid', field_type=DoubleType(), required=False),
+                NestedField(field_id=3, name='ask', field_type=DoubleType(), required=False),
+                NestedField(field_id=4, name='symbol', field_type=StringType(), required=False),
+                schema_id=1,
+                identifier_field_ids=[])
+        ],
+        current_schema_id=1,
+        partition_specs=[
+            PartitionSpec(
+                PartitionField(source_id=1, field_id=1000, transform=DayTransform(), name='datetime_day'),)
+        ],
+        default_spec_id=0,
+        last_partition_id=1000,
+        properties={},
+        current_snapshot_id=None,
+        snapshots=[],
+        snapshot_log=[],
+        metadata_log=[],
+        sort_orders=[
+            SortOrder(order_id=1, fields=[SortField(source_id=4, transform=IdentityTransform(), direction=SortDirection.ASC, null_order=NullOrder.NULLS_FIRST)])
+        ],
+        default_sort_order_id=1,
+        refs={},
+        format_version=2,
+        last_sequence_number=0
+    )
+)
+```
+
+# Feature Support
+
+The goal is for the Python library to provide a functional, performant subset of the Java library. The initial focus has been on reading table metadata and providing a convenient CLI to go through the catalog.
+
+## Metadata
+
+| Operation               | Java  | Python |
+|:------------------------|:-----:|:------:|
+| Get Schema              |    X  |   X    |
+| Get Snapshots           |    X  |   X    |
+| Plan Scan               |    X  |        |
+| Plan Scan for Snapshot  |    X  |        |
+| Update Current Snapshot |    X  |        |
+| Set Table Properties    |    X  |   X    |
+| Create Table            |    X  |   X    |
+| Drop Table              |    X  |   X    |
+| Alter Table             |    X  |        |
+
+## Types
+
+The types are kept in `pyiceberg.types`.
+
+Primitive types:
+
+- `BooleanType`
+- `StringType`
+- `IntegerType`
+- `LongType`
+- `FloatType`
+- `DoubleType`
+- `DateType`
+- `TimeType`
+- `TimestampType`
+- `TimestamptzType`
+- `BinaryType`
+- `UUIDType`
+
+Complex types:
+
+- `StructType`
+- `ListType`
+- `MapType`
+- `FixedType(16)`
+- `DecimalType(8, 3)`
diff --git a/python/mkdocs/mkdocs.yml b/python/mkdocs/mkdocs.yml
new file mode 100644
index 000000000000..c84a2de465ee
--- /dev/null
+++ b/python/mkdocs/mkdocs.yml
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+---
+site_name: PyIceberg
diff --git a/python/mkdocs/requirements.txt b/python/mkdocs/requirements.txt
new file mode 100644
index 000000000000..642a688ebc5a
--- /dev/null
+++ b/python/mkdocs/requirements.txt
@@ -0,0 +1,19 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+mkdocs==1.3.1
+jinja2==3.0.3

From d3900cbf2987d9519506f8331a9fede7264b969a Mon Sep 17 00:00:00 2001
From: Fokko Driesprong <fokko@apache.org>
Date: Wed, 21 Sep 2022 20:04:40 +0200
Subject: [PATCH 2/2] Comments

---
 .github/workflows/python-ci-docs.yml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/.github/workflows/python-ci-docs.yml b/.github/workflows/python-ci-docs.yml
index 3f579b5997b3..9ae255328d61 100644
--- a/.github/workflows/python-ci-docs.yml
+++ b/.github/workflows/python-ci-docs.yml
@@ -22,9 +22,8 @@ on:
   push:
     branches:
       - 'master'
-  pull_request:
     paths:
-      - '.github/workflows/python-ci-docs.yml'
+      - 'python/mkdocs/**'  # mkdocs.yml lives in python/mkdocs/; rebuild docs only when that tree changes
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}