From 7d6ac93799aeda9d9c9815efca32fae5bd6779ce Mon Sep 17 00:00:00 2001
From: LsomeYeah <lleiyeah.cat@outlook.com>
Date: Tue, 14 Jan 2025 11:49:07 +0800
Subject: [PATCH 1/2] [doc] first doc

---
 .../migration/migration-from-iceberg.md       | 179 ++++++++++++++++++
 1 file changed, 179 insertions(+)
 create mode 100644 docs/content/migration/migration-from-iceberg.md
diff --git a/docs/content/migration/migration-from-iceberg.md b/docs/content/migration/migration-from-iceberg.md
new file mode 100644
index 000000000000..c0a175cb9986
--- /dev/null
+++ b/docs/content/migration/migration-from-iceberg.md
@@ -0,0 +1,179 @@
+---
+title: "Migration From Iceberg"
+weight: 1
+type: docs
+aliases:
+- /migration/migration-from-iceberg.html
+---
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+# Iceberg Migration
+
+Apache Iceberg data with parquet file format could be migrated to Apache Paimon.
+When migrating an iceberg table to a paimon table, the origin iceberg table will be permanently disappeared. **So please back up your data if you
+still need the original table.** The migrated paimon table will be a [append table]({{< ref "append-table/overview" >}}).
+
+<span style="color: red; "> **We highly recommend to back up iceberg table data before migrating, because migrating action is not atomic. If been interrupted while migrating, you may lose your data.** </span>
+
+## Migrate Iceberg Table
+Currently, we can use paimon catalog with MigrateTableProcedure or MigrateTableAction to migrate the data used by **latest iceberg snapshot** 
+in an iceberg table to a paimon table. 
+
+Iceberg tables managed by hadoop-catalog or hive-catalog are supported to be migrated to paimon.
+As for the type of paimon catalog, it only needs to have access to the file system where the iceberg metadata and data files are located. 
+This means we could migrate an iceberg table managed by hadoop-catalog to a paimon table in hive catalog if their warehouses are in the same file system.
+
+When migrating, the iceberg data files which were marked by DELETED will be ignored. Only the data files referenced by manifest entries with 'EXISTING' and 'ADDED' content will be migrated to paimon.
+Notably, now we don't support migrating iceberg tables with delete files(deletion vectors, position delete files, equality delete files etc.)
+
+Now only parquet format is supported in iceberg migration.
+
+### MigrateTableProcedure
+You can run the following command to migrate an iceberg table to a paimon table.
+```sql
+-- Use named argument
+CALL sys.migrate_table(connector => 'iceberg', source_table => '${database_name.table_name}', options => '${paimon_options}', parallelism => ${parallelism}, iceberg_options => '${iceberg_options}');
+
+-- Use indexed argument
+CALL sys.migrate_table('connector', 'source_table', 'options', 'parallelism', 'iceberg_options');
+```
+* `connector` is used to specify the data source, in iceberg migration, it is always `iceberg`.
+* `source_table` is used to specify the source iceberg table to migrate, it's required;
+* `options` is used to specify the additional options for the target paimon table, it's optional.
+* `parallelism`, integer type, is used to specify the parallelism of the migration job, it's optional.
+* `iceberg_options` is used to specify the configuration of migration, multiple configuration items are separated by commas. it's required. 
+
+#### hadoop-catalog
+To migrate iceberg table managed by hadoop-catalog, you need set `metadata.iceberg.storage=hadoop-catalog` and `iceberg_warehouse`. Example:
+```sql
+CREATE CATALOG paimon_catalog WITH ('type' = 'paimon', 'warehouse' = '/path/to/paimon/warehouse');
+
+USE CATALOG paimon_catalog;
+
+CALL sys.migrate_table(
+    connector => 'iceberg', 
+    source_table => 'iceberg_db.iceberg_tbl',
+    iceberg_options => 'metadata.iceberg.storage=hadoop-catalog,iceberg_warehouse=/path/to/iceberg/warehouse'
+);
+```
+If you want the metadata of the migrated paimon table to be managed by hive, you can also use a paimon catalog backed by the hive metastore for the migration. Example:
+```sql
+CREATE CATALOG paimon_catalog WITH (
+    'type' = 'paimon', 
+    'metastore' = 'hive', 
+    'uri' = 'thrift://localhost:9083', 
+    'warehouse' = '/path/to/paimon/warehouse'
+);
+
+USE CATALOG paimon_catalog;
+
+CALL sys.migrate_table(
+    connector => 'iceberg', 
+    source_table => 'iceberg_db.iceberg_tbl',
+    iceberg_options => 'metadata.iceberg.storage=hadoop-catalog,iceberg_warehouse=/path/to/iceberg/warehouse'
+);
+```
+
+#### hive-catalog
+To migrate an iceberg table managed by hive-catalog, you need to set `metadata.iceberg.storage=hive-catalog`
+and provide information about Hive Metastore used by the iceberg table in `iceberg_options`.
+
+<table class="table table-bordered">
+    <thead>
+    <tr>
+      <th class="text-left" style="width: 20%">Option</th>
+      <th class="text-left" style="width: 5%">Default</th>
+      <th class="text-left" style="width: 10%">Type</th>
+      <th class="text-left" style="width: 60%">Description</th>
+    </tr>
+    </thead>
+    <tbody>
+    <tr>
+      <td><h5>metadata.iceberg.uri</h5></td>
+      <td style="word-wrap: break-word;"></td>
+      <td>String</td>
+      <td>Hive metastore uri for Iceberg Hive catalog.</td>
+    </tr>
+    <tr>
+      <td><h5>metadata.iceberg.hive-conf-dir</h5></td>
+      <td style="word-wrap: break-word;"></td>
+      <td>String</td>
+      <td>hive-conf-dir for Iceberg Hive catalog.</td>
+    </tr>
+    <tr>
+      <td><h5>metadata.iceberg.hadoop-conf-dir</h5></td>
+      <td style="word-wrap: break-word;"></td>
+      <td>String</td>
+      <td>hadoop-conf-dir for Iceberg Hive catalog.</td>
+    </tr>
+    <tr>
+      <td><h5>metadata.iceberg.hive-client-class</h5></td>
+      <td style="word-wrap: break-word;">org.apache.hadoop.hive.metastore.HiveMetaStoreClient</td>
+      <td>String</td>
+      <td>Hive client class name for Iceberg Hive Catalog.</td>
+    </tr>
+    </tbody>
+</table>
+
+Example:
+```sql
+CREATE CATALOG paimon_catalog WITH (
+    'type' = 'paimon', 
+    'metastore' = 'hive', 
+    'uri' = 'thrift://localhost:9083', 
+    'warehouse' = '/path/to/paimon/warehouse'
+);
+
+USE CATALOG paimon_catalog;
+
+CALL sys.migrate_table(
+    connector => 'iceberg', 
+    source_table => 'iceberg_db.iceberg_tbl',
+    iceberg_options => 'metadata.iceberg.storage=hive-catalog,metadata.iceberg.uri=thrift://localhost:9083'
+);
+```
+
+### MigrateTableAction
+You can also use flink action for migration:
+```bash
+<FLINK_HOME>/bin/flink run \
+/path/to/paimon-flink-action-{{< version >}}.jar \
+migrate_table \
+--source_type iceberg \
+--table <icebergDatabase.icebergTable> \
+--iceberg_options <iceberg-conf  [,iceberg-conf ...]> \
+[--parallelism <parallelism>] \
+[--catalog_conf <paimon-catalog-conf> [--catalog_conf <paimon-catalog-conf> ...]] \
+[--options <paimon-table-conf  [,paimon-table-conf ...]> ]
+```
+
+Example:
+```bash
+<FLINK_HOME>/bin/flink run \
+/path/to/paimon-flink-action-{{< version >}}.jar \
+migrate_table \
+--source_type iceberg \
+--table iceberg_db.iceberg_tbl \
+--iceberg_options metadata.iceberg.storage=hive-catalog, metadata.iceberg.uri=thrift://localhost:9083 \
+--parallelism 6 \
+--catalog_conf warehouse=/path/to/paimon/warehouse \
+--catalog_conf metastore=hive \
+--catalog_conf uri=thrift://localhost:9083
+```

From 55a501d3d8305baa8d241e28e3e23974a7d5d2d3 Mon Sep 17 00:00:00 2001
From: LsomeYeah <lleiyeah.cat@outlook.com>
Date: Mon, 17 Feb 2025 13:24:21 +0800
Subject: [PATCH 2/2] [doc] second version

---
 docs/content/flink/procedures.md              | 19 +++++++
 .../migration/migration-from-iceberg.md       | 52 ++++++++-----------
 2 files changed, 42 insertions(+), 29 deletions(-)

diff --git a/docs/content/flink/procedures.md b/docs/content/flink/procedures.md
index db364ff0e3b5..8d05d0f91438 100644
--- a/docs/content/flink/procedures.md
+++ b/docs/content/flink/procedures.md
@@ -578,6 +578,25 @@ All available procedures are listed below.
          CALL sys.migrate_file(connector => 'hive', source_table => 'default.T', target_table => 'default.T2', delete_origin => true, parallelism => 6)
       </td>
    </tr>
+   <tr>
+      <td>migrate_iceberg_table</td>
+      <td>
+         -- Use named argument<br/>
+        CALL sys.migrate_iceberg_table(source_table => 'database_name.table_name', iceberg_options => 'iceberg_options', options => 'paimon_options', parallelism => parallelism);<br/><br/>
+        -- Use indexed argument<br/>
+        CALL sys.migrate_iceberg_table('source_table','iceberg_options', 'options', 'parallelism');
+      </td>
+      <td>
+         To migrate iceberg table to paimon. Arguments:
+            <li>source_table: string type, is used to specify the source iceberg table to migrate, it's required.</li>
+            <li>iceberg_options: string type, is used to specify the configuration of migration; multiple configuration items are separated by commas. It's required.</li>
+            <li>options: string type, is used to specify the additional options for the target paimon table, it's optional.</li>
+            <li>parallelism: integer type, is used to specify the parallelism of the migration job, it's optional.</li>
+      </td>
+      <td>
+         CALL sys.migrate_iceberg_table(source_table => 'iceberg_db.iceberg_tbl',iceberg_options => 'metadata.iceberg.storage=hadoop-catalog,iceberg_warehouse=/path/to/iceberg/warehouse');
+      </td>
+   </tr>
    <tr>
       <td>expire_snapshots</td>
       <td>
diff --git a/docs/content/migration/migration-from-iceberg.md b/docs/content/migration/migration-from-iceberg.md
index c0a175cb9986..a6f87f050baa 100644
--- a/docs/content/migration/migration-from-iceberg.md
+++ b/docs/content/migration/migration-from-iceberg.md
@@ -27,38 +27,37 @@ under the License.
 # Iceberg Migration
 
 Apache Iceberg data with parquet file format could be migrated to Apache Paimon.
-When migrating an iceberg table to a paimon table, the origin iceberg table will be permanently disappeared. **So please back up your data if you
-still need the original table.** The migrated paimon table will be a [append table]({{< ref "append-table/overview" >}}).
+When migrating an iceberg table to a paimon table, the original iceberg table will permanently disappear. **So please back up your data if you
+still need the original table.** The migrated paimon table will be an [append table]({{< ref "append-table/overview" >}}).
 
 <span style="color: red; "> **We highly recommend to back up iceberg table data before migrating, because migrating action is not atomic. If been interrupted while migrating, you may lose your data.** </span>
 
 ## Migrate Iceberg Table
-Currently, we can use paimon catalog with MigrateTableProcedure or MigrateTableAction to migrate the data used by **latest iceberg snapshot** 
+Currently, we can use a paimon catalog with MigrateIcebergTableProcedure or MigrateIcebergTableAction to migrate the data used by the **latest iceberg snapshot**
 in an iceberg table to a paimon table. 
 
 Iceberg tables managed by hadoop-catalog or hive-catalog are supported to be migrated to paimon.
-As for the type of paimon catalog, it only needs to have access to the file system where the iceberg metadata and data files are located. 
+As for the type of paimon catalog, it needs to have access to the file system where the iceberg metadata and data files are located. 
 This means we could migrate an iceberg table managed by hadoop-catalog to a paimon table in hive catalog if their warehouses are in the same file system.
 
 When migrating, the iceberg data files which were marked by DELETED will be ignored. Only the data files referenced by manifest entries with 'EXISTING' and 'ADDED' content will be migrated to paimon.
 Notably, now we don't support migrating iceberg tables with delete files(deletion vectors, position delete files, equality delete files etc.)
 
-Now only parquet format is supported in iceberg migration.
+Now only **parquet** format is supported in iceberg migration.
 
-### MigrateTableProcedure
+### MigrateIcebergTableProcedure
 You can run the following command to migrate an iceberg table to a paimon table.
 ```sql
 -- Use named argument
-CALL sys.migrate_table(connector => 'iceberg', source_table => '${database_name.table_name}', options => '${paimon_options}', parallelism => ${parallelism}, iceberg_options => '${iceberg_options}');
+CALL sys.migrate_iceberg_table(source_table => 'database_name.table_name', iceberg_options => 'iceberg_options', options => 'paimon_options', parallelism => parallelism);
 
 -- Use indexed argument
-CALL sys.migrate_table('connector', 'source_table', 'options', 'parallelism', 'iceberg_options');
+CALL sys.migrate_iceberg_table('source_table','iceberg_options', 'options', 'parallelism');
 ```
-* `connector` is used to specify the data source, in iceberg migration, it is always `iceberg`.
-* `source_table` is used to specify the source iceberg table to migrate, it's required;
-* `options` is used to specify the additional options for the target paimon table, it's optional.
+* `source_table`, string type, is used to specify the source iceberg table to migrate, it's required.
+* `iceberg_options`, string type, is used to specify the configuration of migration; multiple configuration items are separated by commas. It's required.
+* `options`, string type, is used to specify the additional options for the target paimon table, it's optional.
 * `parallelism`, integer type, is used to specify the parallelism of the migration job, it's optional.
-* `iceberg_options` is used to specify the configuration of migration, multiple configuration items are separated by commas. it's required. 
 
 #### hadoop-catalog
 To migrate iceberg table managed by hadoop-catalog, you need set `metadata.iceberg.storage=hadoop-catalog` and `iceberg_warehouse`. Example:
@@ -67,8 +66,7 @@ CREATE CATALOG paimon_catalog WITH ('type' = 'paimon', 'warehouse' = '/path/to/p
 
 USE CATALOG paimon_catalog;
 
-CALL sys.migrate_table(
-    connector => 'iceberg', 
+CALL sys.migrate_iceberg_table(
     source_table => 'iceberg_db.iceberg_tbl',
     iceberg_options => 'metadata.iceberg.storage=hadoop-catalog,iceberg_warehouse=/path/to/iceberg/warehouse'
 );
@@ -78,14 +76,13 @@ If you want the metadata of the migrated paimon table to be managed by hive, you
 CREATE CATALOG paimon_catalog WITH (
     'type' = 'paimon', 
     'metastore' = 'hive', 
-    'uri' = 'thrift://localhost:9083', 
+    'uri' = 'thrift://<host>:<port>', 
     'warehouse' = '/path/to/paimon/warehouse'
 );
 
 USE CATALOG paimon_catalog;
 
-CALL sys.migrate_table(
-    connector => 'iceberg', 
+CALL sys.migrate_iceberg_table(
     source_table => 'iceberg_db.iceberg_tbl',
     iceberg_options => 'metadata.iceberg.storage=hadoop-catalog,iceberg_warehouse=/path/to/iceberg/warehouse'
 );
@@ -107,19 +104,19 @@ and provide information about Hive Metastore used by the iceberg table in `icebe
     <tbody>
     <tr>
       <td><h5>metadata.iceberg.uri</h5></td>
-      <td style="word-wrap: break-word;"></td>
+      <td style="word-wrap: break-word;">none</td>
       <td>String</td>
       <td>Hive metastore uri for Iceberg Hive catalog.</td>
     </tr>
     <tr>
       <td><h5>metadata.iceberg.hive-conf-dir</h5></td>
-      <td style="word-wrap: break-word;"></td>
+      <td style="word-wrap: break-word;">none</td>
       <td>String</td>
       <td>hive-conf-dir for Iceberg Hive catalog.</td>
     </tr>
     <tr>
       <td><h5>metadata.iceberg.hadoop-conf-dir</h5></td>
-      <td style="word-wrap: break-word;"></td>
+      <td style="word-wrap: break-word;">none</td>
       <td>String</td>
       <td>hadoop-conf-dir for Iceberg Hive catalog.</td>
     </tr>
@@ -137,26 +134,24 @@ Example:
 CREATE CATALOG paimon_catalog WITH (
     'type' = 'paimon', 
     'metastore' = 'hive', 
-    'uri' = 'thrift://localhost:9083', 
+    'uri' = 'thrift://<host>:<port>', 
     'warehouse' = '/path/to/paimon/warehouse'
 );
 
 USE CATALOG paimon_catalog;
 
-CALL sys.migrate_table(
-    connector => 'iceberg', 
+CALL sys.migrate_iceberg_table(
     source_table => 'iceberg_db.iceberg_tbl',
-    iceberg_options => 'metadata.iceberg.storage=hive-catalog,metadata.iceberg.uri=thrift://localhost:9083'
+    iceberg_options => 'metadata.iceberg.storage=hive-catalog,metadata.iceberg.uri=thrift://<host>:<port>'
 );
 ```
 
-### MigrateTableAction
+### MigrateIcebergTableAction
 You can also use flink action for migration:
 ```bash
 <FLINK_HOME>/bin/flink run \
 /path/to/paimon-flink-action-{{< version >}}.jar \
-migrate_table \
---source_type iceberg \
+migrate_iceberg_table \
 --table <icebergDatabase.icebergTable> \
 --iceberg_options <iceberg-conf  [,iceberg-conf ...]> \
 [--parallelism <parallelism>] \
@@ -168,8 +163,7 @@ Example:
 ```bash
 <FLINK_HOME>/bin/flink run \
 /path/to/paimon-flink-action-{{< version >}}.jar \
-migrate_table \
---source_type iceberg \
+migrate_iceberg_table \
 --table iceberg_db.iceberg_tbl \
 --iceberg_options metadata.iceberg.storage=hive-catalog, metadata.iceberg.uri=thrift://localhost:9083 \
 --parallelism 6 \

Option	Default	Type	Description
metadata.iceberg.uri		String	Hive metastore uri for Iceberg Hive catalog.
metadata.iceberg.hive-conf-dir		String	hive-conf-dir for Iceberg Hive catalog.
metadata.iceberg.hadoop-conf-dir		String	hadoop-conf-dir for Iceberg Hive catalog.
metadata.iceberg.hive-client-class	org.apache.hadoop.hive.metastore.HiveMetaStoreClient	String	Hive client class name for Iceberg Hive Catalog.