From 6a2ea461aa1eae6a6a9fc7448c894b8ac119e963 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Mon, 20 Oct 2025 22:10:54 +0000
Subject: [PATCH 1/2] Merge pull request #87020 from ianton-ru/iceberg_table_name_encode

Fix table name encoding in data lake rest catalog
---
 src/Databases/DataLake/RestCatalog.cpp         | 10 ++++++--
 .../integration/test_database_iceberg/test.py | 24 +++++++++++++++++++
 2 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/src/Databases/DataLake/RestCatalog.cpp b/src/Databases/DataLake/RestCatalog.cpp
index 8b0d643f3ec1..dd71315fa6f9 100644
--- a/src/Databases/DataLake/RestCatalog.cpp
+++ b/src/Databases/DataLake/RestCatalog.cpp
@@ -263,7 +263,8 @@ DB::ReadWriteBufferFromHTTPPtr RestCatalog::createReadBuffer(
 {
     const auto & context = getContext();
 
-    Poco::URI url(base_url / endpoint);
+    /// enable_url_encoding=false to allow use tables with encoded sequences in names like 'foo%2Fbar'
+    Poco::URI url(base_url / endpoint, /* enable_url_encoding */ false);
     if (!params.empty())
         url.setQueryParameters(params);
 
@@ -496,7 +497,12 @@ DB::Names RestCatalog::parseTables(DB::ReadBuffer & buf, const std::string & bas
     for (size_t i = 0; i < identifiers_object->size(); ++i)
     {
         const auto current_table_json = identifiers_object->get(static_cast(i)).extract();
-        const auto table_name = current_table_json->get("name").extract();
+        /// If table has encoded sequence (like 'foo%2Fbar')
+        /// catalog returns decoded character instead of sequence ('foo/bar')
+        /// Here name encoded back to 'foo%2Fbar' format
+        const auto table_name_raw = current_table_json->get("name").extract();
+        std::string table_name;
+        Poco::URI::encode(table_name_raw, "/", table_name);
         tables.push_back(base_namespace + "." + table_name);
 
         if (limit && tables.size() >= limit)
diff --git a/tests/integration/test_database_iceberg/test.py b/tests/integration/test_database_iceberg/test.py
index 373f98cedc1c..c602f97ae07e 100644
--- a/tests/integration/test_database_iceberg/test.py
+++ b/tests/integration/test_database_iceberg/test.py
@@ -384,3 +384,27 @@ def record(key):
 
     assert 'aaa\naaa\naaa' == node.query(f"SELECT symbol FROM {CATALOG_NAME}.`{namespace}.{table_name}`").strip()
     assert 'bbb\nbbb\nbbb' == node.query(f"SELECT symbol FROM {CATALOG_NAME}.`{namespace}.{table_name_2}`").strip()
+
+
+def test_table_with_slash(started_cluster):
+    node = started_cluster.instances["node1"]
+
+    # pyiceberg at current moment (version 0.9.1) has a bug with table names with slashes
+    # see https://github.com/apache/iceberg-python/issues/2462
+    # so we need to encode it manually
+    table_raw_suffix = "table/foo"
+    table_encoded_suffix = "table%2Ffoo"
+
+    test_ref = f"test_list_tables_{uuid.uuid4()}"
+    table_name = f"{test_ref}_{table_raw_suffix}"
+    table_encoded_name = f"{test_ref}_{table_encoded_suffix}"
+    root_namespace = f"{test_ref}_namespace"
+
+    catalog = load_catalog_impl(started_cluster)
+    catalog.create_namespace(root_namespace)
+
+    create_table(catalog, root_namespace, table_name, DEFAULT_SCHEMA, PartitionSpec(), DEFAULT_SORT_ORDER)
+
+    create_clickhouse_iceberg_database(started_cluster, node, CATALOG_NAME)
+    node.query(f"INSERT INTO {CATALOG_NAME}.`{root_namespace}.{table_encoded_name}` VALUES (NULL, 'AAPL', 193.24, 193.31, tuple('bot'));", settings={"allow_experimental_insert_into_iceberg": 1, 'write_full_path_in_iceberg_metadata': 1})
+    assert node.query(f"SELECT * FROM {CATALOG_NAME}.`{root_namespace}.{table_encoded_name}`") == "\\N\tAAPL\t193.24\t193.31\t('bot')\n"
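The change in RestCatalog::parseTables above re-encodes the slash that the REST catalog returns in decoded form, so a table stored as 'foo%2Fbar' is listed under that name again instead of as 'foo/bar'. Below is a minimal sketch of the same round trip in Python, using urllib.parse purely for illustration (the server-side code uses Poco::URI::encode, and the names here are hypothetical):

from urllib.parse import quote, unquote

# Hypothetical table name as it comes back in catalog listing responses (decoded).
raw_name = "table/foo"

# Re-encode the slash, mirroring Poco::URI::encode(table_name_raw, "/", table_name):
# this is the identifier form ClickHouse expects, e.g. `namespace.table%2Ffoo`.
encoded_name = quote(raw_name, safe="")
assert encoded_name == "table%2Ffoo"

# The catalog hands the name back decoded, which is why parseTables must encode it again.
assert unquote(encoded_name) == raw_name
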
From f3f830d092da94ff88dfc0ae3cf53ab07b298f7a Mon Sep 17 00:00:00 2001
From: Anton Ivashkin
Date: Thu, 6 Nov 2025 13:38:41 +0100
Subject: [PATCH 2/2] Fix test, write with pyiceberg

---
 tests/integration/test_database_iceberg/test.py | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/tests/integration/test_database_iceberg/test.py b/tests/integration/test_database_iceberg/test.py
index c602f97ae07e..4d492e62572b 100644
--- a/tests/integration/test_database_iceberg/test.py
+++ b/tests/integration/test_database_iceberg/test.py
@@ -404,7 +404,18 @@ def test_table_with_slash(started_cluster):
     catalog.create_namespace(root_namespace)
 
     create_table(catalog, root_namespace, table_name, DEFAULT_SCHEMA, PartitionSpec(), DEFAULT_SORT_ORDER)
+    table = catalog.load_table(f"{root_namespace}.{table_encoded_name}")
+    data = [
+        {
+            "datetime": datetime.strptime("2025-01-01 12:00:00", "%Y-%m-%d %H:%M:%S"),
+            "symbol": "AAPL",
+            "bid": 193.24,
+            "ask": 193.31,
+            "details": {"created_by": "bot"},
+        }
+    ]
+    df = pa.Table.from_pylist(data)
+    table.append(df)
 
     create_clickhouse_iceberg_database(started_cluster, node, CATALOG_NAME)
-    node.query(f"INSERT INTO {CATALOG_NAME}.`{root_namespace}.{table_encoded_name}` VALUES (NULL, 'AAPL', 193.24, 193.31, tuple('bot'));", settings={"allow_experimental_insert_into_iceberg": 1, 'write_full_path_in_iceberg_metadata': 1})
-    assert node.query(f"SELECT * FROM {CATALOG_NAME}.`{root_namespace}.{table_encoded_name}`") == "\\N\tAAPL\t193.24\t193.31\t('bot')\n"
+    assert node.query(f"SELECT * FROM {CATALOG_NAME}.`{root_namespace}.{table_encoded_name}`") == "2025-01-01 12:00:00.000000\tAAPL\t193.24\t193.31\t('bot')\n"
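The second patch replaces the experimental ClickHouse-side INSERT with a write performed directly through pyiceberg, so the test no longer depends on allow_experimental_insert_into_iceberg. A standalone sketch of that write path follows, with a hypothetical catalog URI, credentials, and table identifier (the real test obtains the catalog via load_catalog_impl and uses the encoded table name created earlier):

from datetime import datetime

import pyarrow as pa
from pyiceberg.catalog import load_catalog

# Hypothetical REST catalog endpoint and object storage settings.
catalog = load_catalog(
    "demo",
    **{
        "uri": "http://localhost:8181",
        "s3.endpoint": "http://localhost:9000",
        "s3.access-key-id": "minio",
        "s3.secret-access-key": "minio123",
    },
)

# Load an existing table by its identifier (hypothetical names).
table = catalog.load_table("my_namespace.my_table")

# Build a one-row Arrow table matching the Iceberg schema and append it,
# the same pattern the updated test uses instead of a ClickHouse INSERT.
rows = [
    {
        "datetime": datetime(2025, 1, 1, 12, 0, 0),
        "symbol": "AAPL",
        "bid": 193.24,
        "ask": 193.31,
        "details": {"created_by": "bot"},
    }
]
table.append(pa.Table.from_pylist(rows))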