From f747f7f063bf4c0b06fb2bb5e2a72d591aabccb3 Mon Sep 17 00:00:00 2001 From: zouxxyy Date: Mon, 20 Jan 2025 15:26:27 +0800 Subject: [PATCH 1/2] 1 --- docs/content/spark/sql-query.md | 12 ++++++++++++ .../paimon/spark/sql/PaimonQueryTest.scala | 16 ++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/docs/content/spark/sql-query.md b/docs/content/spark/sql-query.md index c97b6d3341b7..a2b49fd8eaee 100644 --- a/docs/content/spark/sql-query.md +++ b/docs/content/spark/sql-query.md @@ -32,6 +32,18 @@ Just like all other tables, Paimon tables can be queried with `SELECT` statement Paimon's batch read returns all the data in a snapshot of the table. By default, batch reads return the latest snapshot. +```sql +-- read all columns +SELECT * FROM t; +``` + +Paimon also supports reading some hidden metadata columns, such as `__paimon_file_path`, `__paimon_partition`, `__paimon_bucket`. + +```sql +-- read all columns and the corresponding file path, partition, bucket of the record +SELECT *, __paimon_file_path, __paimon_partition, __paimon_bucket FROM t; +``` + ### Batch Time Travel Paimon batch reads with time travel can specify a snapshot or a tag and read the corresponding data. 
diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/PaimonQueryTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/PaimonQueryTest.scala index 08f5275f01b5..d8d621a0e690 100644 --- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/PaimonQueryTest.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/PaimonQueryTest.scala @@ -368,6 +368,22 @@ class PaimonQueryTest extends PaimonSparkTestBase { } } + test("Paimon Query: query metadata columns") { + sql("CREATE TABLE T (a INT, p1 INT, p2 INT) PARTITIONED BY (p1, p2)") + sql("INSERT INTO T VALUES (1, 1, 1), (2, 1, 2)") + checkAnswer( + sql(""" + |SELECT + |*, + |element_at(split(__paimon_file_path, '\\.'), -1), + |__paimon_partition, + |__paimon_bucket + |FROM T ORDER BY a + |""".stripMargin), + Seq(Row(1, 1, 1, "parquet", Row(1, 1), 0), Row(2, 1, 2, "parquet", Row(1, 2), 0)) + ) + } + private def getAllFiles( tableName: String, partitions: Seq[String], From 64ba9e1d6fc88c0534f19679b94190e9bb7d13fc Mon Sep 17 00:00:00 2001 From: zouxxyy Date: Mon, 20 Jan 2025 20:13:53 +0800 Subject: [PATCH 2/2] update --- docs/content/spark/sql-query.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/content/spark/sql-query.md b/docs/content/spark/sql-query.md index a2b49fd8eaee..cc420b4534f2 100644 --- a/docs/content/spark/sql-query.md +++ b/docs/content/spark/sql-query.md @@ -37,7 +37,11 @@ Paimon's batch read returns all the data in a snapshot of the table. By default, SELECT * FROM t; ``` -Paimon also supports reading some hidden metadata columns, such as `__paimon_file_path`, `__paimon_partition`, `__paimon_bucket`. +Paimon also supports reading hidden metadata columns; the following columns are currently supported: + +- `__paimon_file_path`: the file path of the record. +- `__paimon_partition`: the partition of the record. +- `__paimon_bucket`: the bucket of the record. 
```sql -- read all columns and the corresponding file path, partition, bucket of the record