apache · JingsongLi · Jan 21, 2025 · Jan 20, 2025 · Jan 20, 2025 · LinMingQiang
diff --git a/docs/content/spark/sql-query.md b/docs/content/spark/sql-query.md
@@ -32,6 +32,22 @@ Just like all other tables, Paimon tables can be queried with `SELECT` statement
 
 Paimon's batch read returns all the data in a snapshot of the table. By default, batch reads return the latest snapshot.
 
+```sql
+-- read all columns
+SELECT * FROM t;
+```
+
+Paimon also supports reading hidden metadata columns. The following columns are currently supported:
+
+- `__paimon_file_path`: the file path of the record.
+- `__paimon_partition`: the partition of the record.
+- `__paimon_bucket`: the bucket of the record.
+
+```sql
+-- read all columns plus the file path, partition, and bucket of each record
+SELECT *, __paimon_file_path, __paimon_partition, __paimon_bucket FROM t;
+```
+
 ### Batch Time Travel
 
 Paimon batch reads with time travel can specify a snapshot or a tag and read the corresponding data.

diff --git a/...on-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/PaimonQueryTest.scala b/...on-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/PaimonQueryTest.scala
@@ -368,6 +368,22 @@ class PaimonQueryTest extends PaimonSparkTestBase {
     }
   }
 
+  test("Paimon Query: query metadata columns") {
+    // Partitioned table with two rows that land in different (p1, p2) partitions.
+    sql("CREATE TABLE T (a INT, p1 INT, p2 INT) PARTITIONED BY (p1, p2)")
+    sql("INSERT INTO T VALUES (1, 1, 1), (2, 1, 2)")
+
+    // Select every user column plus the three hidden metadata columns. The file path
+    // is split on '.' and only its last segment is kept, so the comparison is made
+    // against the file extension rather than the whole path.
+    val metadataQuery =
+      """
+        |SELECT
+        |*,
+        |element_at(split(__paimon_file_path, '\\.'), -1),
+        |__paimon_partition,
+        |__paimon_bucket
+        |FROM T ORDER BY a
+        |""".stripMargin
+    val actual = sql(metadataQuery)
+
+    // Expected layout per row: a, p1, p2, file extension, partition struct, bucket.
+    val expectedRows = Seq(
+      Row(1, 1, 1, "parquet", Row(1, 1), 0),
+      Row(2, 1, 2, "parquet", Row(1, 2), 0)
+    )
+
+    checkAnswer(actual, expectedRows)
+  }
+
   private def getAllFiles(
       tableName: String,
       partitions: Seq[String],