diff --git a/docs/content/spark/sql-query.md b/docs/content/spark/sql-query.md index c97b6d3341b7..cc420b4534f2 100644 --- a/docs/content/spark/sql-query.md +++ b/docs/content/spark/sql-query.md @@ -32,6 +32,22 @@ Just like all other tables, Paimon tables can be queried with `SELECT` statement Paimon's batch read returns all the data in a snapshot of the table. By default, batch reads return the latest snapshot. +```sql +-- read all columns +SELECT * FROM t; +``` + +Paimon also supports reading hidden metadata columns. The following columns are currently supported: + +- `__paimon_file_path`: the file path of the record. +- `__paimon_partition`: the partition of the record. +- `__paimon_bucket`: the bucket of the record. + +```sql +-- read all columns and the corresponding file path, partition, bucket of the record +SELECT *, __paimon_file_path, __paimon_partition, __paimon_bucket FROM t; +``` + ### Batch Time Travel Paimon batch reads with time travel can specify a snapshot or a tag and read the corresponding data. 
diff --git a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/PaimonQueryTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/PaimonQueryTest.scala index 08f5275f01b5..d8d621a0e690 100644 --- a/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/PaimonQueryTest.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/PaimonQueryTest.scala @@ -368,6 +368,22 @@ class PaimonQueryTest extends PaimonSparkTestBase { } } + /* Selects the hidden metadata columns __paimon_file_path, __paimon_partition and __paimon_bucket alongside the regular columns of a table partitioned by (p1, p2). The file path is split on a literal dot and only its last segment is compared (expected "parquet"), presumably because the rest of the path is not predictable - TODO confirm. The partition is expected as a nested Row(p1, p2) and the bucket as 0 for both rows. */ test("Paimon Query: query metadata columns") { + sql("CREATE TABLE T (a INT, p1 INT, p2 INT) PARTITIONED BY (p1, p2)") + sql("INSERT INTO T VALUES (1, 1, 1), (2, 1, 2)") + checkAnswer( + sql(""" + |SELECT + |*, + |element_at(split(__paimon_file_path, '\\.'), -1), + |__paimon_partition, + |__paimon_bucket + |FROM T ORDER BY a + |""".stripMargin), + Seq(Row(1, 1, 1, "parquet", Row(1, 1), 0), Row(2, 1, 2, "parquet", Row(1, 2), 0)) + ) + } + private def getAllFiles( tableName: String, partitions: Seq[String],