Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,14 @@ case class ProjectionOverSchema(schema: StructType) {
case a: GetArrayStructFields =>
getProjection(a.child).map(p => (p, p.dataType)).map {
case (projection, ArrayType(projSchema @ StructType(_), _)) =>
// For case-sensitivity aware field resolution, we should take `ordinal` which
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about also leaving your comment, "`ExtractValue` actually does column name resolving correctly", here?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, I missed this comment. As it is minor, I will add the comment in #31966 for master only.

// points to correct struct field.
val selectedField = a.child.dataType.asInstanceOf[ArrayType]
.elementType.asInstanceOf[StructType](a.ordinal)
val prunedField = projSchema(selectedField.name)
GetArrayStructFields(projection,
projSchema(a.field.name),
projSchema.fieldIndex(a.field.name),
prunedField.copy(name = a.field.name),
projSchema.fieldIndex(selectedField.name),
projSchema.size,
a.containsNull)
case (_, projSchema) =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,11 @@ object SelectedField {
val field = c.childSchema(c.ordinal)
val newField = field.copy(dataType = dataTypeOpt.getOrElse(field.dataType))
selectField(c.child, Option(struct(newField)))
case GetArrayStructFields(child, field, _, _, containsNull) =>
case GetArrayStructFields(child, _, ordinal, _, containsNull) =>
// For case-sensitivity aware field resolution, we should take `ordinal` which
// points to correct struct field.
val field = child.dataType.asInstanceOf[ArrayType]
.elementType.asInstanceOf[StructType](ordinal)
val newFieldDataType = dataTypeOpt match {
case None =>
// GetArrayStructFields is the top level extractor. This means its result is
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -774,4 +774,46 @@ abstract class SchemaPruningSuite
assert(scanSchema === expectedScanSchema)
}
}

// SPARK-34963: with `spark.sql.caseSensitive` turned off, struct fields nested inside the
// `friends` array column are referenced with mixed-case names. Both the schema handed to
// the scan and the query results are verified, with and without a filter.
testSchemaPruning("SPARK-34963: extract case-insensitive struct field from array") {
  withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") {
    // Both queries are expected to read the same pruned schema.
    val expectedSchema = "struct<friends:array<struct<first:string,middle:string>>>"
    def contacts = spark.table("contacts")

    // Projection only.
    val projected = contacts.select("friends.First", "friends.MiDDle")
    checkScan(projected, expectedSchema)
    checkAnswer(
      projected,
      Seq(
        Row(Array.empty[String], Array.empty[String]),
        Row(Array("Susan"), Array("Z.")),
        Row(null, null),
        Row(null, null)))

    // Filter on the mixed-case nested field, then project.
    val filteredThenProjected = contacts
      .where("friends.First is not null")
      .select("friends.First", "friends.MiDDle")
    checkScan(filteredThenProjected, expectedSchema)
    checkAnswer(
      filteredThenProjected,
      Seq(
        Row(Array.empty[String], Array.empty[String]),
        Row(Array("Susan"), Array("Z."))))
  }
}

// SPARK-34963: with `spark.sql.caseSensitive` turned off, fields of the `name` struct
// column are referenced with mixed-case column and field names. Both the schema handed to
// the scan and the query results are verified, with and without a filter.
testSchemaPruning("SPARK-34963: extract case-insensitive struct field from struct") {
  withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") {
    // Both queries are expected to read the same pruned schema.
    val expectedSchema = "struct<name:struct<first:string,middle:string>>"
    def contacts = spark.table("contacts")

    // Projection only.
    val projected = contacts.select("Name.First", "NAME.MiDDle")
    checkScan(projected, expectedSchema)
    checkAnswer(
      projected,
      Seq(
        Row("Jane", "X."),
        Row("Janet", null),
        Row("Jim", null),
        Row("John", "Y.")))

    // Filter on the mixed-case nested field, then project.
    val filteredThenProjected = contacts
      .where("Name.MIDDLE is not null")
      .select("Name.First", "NAME.MiDDle")
    checkScan(filteredThenProjected, expectedSchema)
    checkAnswer(
      filteredThenProjected,
      Seq(
        Row("Jane", "X."),
        Row("John", "Y.")))
  }
}
}