diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ProjectionOverSchema.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ProjectionOverSchema.scala
index 241c761624b76..03b5517f6df05 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ProjectionOverSchema.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ProjectionOverSchema.scala
@@ -41,9 +41,14 @@ case class ProjectionOverSchema(schema: StructType) {
       case a: GetArrayStructFields =>
         getProjection(a.child).map(p => (p, p.dataType)).map {
           case (projection, ArrayType(projSchema @ StructType(_), _)) =>
+            // For case-sensitivity aware field resolution, we should take `ordinal` which
+            // points to correct struct field.
+            val selectedField = a.child.dataType.asInstanceOf[ArrayType]
+              .elementType.asInstanceOf[StructType](a.ordinal)
+            val prunedField = projSchema(selectedField.name)
             GetArrayStructFields(projection,
-              projSchema(a.field.name),
-              projSchema.fieldIndex(a.field.name),
+              prunedField.copy(name = a.field.name),
+              projSchema.fieldIndex(selectedField.name),
               projSchema.size, a.containsNull)
           case (_, projSchema) =>
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SelectedField.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SelectedField.scala
index a5a42e540151d..4314aad9b46f1 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SelectedField.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SelectedField.scala
@@ -75,7 +75,11 @@ object SelectedField {
         val field = c.childSchema(c.ordinal)
         val newField = field.copy(dataType = dataTypeOpt.getOrElse(field.dataType))
         selectField(c.child, Option(struct(newField)))
-      case GetArrayStructFields(child, field, _, _, containsNull) =>
+      case GetArrayStructFields(child, _, ordinal, _, containsNull) =>
+        // For case-sensitivity aware field resolution, we should take `ordinal` which
+        // points to correct struct field.
+        val field = child.dataType.asInstanceOf[ArrayType]
+          .elementType.asInstanceOf[StructType](ordinal)
         val newFieldDataType = dataTypeOpt match {
           case None =>
             // GetArrayStructFields is the top level extractor. This means its result is
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SchemaPruningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SchemaPruningSuite.scala
index c90732183cb7a..765d2fc584a7d 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SchemaPruningSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SchemaPruningSuite.scala
@@ -774,4 +774,46 @@ abstract class SchemaPruningSuite
       assert(scanSchema === expectedScanSchema)
     }
   }
+
+  testSchemaPruning("SPARK-34963: extract case-insensitive struct field from array") {
+    withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") {
+      val query1 = spark.table("contacts")
+        .select("friends.First", "friends.MiDDle")
+      checkScan(query1, "struct<friends:array<struct<first:string,middle:string>>>")
+      checkAnswer(query1,
+        Row(Array.empty[String], Array.empty[String]) ::
+        Row(Array("Susan"), Array("Z.")) ::
+        Row(null, null) ::
+        Row(null, null) :: Nil)
+
+      val query2 = spark.table("contacts")
+        .where("friends.First is not null")
+        .select("friends.First", "friends.MiDDle")
+      checkScan(query2, "struct<friends:array<struct<first:string,middle:string>>>")
+      checkAnswer(query2,
+        Row(Array.empty[String], Array.empty[String]) ::
+        Row(Array("Susan"), Array("Z.")) :: Nil)
+    }
+  }
+
+  testSchemaPruning("SPARK-34963: extract case-insensitive struct field from struct") {
+    withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") {
+      val query1 = spark.table("contacts")
+        .select("Name.First", "NAME.MiDDle")
+      checkScan(query1, "struct<name:struct<first:string,middle:string>>")
+      checkAnswer(query1,
+        Row("Jane", "X.") ::
+        Row("Janet", null) ::
+        Row("Jim", null) ::
+        Row("John", "Y.") :: Nil)
+
+      val query2 = spark.table("contacts")
+        .where("Name.MIDDLE is not null")
+        .select("Name.First", "NAME.MiDDle")
+      checkScan(query2, "struct<name:struct<first:string,middle:string>>")
+      checkAnswer(query2,
+        Row("Jane", "X.") ::
+        Row("John", "Y.") :: Nil)
+    }
+  }
 }
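A note on why the catalyst changes above resolve by ordinal rather than by name: under case-insensitive analysis, `GetArrayStructFields.field` can carry the name exactly as the query wrote it (e.g. `First`), while the pruned schema keeps the canonical casing from the table schema (e.g. `first`), so a name-based lookup such as `projSchema(a.field.name)` can miss. The following is a minimal sketch of the ordinal-first lookup order using only `org.apache.spark.sql.types`; the object name, the schemas, and the field names are invented for illustration and are not part of the patch.

```scala
import org.apache.spark.sql.types._

object OrdinalLookupSketch {
  def main(args: Array[String]): Unit = {
    // Element type of the array column as declared in the table schema (assumed shape).
    val fullElement = StructType(Seq(
      StructField("first", StringType),
      StructField("middle", StringType),
      StructField("last", StringType)))

    // Element type after schema pruning: only the accessed fields remain.
    val prunedElement = StructType(Seq(
      StructField("first", StringType),
      StructField("middle", StringType)))

    // Suppose the query wrote `First`; the analyzer resolved it to ordinal 0.
    val ordinal = 0
    val nameAsWritten = "First"

    // StructType.apply(String) is an exact-match lookup, so resolving the
    // query-written name against the pruned schema is not case-insensitive:
    // prunedElement(nameAsWritten) would throw IllegalArgumentException here.

    // Ordinal-based resolution, mirroring the patch: take the field the analyzer
    // actually selected, then find it in the pruned schema by its canonical name.
    val selectedField = fullElement(ordinal)
    val prunedField = prunedElement(selectedField.name)
    println(s"resolved $nameAsWritten -> ${prunedField.name}: ${prunedField.dataType}")
  }
}
```

The ordinal is fixed against the child's element type at analysis time, so it stays valid regardless of how the reference was cased in the query text.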
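The new tests rely on the suite's shared `contacts` fixture and the `checkScan`/`checkAnswer` helpers. As a standalone way to exercise the same query shape outside the suite, a sketch like the one below could be used; the object name, temporary path, case classes, and sample data are assumptions, not part of this PR.

```scala
import org.apache.spark.sql.SparkSession

// Hypothetical fixture mirroring the suite's nested shapes (names are assumptions).
case class FullName(first: String, middle: String, last: String)
case class Contact(id: Int, friends: Array[FullName])

object CaseInsensitivePruningRepro {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("SPARK-34963 repro sketch")
      .master("local[1]")
      .config("spark.sql.caseSensitive", "false")
      // Nested schema pruning is on by default in recent releases; set it explicitly.
      .config("spark.sql.optimizer.nestedSchemaPruning.enabled", "true")
      .getOrCreate()
    import spark.implicits._

    val path = "/tmp/spark34963_contacts"
    Seq(
      Contact(0, Array(FullName("Susan", "Z.", "Smith"))),
      Contact(1, Array.empty[FullName]))
      .toDF()
      .write.mode("overwrite").parquet(path)

    // Extract struct fields from the array with a different case than the schema.
    val query = spark.read.parquet(path)
      .select("friends.First", "friends.MiDDle")

    // The physical plan's ReadSchema should show a scan pruned to
    // friends.first and friends.middle only.
    query.explain()
    query.show()

    spark.stop()
  }
}
```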