From 47879d906314368ea650511d931fecb3d3d51246 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Tue, 14 Jan 2020 12:36:42 +0300 Subject: [PATCH 1/5] Add a test --- .../org/apache/spark/sql/avro/AvroSuite.scala | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala index 3f2744014c199..b4aa46a1ad971 100644 --- a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala +++ b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala @@ -1497,6 +1497,32 @@ abstract class AvroSuite extends QueryTest with SharedSparkSession { |} """.stripMargin) } + + test("log a warning of ignoreExtension deprecation") { + val logAppender = new LogAppender + withTempPath { dir => + Seq(("a", 1, 2), ("b", 1, 2), ("c", 2, 1), ("d", 2, 1)) + .toDF("value", "p1", "p2") + .repartition(2) + .write + .format("avro") + .option("header", true) + .save(dir.getCanonicalPath) + withLogAppender(logAppender) { + spark + .read + .format("avro") + .option(AvroOptions.ignoreExtensionKey, false) + .option("header", true) + .load(dir.getCanonicalPath) + .count() + } + val deprecatedEvents = logAppender.loggingEvents + .filter(_.getRenderedMessage.contains( + s"Option ${AvroOptions.ignoreExtensionKey} is deprecated")) + assert(deprecatedEvents.size === 1) + } + } } class AvroV1Suite extends AvroSuite { From 0bb33aa15c813af5b12798b666f782cc2feb7741 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Tue, 14 Jan 2020 13:04:15 +0300 Subject: [PATCH 2/5] Bug fix --- .../main/scala/org/apache/spark/sql/avro/AvroUtils.scala | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala index b978b7974b92d..70dcd58a600fc 100644 --- a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala +++ b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala @@ -31,6 +31,7 @@ import org.apache.hadoop.mapreduce.Job import org.apache.spark.SparkException import org.apache.spark.internal.Logging import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.avro.AvroOptions.ignoreExtensionKey import org.apache.spark.sql.execution.datasources.OutputWriterFactory import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ @@ -42,12 +43,12 @@ object AvroUtils extends Logging { options: Map[String, String], files: Seq[FileStatus]): Option[StructType] = { val conf = spark.sessionState.newHadoopConf() - if (options.contains("ignoreExtension")) { - logWarning(s"Option ${AvroOptions.ignoreExtensionKey} is deprecated. Please use the " + - "general data source option pathGlobFilter for filtering file names.") - } val parsedOptions = new AvroOptions(options, conf) + if (parsedOptions.parameters.contains(ignoreExtensionKey)) { + logWarning(s"Option $ignoreExtensionKey is deprecated. Please use the " + + "general data source option pathGlobFilter for filtering file names.") + } // User can specify an optional avro json schema. val avroSchema = parsedOptions.schema .map(new Schema.Parser().parse) From 11cbf2ee08385a31e5064297c858cab12752a630 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Tue, 14 Jan 2020 13:55:54 +0300 Subject: [PATCH 3/5] Remove .option("header", true) in the added test --- .../src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala | 2 -- 1 file changed, 2 deletions(-) diff --git a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala index b4aa46a1ad971..d00065c828417 100644 --- a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala +++ b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala @@ -1506,14 +1506,12 @@ abstract class AvroSuite extends QueryTest with SharedSparkSession { .repartition(2) .write .format("avro") - .option("header", true) .save(dir.getCanonicalPath) withLogAppender(logAppender) { spark .read .format("avro") .option(AvroOptions.ignoreExtensionKey, false) - .option("header", true) .load(dir.getCanonicalPath) .count() } From 6c53b50d35dc542c80ed2256c492c1b207860585 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Tue, 14 Jan 2020 14:02:43 +0300 Subject: [PATCH 4/5] Remove .option("header", true) in another test --- .../src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala | 2 -- 1 file changed, 2 deletions(-) diff --git a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala index d00065c828417..b349ac57892db 100644 --- a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala +++ b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala @@ -1545,12 +1545,10 @@ class AvroV2Suite extends AvroSuite { .write .format("avro") .partitionBy("p1", "p2") - .option("header", true) .save(dir.getCanonicalPath) val df = spark .read .format("avro") - .option("header", true) .load(dir.getCanonicalPath) .where("p1 = 1 and p2 = 2 and value != \"a\"") From 164cf5bf0368f9a24b4f70406c7363f48894d11c Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Tue, 14 Jan 2020 21:54:57 +0300 Subject: [PATCH 5/5] Revert "Remove .option("header", true) in another test" This reverts commit 6c53b50d35dc542c80ed2256c492c1b207860585. --- .../src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala index b349ac57892db..d00065c828417 100644 --- a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala +++ b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala @@ -1545,10 +1545,12 @@ class AvroV2Suite extends AvroSuite { .write .format("avro") .partitionBy("p1", "p2") + .option("header", true) .save(dir.getCanonicalPath) val df = spark .read .format("avro") + .option("header", true) .load(dir.getCanonicalPath) .where("p1 = 1 and p2 = 2 and value != \"a\"")