From 4e1065a3c877b301fce7a5686bd0fc7597e01963 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Sat, 11 Jan 2020 14:13:31 +0300 Subject: [PATCH 1/3] Output log warning --- .../apache/spark/sql/avro/AvroOptions.scala | 15 ++++++++- .../org/apache/spark/sql/avro/AvroSuite.scala | 33 +++++++++++++++++++ 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroOptions.scala b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroOptions.scala index f3ea78583fbc0..2086850c322e2 100644 --- a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroOptions.scala +++ b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroOptions.scala @@ -70,6 +70,9 @@ class AvroOptions( */ @deprecated("Use the general data source option pathGlobFilter for filtering file names", "3.0") val ignoreExtension: Boolean = { + def warn(s: String): Unit = logWarning( + s"$s is deprecated, and it will be not use by Avro datasource in the future releases. " + + "Use the general data source option pathGlobFilter for filtering file names.") val ignoreFilesWithoutExtensionByDefault = false val ignoreFilesWithoutExtension = conf.getBoolean( AvroFileFormat.IgnoreFilesWithoutExtensionProperty, @@ -78,7 +81,17 @@ class AvroOptions( parameters .get(AvroOptions.ignoreExtensionKey) .map(_.toBoolean) - .getOrElse(!ignoreFilesWithoutExtension) + .map { ignoreExtensionOption => + if (ignoreExtensionOption != !ignoreFilesWithoutExtensionByDefault) { + warn(s"The Avro option '${AvroOptions.ignoreExtensionKey}'") + } + ignoreExtensionOption + }.getOrElse { + if (ignoreFilesWithoutExtension != ignoreFilesWithoutExtensionByDefault) { + warn(s"The Hadoop's config '${AvroFileFormat.IgnoreFilesWithoutExtensionProperty}'") + } + !ignoreFilesWithoutExtension + } } /** diff --git a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala index 3f2744014c199..cd4a7f4f9fa75 100644 --- a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala +++ b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala @@ -24,6 +24,8 @@ import java.sql.{Date, Timestamp} import java.util.{Locale, TimeZone, UUID} import scala.collection.JavaConverters._ +import scala.collection.mutable.ArrayBuffer +import scala.language.reflectiveCalls import org.apache.avro.Schema import org.apache.avro.Schema.{Field, Type} @@ -32,6 +34,8 @@ import org.apache.avro.file.{DataFileReader, DataFileWriter} import org.apache.avro.generic.{GenericData, GenericDatumReader, GenericDatumWriter, GenericRecord} import org.apache.avro.generic.GenericData.{EnumSymbol, Fixed} import org.apache.commons.io.FileUtils +import org.apache.log4j.AppenderSkeleton +import org.apache.log4j.spi.LoggingEvent import org.apache.spark.{SparkConf, SparkException} import org.apache.spark.sql._ @@ -1497,6 +1501,35 @@ abstract class AvroSuite extends QueryTest with SharedSparkSession { |} """.stripMargin) } + + + test("deprecation warning for ignoreExtension") { + val logAppender = new AppenderSkeleton { + val loggingEvents = new ArrayBuffer[LoggingEvent]() + + override def append(loggingEvent: LoggingEvent): Unit = loggingEvents.append(loggingEvent) + override def close(): Unit = {} + override def requiresLayout(): Boolean = false + } + def check(key: String): Unit = { + assert(logAppender.loggingEvents.exists( + _.getRenderedMessage.contains(s"'$key' is deprecated"))) + } + + withLogAppender(logAppender) { + withSQLConf(AvroFileFormat.IgnoreFilesWithoutExtensionProperty -> "true") { + spark.read.format("avro").load(testAvro).collect() + } + } + check(AvroFileFormat.IgnoreFilesWithoutExtensionProperty) + + withLogAppender(logAppender) { + spark.read + .option(AvroOptions.ignoreExtensionKey, false) + .format("avro").load(testAvro).collect() + } + check(AvroOptions.ignoreExtensionKey) + } } class AvroV1Suite extends AvroSuite { From 78077ebd19e88854958df28044521dfdd0fdf69b Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Sat, 11 Jan 2020 14:14:02 +0300 Subject: [PATCH 2/3] Remove @deprecated --- .../src/main/scala/org/apache/spark/sql/avro/AvroOptions.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroOptions.scala b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroOptions.scala index 2086850c322e2..1361d4152403c 100644 --- a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroOptions.scala +++ b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroOptions.scala @@ -68,7 +68,6 @@ class AvroOptions( * If the option is not set, the Hadoop's config `avro.mapred.ignore.inputs.without.extension` * is taken into account. If the former one is not set too, file extensions are ignored. */ - @deprecated("Use the general data source option pathGlobFilter for filtering file names", "3.0") val ignoreExtension: Boolean = { def warn(s: String): Unit = logWarning( s"$s is deprecated, and it will be not use by Avro datasource in the future releases. " + From e25f1954d62972476e3bb4bdb58fa6fd8921d51c Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Sat, 11 Jan 2020 14:19:08 +0300 Subject: [PATCH 3/3] Remove logWarning from inferSchema() --- .../src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala | 4 ---- 1 file changed, 4 deletions(-) diff --git a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala index b978b7974b92d..3607d71c1598d 100644 --- a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala +++ b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala @@ -42,10 +42,6 @@ object AvroUtils extends Logging { options: Map[String, String], files: Seq[FileStatus]): Option[StructType] = { val conf = spark.sessionState.newHadoopConf() - if (options.contains("ignoreExtension")) { - logWarning(s"Option ${AvroOptions.ignoreExtensionKey} is deprecated. Please use the " + - "general data source option pathGlobFilter for filtering file names.") - } val parsedOptions = new AvroOptions(options, conf) // User can specify an optional avro json schema.